From 60147c6034e32f687ef82bafe3f0d7fdf451072a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 20 Jan 2022 14:49:12 +0100 Subject: [PATCH 001/946] [EarlyCSE] Regenerate test checks (NFC) --- llvm/test/Transforms/EarlyCSE/atomics.ll | 194 +++++++++------ llvm/test/Transforms/EarlyCSE/basic.ll | 226 +++++++++++------- .../Transforms/EarlyCSE/const-speculation.ll | 18 +- .../test/Transforms/EarlyCSE/floatingpoint.ll | 37 +-- llvm/test/Transforms/EarlyCSE/memoryssa.ll | 173 +++++++++----- llvm/test/Transforms/EarlyCSE/pr33406.ll | 20 +- .../Transforms/EarlyCSE/readnone-mayunwind.ll | 9 +- llvm/test/Transforms/EarlyCSE/writeonly.ll | 8 +- 8 files changed, 439 insertions(+), 246 deletions(-) diff --git a/llvm/test/Transforms/EarlyCSE/atomics.ll b/llvm/test/Transforms/EarlyCSE/atomics.ll index 4a4b76666344a..4d67858237bc9 100644 --- a/llvm/test/Transforms/EarlyCSE/atomics.ll +++ b/llvm/test/Transforms/EarlyCSE/atomics.ll @@ -1,71 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s -; CHECK-LABEL: @test12( define i32 @test12(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]] +; CHECK-NEXT: ret i32 [[SEL]] +; %load0 = load i32, i32* %P1 %1 = load atomic i32, i32* %P2 seq_cst, align 4 %load1 = load i32, i32* %P1 %sel = select i1 %B, i32 %load0, i32 %load1 ret i32 %sel - ; CHECK: load i32, i32* %P1 - ; CHECK: load i32, i32* %P1 } -; CHECK-LABEL: @test13( ; atomic to non-atomic forwarding is legal define i32 @test13(i1 %B, i32* %P1) { +; CHECK-LABEL: @test13( +; CHECK-NEXT: [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, 
align 4 +; CHECK-NEXT: ret i32 0 +; %a = load atomic i32, i32* %P1 seq_cst, align 4 %b = load i32, i32* %P1 %res = sub i32 %a, %b ret i32 %res - ; CHECK: load atomic i32, i32* %P1 - ; CHECK: ret i32 0 } -; CHECK-LABEL: @test14( ; atomic to unordered atomic forwarding is legal define i32 @test14(i1 %B, i32* %P1) { +; CHECK-LABEL: @test14( +; CHECK-NEXT: [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, align 4 +; CHECK-NEXT: ret i32 0 +; %a = load atomic i32, i32* %P1 seq_cst, align 4 %b = load atomic i32, i32* %P1 unordered, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK: load atomic i32, i32* %P1 seq_cst - ; CHECK-NEXT: ret i32 0 } -; CHECK-LABEL: @test15( ; implementation restriction: can't forward to stonger ; than unordered define i32 @test15(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test15( +; CHECK-NEXT: [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[B:%.*]] = load atomic i32, i32* [[P1]] seq_cst, align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[RES]] +; %a = load atomic i32, i32* %P1 seq_cst, align 4 %b = load atomic i32, i32* %P1 seq_cst, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK: load atomic i32, i32* %P1 - ; CHECK: load atomic i32, i32* %P1 } -; CHECK-LABEL: @test16( ; forwarding non-atomic to atomic is wrong! (However, ; it would be legal to use the later value in place of the ; former in this particular example. We just don't ; do that right now.) 
define i32 @test16(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test16( +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[B:%.*]] = load atomic i32, i32* [[P1]] unordered, align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[RES]] +; %a = load i32, i32* %P1, align 4 %b = load atomic i32, i32* %P1 unordered, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK: load i32, i32* %P1 - ; CHECK: load atomic i32, i32* %P1 } ; Can't DSE across a full fence define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @fence_seq_cst_store -; CHECK: store -; CHECK: store atomic -; CHECK: store +; CHECK-LABEL: @fence_seq_cst_store( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: store atomic i32 0, i32* [[P2:%.*]] seq_cst, align 4 +; CHECK-NEXT: store i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 store atomic i32 0, i32* %P2 seq_cst, align 4 store i32 0, i32* %P1, align 4 @@ -74,10 +89,12 @@ define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) { ; Can't DSE across a full fence define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @fence_seq_cst -; CHECK: store -; CHECK: fence seq_cst -; CHECK: store +; CHECK-LABEL: @fence_seq_cst( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: store i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 fence seq_cst store i32 0, i32* %P1, align 4 @@ -86,10 +103,12 @@ define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) { ; Can't DSE across a full fence define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @fence_asm_sideeffect -; CHECK: store -; CHECK: call void asm sideeffect -; CHECK: store +; CHECK-LABEL: @fence_asm_sideeffect( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: call void asm sideeffect "", ""() +; CHECK-NEXT: store i32 0, i32* [[P1]], align 4 
+; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 call void asm sideeffect "", ""() store i32 0, i32* %P1, align 4 @@ -98,10 +117,12 @@ define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) { ; Can't DSE across a full fence define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @fence_asm_memory -; CHECK: store -; CHECK: call void asm -; CHECK: store +; CHECK-LABEL: @fence_asm_memory( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: call void asm "", "~{memory}"() +; CHECK-NEXT: store i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 call void asm "", "~{memory}"() store i32 0, i32* %P1, align 4 @@ -110,32 +131,39 @@ define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) { ; Can't remove a volatile load define i32 @volatile_load(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @volatile_load( +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[B:%.*]] = load volatile i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[RES]] +; %a = load i32, i32* %P1, align 4 %b = load volatile i32, i32* %P1, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK-LABEL: @volatile_load - ; CHECK: load i32, i32* %P1 - ; CHECK: load volatile i32, i32* %P1 } ; Can't remove redundant volatile loads define i32 @redundant_volatile_load(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @redundant_volatile_load( +; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[B:%.*]] = load volatile i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[RES:%.*]] = sub i32 [[A]], [[B]] +; CHECK-NEXT: ret i32 [[RES]] +; %a = load volatile i32, i32* %P1, align 4 %b = load volatile i32, i32* %P1, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK-LABEL: @redundant_volatile_load - ; CHECK: load volatile i32, i32* %P1 - ; CHECK: load volatile i32, i32* %P1 - ; CHECK: sub } ; Can't DSE a volatile store define void 
@volatile_store(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @volatile_store -; CHECK: store volatile -; CHECK: store +; CHECK-LABEL: @volatile_store( +; CHECK-NEXT: store volatile i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store volatile i32 0, i32* %P1, align 4 store i32 3, i32* %P1, align 4 ret void @@ -143,9 +171,11 @@ define void @volatile_store(i1 %B, i32* %P1, i32* %P2) { ; Can't DSE a redundant volatile store define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @redundant_volatile_store -; CHECK: store volatile -; CHECK: store volatile +; CHECK-LABEL: @redundant_volatile_store( +; CHECK-NEXT: store volatile i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: store volatile i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store volatile i32 0, i32* %P1, align 4 store volatile i32 0, i32* %P1, align 4 ret void @@ -153,21 +183,24 @@ define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) { ; Can value forward from volatiles define i32 @test20(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test20( +; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: ret i32 0 +; %a = load volatile i32, i32* %P1, align 4 %b = load i32, i32* %P1, align 4 %res = sub i32 %a, %b ret i32 %res - ; CHECK-LABEL: @test20 - ; CHECK: load volatile i32, i32* %P1 - ; CHECK: ret i32 0 } ; Can DSE a non-volatile store in favor of a volatile one ; currently a missed optimization define void @test21(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test21 -; CHECK: store -; CHECK: store volatile +; CHECK-LABEL: @test21( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: store volatile i32 3, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 store volatile i32 3, i32* %P1, align 4 ret void @@ -175,8 +208,10 @@ define void @test21(i1 %B, i32* %P1, i32* %P2) { ; Can DSE a normal store in favor of a unordered one define 
void @test22(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test22 -; CHECK-NEXT: store atomic +; CHECK-LABEL: @test22( +; CHECK-NEXT: store atomic i32 3, i32* [[P1:%.*]] unordered, align 4 +; CHECK-NEXT: ret void +; store i32 0, i32* %P1, align 4 store atomic i32 3, i32* %P1 unordered, align 4 ret void @@ -184,8 +219,10 @@ define void @test22(i1 %B, i32* %P1, i32* %P2) { ; Can also DSE a unordered store in favor of a normal one define void @test23(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test23 -; CHECK-NEXT: store i32 0 +; CHECK-LABEL: @test23( +; CHECK-NEXT: store i32 0, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: ret void +; store atomic i32 3, i32* %P1 unordered, align 4 store i32 0, i32* %P1, align 4 ret void @@ -195,9 +232,11 @@ define void @test23(i1 %B, i32* %P1, i32* %P2) { ; Note that we could remove the earlier store if we could ; represent the required ordering. define void @test24(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test24 -; CHECK-NEXT: store atomic -; CHECK-NEXT: store i32 0 +; CHECK-LABEL: @test24( +; CHECK-NEXT: store atomic i32 3, i32* [[P1:%.*]] release, align 4 +; CHECK-NEXT: store i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store atomic i32 3, i32* %P1 release, align 4 store i32 0, i32* %P1, align 4 ret void @@ -206,9 +245,11 @@ define void @test24(i1 %B, i32* %P1, i32* %P2) { ; Can't remove volatile stores - each is independently observable and ; the count of such stores is an observable program side effect. 
define void @test25(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test25 -; CHECK-NEXT: store volatile -; CHECK-NEXT: store volatile +; CHECK-LABEL: @test25( +; CHECK-NEXT: store volatile i32 3, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: store volatile i32 0, i32* [[P1]], align 4 +; CHECK-NEXT: ret void +; store volatile i32 3, i32* %P1, align 4 store volatile i32 0, i32* %P1, align 4 ret void @@ -216,9 +257,10 @@ define void @test25(i1 %B, i32* %P1, i32* %P2) { ; Can DSE a unordered store in favor of a unordered one define void @test26(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test26 -; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4 -; CHECK-NEXT: ret +; CHECK-LABEL: @test26( +; CHECK-NEXT: store atomic i32 3, i32* [[P1:%.*]] unordered, align 4 +; CHECK-NEXT: ret void +; store atomic i32 0, i32* %P1 unordered, align 4 store atomic i32 3, i32* %P1 unordered, align 4 ret void @@ -227,10 +269,11 @@ define void @test26(i1 %B, i32* %P1, i32* %P2) { ; Can DSE a unordered store in favor of a ordered one, ; but current don't due to implementation limits define void @test27(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test27 -; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4 -; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4 -; CHECK-NEXT: ret +; CHECK-LABEL: @test27( +; CHECK-NEXT: store atomic i32 0, i32* [[P1:%.*]] unordered, align 4 +; CHECK-NEXT: store atomic i32 3, i32* [[P1]] release, align 4 +; CHECK-NEXT: ret void +; store atomic i32 0, i32* %P1 unordered, align 4 store atomic i32 3, i32* %P1 release, align 4 ret void @@ -239,10 +282,11 @@ define void @test27(i1 %B, i32* %P1, i32* %P2) { ; Can DSE an unordered atomic store in favor of an ; ordered one, but current don't due to implementation limits define void @test28(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test28 -; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4 -; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4 -; CHECK-NEXT: ret +; CHECK-LABEL: 
@test28( +; CHECK-NEXT: store atomic i32 0, i32* [[P1:%.*]] unordered, align 4 +; CHECK-NEXT: store atomic i32 3, i32* [[P1]] release, align 4 +; CHECK-NEXT: ret void +; store atomic i32 0, i32* %P1 unordered, align 4 store atomic i32 3, i32* %P1 release, align 4 ret void @@ -251,9 +295,11 @@ define void @test28(i1 %B, i32* %P1, i32* %P2) { ; As an implementation limitation, can't remove ordered stores ; see also: @test24 define void @test29(i1 %B, i32* %P1, i32* %P2) { -; CHECK-LABEL: @test29 -; CHECK-NEXT: store atomic -; CHECK-NEXT: store atomic +; CHECK-LABEL: @test29( +; CHECK-NEXT: store atomic i32 3, i32* [[P1:%.*]] release, align 4 +; CHECK-NEXT: store atomic i32 0, i32* [[P1]] unordered, align 4 +; CHECK-NEXT: ret void +; store atomic i32 3, i32* %P1 release, align 4 store atomic i32 0, i32* %P1 unordered, align 4 ret void diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll index 5178e5a89e205..df4c5c6c13ac3 100644 --- a/llvm/test/Transforms/EarlyCSE/basic.ll +++ b/llvm/test/Transforms/EarlyCSE/basic.ll @@ -1,62 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes=early-cse | FileCheck %s declare void @llvm.assume(i1) nounwind -; CHECK-LABEL: @test1( define void @test1(i8 %V, i32 *%P) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: store i32 23, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[C:%.*]] = zext i8 [[V:%.*]] to i32 +; CHECK-NEXT: store volatile i32 [[C]], i32* [[P]], align 4 +; CHECK-NEXT: store volatile i32 [[C]], i32* [[P]], align 4 +; CHECK-NEXT: [[E:%.*]] = add i32 [[C]], [[C]] +; CHECK-NEXT: store volatile i32 [[E]], i32* [[P]], align 4 +; CHECK-NEXT: store volatile i32 [[E]], i32* [[P]], align 4 +; CHECK-NEXT: store volatile i32 [[E]], i32* [[P]], align 4 +; CHECK-NEXT: ret void +; %A = bitcast i64 42 to double ;; dead %B = add 
i32 4, 19 ;; constant folds store i32 %B, i32* %P - ; CHECK-NEXT: store i32 23, i32* %P - + %C = zext i8 %V to i32 %D = zext i8 %V to i32 ;; CSE store volatile i32 %C, i32* %P store volatile i32 %D, i32* %P - ; CHECK-NEXT: %C = zext i8 %V to i32 - ; CHECK-NEXT: store volatile i32 %C - ; CHECK-NEXT: store volatile i32 %C - + %E = add i32 %C, %C %F = add i32 %C, %C store volatile i32 %E, i32* %P store volatile i32 %F, i32* %P - ; CHECK-NEXT: %E = add i32 %C, %C - ; CHECK-NEXT: store volatile i32 %E - ; CHECK-NEXT: store volatile i32 %E %G = add nuw i32 %C, %C store volatile i32 %G, i32* %P - ; CHECK-NEXT: store volatile i32 %E ret void } ;; Simple load value numbering. -; CHECK-LABEL: @test2( define i32 @test2(i32 *%P) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 0 +; %V1 = load i32, i32* %P %V2 = load i32, i32* %P %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: ret i32 0 } -; CHECK-LABEL: @test2a( define i32 @test2a(i32 *%P, i1 %b) { +; CHECK-LABEL: @test2a( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[B:%.*]]) +; CHECK-NEXT: ret i32 0 +; %V1 = load i32, i32* %P tail call void @llvm.assume(i1 %b) %V2 = load i32, i32* %P %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: ret i32 0 } ;; Cross block load value numbering. 
-; CHECK-LABEL: @test3( define i32 @test3(i32 *%P, i1 %Cond) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: store i32 4, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 42 +; CHECK: F: +; CHECK-NEXT: ret i32 0 +; %V1 = load i32, i32* %P br i1 %Cond, label %T, label %F T: @@ -66,12 +82,19 @@ F: %V2 = load i32, i32* %P %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: F: - ; CHECK: ret i32 0 } -; CHECK-LABEL: @test3a( define i32 @test3a(i32 *%P, i1 %Cond, i1 %b) { +; CHECK-LABEL: @test3a( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: store i32 4, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 42 +; CHECK: F: +; CHECK-NEXT: tail call void @llvm.assume(i1 [[B:%.*]]) +; CHECK-NEXT: ret i32 0 +; %V1 = load i32, i32* %P br i1 %Cond, label %T, label %F T: @@ -82,13 +105,20 @@ F: %V2 = load i32, i32* %P %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: F: - ; CHECK: ret i32 0 } ;; Cross block load value numbering stops when stores happen. -; CHECK-LABEL: @test4( define i32 @test4(i32 *%P, i1 %Cond) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i32 42 +; CHECK: F: +; CHECK-NEXT: store i32 42, i32* [[P]], align 4 +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[V1]], 42 +; CHECK-NEXT: ret i32 [[DIFF]] +; %V1 = load i32, i32* %P br i1 %Cond, label %T, label %F T: @@ -96,142 +126,166 @@ T: F: ; Clobbers V1 store i32 42, i32* %P - + %V2 = load i32, i32* %P %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: F: - ; CHECK: ret i32 %Diff } declare i32 @func(i32 *%P) readonly ;; Simple call CSE'ing. 
-; CHECK-LABEL: @test5( define i32 @test5(i32 *%P) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[V1:%.*]] = call i32 @func(i32* [[P:%.*]]) +; CHECK-NEXT: ret i32 0 +; %V1 = call i32 @func(i32* %P) %V2 = call i32 @func(i32* %P) %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: ret i32 0 } ;; Trivial Store->load forwarding -; CHECK-LABEL: @test6( define i32 @test6(i32 *%P) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 42 +; store i32 42, i32* %P %V1 = load i32, i32* %P ret i32 %V1 - ; CHECK: ret i32 42 } -; CHECK-LABEL: @test6a( define i32 @test6a(i32 *%P, i1 %b) { +; CHECK-LABEL: @test6a( +; CHECK-NEXT: store i32 42, i32* [[P:%.*]], align 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[B:%.*]]) +; CHECK-NEXT: ret i32 42 +; store i32 42, i32* %P tail call void @llvm.assume(i1 %b) %V1 = load i32, i32* %P ret i32 %V1 - ; CHECK: ret i32 42 } ;; Trivial dead store elimination. -; CHECK-LABEL: @test7( define void @test7(i32 *%P) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: store i32 45, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; store i32 42, i32* %P store i32 45, i32* %P ret void - ; CHECK-NEXT: store i32 45 - ; CHECK-NEXT: ret void } ;; Readnone functions aren't invalidated by stores. -; CHECK-LABEL: @test8( define i32 @test8(i32 *%P) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[V1:%.*]] = call i32 @func(i32* [[P:%.*]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: store i32 4, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 0 +; %V1 = call i32 @func(i32* %P) readnone store i32 4, i32* %P %V2 = call i32 @func(i32* %P) readnone %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: ret i32 0 } ;; Trivial DSE can't be performed across a readonly call. The call ;; can observe the earlier write. 
-; CHECK-LABEL: @test9( define i32 @test9(i32 *%P) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: store i32 4, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[V1:%.*]] = call i32 @func(i32* [[P]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: store i32 5, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[V1]] +; store i32 4, i32* %P %V1 = call i32 @func(i32* %P) readonly - store i32 5, i32* %P + store i32 5, i32* %P ret i32 %V1 - ; CHECK: store i32 4, i32* %P - ; CHECK-NEXT: %V1 = call i32 @func(i32* %P) - ; CHECK-NEXT: store i32 5, i32* %P - ; CHECK-NEXT: ret i32 %V1 } ;; Trivial DSE can be performed across a readnone call. -; CHECK-LABEL: @test10 define i32 @test10(i32 *%P) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[V1:%.*]] = call i32 @func(i32* [[P:%.*]]) #[[ATTR2]] +; CHECK-NEXT: store i32 5, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[V1]] +; store i32 4, i32* %P %V1 = call i32 @func(i32* %P) readnone - store i32 5, i32* %P + store i32 5, i32* %P ret i32 %V1 - ; CHECK-NEXT: %V1 = call i32 @func(i32* %P) - ; CHECK-NEXT: store i32 5, i32* %P - ; CHECK-NEXT: ret i32 %V1 } ;; Trivial dead store elimination - should work for an entire series of dead stores too. 
-; CHECK-LABEL: @test11( define void @test11(i32 *%P) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: store i32 45, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; store i32 42, i32* %P store i32 43, i32* %P store i32 44, i32* %P store i32 45, i32* %P ret void - ; CHECK-NEXT: store i32 45 - ; CHECK-NEXT: ret void } -; CHECK-LABEL: @test12( define i32 @test12(i1 %B, i32* %P1, i32* %P2) { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]] +; CHECK-NEXT: ret i32 [[SEL]] +; %load0 = load i32, i32* %P1 %1 = load atomic i32, i32* %P2 seq_cst, align 4 %load1 = load i32, i32* %P1 %sel = select i1 %B, i32 %load0, i32 %load1 ret i32 %sel - ; CHECK: load i32, i32* %P1 - ; CHECK: load i32, i32* %P1 } define void @dse1(i32 *%P) { -; CHECK-LABEL: @dse1 -; CHECK-NOT: store +; CHECK-LABEL: @dse1( +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; %v = load i32, i32* %P store i32 %v, i32* %P ret void } define void @dse2(i32 *%P) { -; CHECK-LABEL: @dse2 -; CHECK-NOT: store +; CHECK-LABEL: @dse2( +; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: ret void +; %v = load atomic i32, i32* %P seq_cst, align 4 store i32 %v, i32* %P ret void } define void @dse3(i32 *%P) { -; CHECK-LABEL: @dse3 -; CHECK-NOT: store +; CHECK-LABEL: @dse3( +; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4 +; CHECK-NEXT: ret void +; %v = load atomic i32, i32* %P seq_cst, align 4 store atomic i32 %v, i32* %P unordered, align 4 ret void } define i32 @dse4(i32 *%P, i32 *%Q) { -; CHECK-LABEL: @dse4 -; CHECK-NOT: store -; CHECK: ret i32 0 +; CHECK-LABEL: @dse4( +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: [[V:%.*]] = load atomic 
i32, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: ret i32 0 +; %a = load i32, i32* %Q %v = load atomic i32, i32* %P unordered, align 4 store atomic i32 %v, i32* %P unordered, align 4 @@ -242,14 +296,16 @@ define i32 @dse4(i32 *%P, i32 *%Q) { ; Note that in this example, %P and %Q could in fact be the same ; pointer. %v could be different than the value observed for %a -; and that's okay because we're using relaxed memory ordering. -; The only guarantee we have to provide is that each of the loads -; has to observe some value written to that location. We do -; not have to respect the order in which those writes were done. +; and that's okay because we're using relaxed memory ordering. +; The only guarantee we have to provide is that each of the loads +; has to observe some value written to that location. We do +; not have to respect the order in which those writes were done. define i32 @dse5(i32 *%P, i32 *%Q) { -; CHECK-LABEL: @dse5 -; CHECK-NOT: store -; CHECK: ret i32 0 +; CHECK-LABEL: @dse5( +; CHECK-NEXT: [[V:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4 +; CHECK-NEXT: [[A:%.*]] = load atomic i32, i32* [[Q:%.*]] unordered, align 4 +; CHECK-NEXT: ret i32 0 +; %v = load atomic i32, i32* %P unordered, align 4 %a = load atomic i32, i32* %Q unordered, align 4 store atomic i32 %v, i32* %P unordered, align 4 @@ -260,8 +316,10 @@ define i32 @dse5(i32 *%P, i32 *%Q) { define void @dse_neg1(i32 *%P) { -; CHECK-LABEL: @dse_neg1 -; CHECK: store +; CHECK-LABEL: @dse_neg1( +; CHECK-NEXT: store i32 5, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret void +; %v = load i32, i32* %P store i32 5, i32* %P ret void @@ -270,8 +328,11 @@ define void @dse_neg1(i32 *%P) { ; Could remove the store, but only if ordering was somehow ; encoded. 
define void @dse_neg2(i32 *%P) { -; CHECK-LABEL: @dse_neg2 -; CHECK: store +; CHECK-LABEL: @dse_neg2( +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: store atomic i32 [[V]], i32* [[P]] seq_cst, align 4 +; CHECK-NEXT: ret void +; %v = load i32, i32* %P store atomic i32 %v, i32* %P seq_cst, align 4 ret void @@ -280,11 +341,14 @@ define void @dse_neg2(i32 *%P) { @c = external global i32, align 4 declare i32 @reads_c(i32 returned) define void @pr28763() { -entry: ; CHECK-LABEL: @pr28763( -; CHECK: store i32 0, i32* @c, align 4 -; CHECK: call i32 @reads_c(i32 0) -; CHECK: store i32 2, i32* @c, align 4 +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 0, i32* @c, align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @reads_c(i32 0) +; CHECK-NEXT: store i32 2, i32* @c, align 4 +; CHECK-NEXT: ret void +; +entry: %load = load i32, i32* @c, align 4 store i32 0, i32* @c, align 4 %call = call i32 @reads_c(i32 0) @@ -293,10 +357,12 @@ entry: } define i1 @cse_freeze(i1 %a) { -entry: ; CHECK-LABEL: @cse_freeze( -; CHECK: %b = freeze i1 %a -; CHECK: ret i1 %b +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = freeze i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[B]] +; +entry: %b = freeze i1 %a %c = freeze i1 %a %and = and i1 %b, %c diff --git a/llvm/test/Transforms/EarlyCSE/const-speculation.ll b/llvm/test/Transforms/EarlyCSE/const-speculation.ll index a531c14da770c..bf4469ca37331 100644 --- a/llvm/test/Transforms/EarlyCSE/const-speculation.ll +++ b/llvm/test/Transforms/EarlyCSE/const-speculation.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -early-cse -earlycse-debug-hash -S %s | FileCheck %s %mystruct = type { i32 } @@ -15,15 +16,20 @@ ; crash. 
define i1 @test_constant_speculation() { -; CHECK-LABEL: define i1 @test_constant_speculation +; CHECK-LABEL: @test_constant_speculation( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[END:%.*]], label [[SELECT:%.*]] +; CHECK: select: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[TMP:%.*]] = phi i32* [ null, [[ENTRY:%.*]] ], [ getelementptr inbounds ([[MYSTRUCT:%.*]], %mystruct* @var, i64 0, i32 0), [[SELECT]] ] +; CHECK-NEXT: [[RES:%.*]] = icmp eq i32* [[TMP]], null +; CHECK-NEXT: ret i1 [[RES]] +; entry: br i1 undef, label %end, label %select select: -; CHECK: select: -; CHECK-NOT: icmp -; CHECK-NOT: getelementptr -; CHECK-NOT: select %tst = icmp eq i32 1, 0 %elt = getelementptr %mystruct, %mystruct* @var, i64 0, i32 0 @@ -31,8 +37,6 @@ select: br label %end end: -; CHECK: end: -; CHECK: %tmp = phi i32* [ null, %entry ], [ getelementptr inbounds (%mystruct, %mystruct* @var, i64 0, i32 0), %select ] %tmp = phi i32* [null, %entry], [%sel, %select] %res = icmp eq i32* %tmp, null ret i1 %res diff --git a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll index a4293f5eed9c1..c7579adfdd3cd 100644 --- a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll +++ b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll @@ -1,27 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars). 
define <4 x float> @fV( <4 x float> %a) { - ; CHECK: %b = fadd <4 x float> %a, zeroinitializer - %b = fadd <4 x float> %a, - ret <4 x float> %b +; CHECK-LABEL: @fV( +; CHECK-NEXT: [[B:%.*]] = fadd <4 x float> [[A:%.*]], zeroinitializer +; CHECK-NEXT: ret <4 x float> [[B]] +; + %b = fadd <4 x float> %a, + ret <4 x float> %b } define <4 x float> @fW( <4 x float> %a) { - ; CHECK: ret <4 x float> %a - %b = fadd <4 x float> %a, - ret <4 x float> %b +; CHECK-LABEL: @fW( +; CHECK-NEXT: ret <4 x float> [[A:%.*]] +; + %b = fadd <4 x float> %a, + ret <4 x float> %b } ; CSE unary fnegs. define void @fX(<4 x float> *%p, <4 x float> %a) { - ; CHECK: %x = fneg <4 x float> %a - ; CHECK-NEXT: store volatile <4 x float> %x, <4 x float>* %p - ; CHECK-NEXT: store volatile <4 x float> %x, <4 x float>* %p - %x = fneg <4 x float> %a - %y = fneg <4 x float> %a - store volatile <4 x float> %x, <4 x float>* %p - store volatile <4 x float> %y, <4 x float>* %p - ret void +; CHECK-LABEL: @fX( +; CHECK-NEXT: [[X:%.*]] = fneg <4 x float> [[A:%.*]] +; CHECK-NEXT: store volatile <4 x float> [[X]], <4 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: store volatile <4 x float> [[X]], <4 x float>* [[P]], align 16 +; CHECK-NEXT: ret void +; + %x = fneg <4 x float> %a + %y = fneg <4 x float> %a + store volatile <4 x float> %x, <4 x float>* %p + store volatile <4 x float> %y, <4 x float>* %p + ret void } diff --git a/llvm/test/Transforms/EarlyCSE/memoryssa.ll b/llvm/test/Transforms/EarlyCSE/memoryssa.ll index 23c7137ca8c63..730e8104452dc 100644 --- a/llvm/test/Transforms/EarlyCSE/memoryssa.ll +++ b/llvm/test/Transforms/EarlyCSE/memoryssa.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s --check-prefix=CHECK-NOMEMSSA ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s ; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA @@ -8,61 +9,86 @@ @G3 = global i32 
zeroinitializer ;; Simple load value numbering across non-clobbering store. -; CHECK-LABEL: @test1( -; CHECK-NOMEMSSA-LABEL: @test1( define i32 @test1() { +; CHECK-NOMEMSSA-LABEL: @test1( +; CHECK-NOMEMSSA-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: store i32 0, i32* @G2, align 4 +; CHECK-NOMEMSSA-NEXT: [[V2:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: [[DIFF:%.*]] = sub i32 [[V1]], [[V2]] +; CHECK-NOMEMSSA-NEXT: ret i32 [[DIFF]] +; +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NEXT: store i32 0, i32* @G2, align 4 +; CHECK-NEXT: ret i32 0 +; %V1 = load i32, i32* @G1 store i32 0, i32* @G2 %V2 = load i32, i32* @G1 - ; CHECK-NOMEMSSA: sub i32 %V1, %V2 %Diff = sub i32 %V1, %V2 ret i32 %Diff - ; CHECK: ret i32 0 } ;; Simple dead store elimination across non-clobbering store. -; CHECK-LABEL: @test2( -; CHECK-NOMEMSSA-LABEL: @test2( define void @test2() { +; CHECK-NOMEMSSA-LABEL: @test2( +; CHECK-NOMEMSSA-NEXT: entry: +; CHECK-NOMEMSSA-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: store i32 0, i32* @G2, align 4 +; CHECK-NOMEMSSA-NEXT: store i32 [[V1]], i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: ret void +; +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NEXT: store i32 0, i32* @G2, align 4 +; CHECK-NEXT: ret void +; entry: %V1 = load i32, i32* @G1 - ; CHECK: store i32 0, i32* @G2 store i32 0, i32* @G2 - ; CHECK-NOT: store - ; CHECK-NOMEMSSA: store i32 %V1, i32* @G1 store i32 %V1, i32* @G1 ret void } ;; Check that memoryphi optimization happens during EarlyCSE, enabling ;; more load CSE opportunities. 
-; CHECK-LABEL: @test_memphiopt( -; CHECK-NOMEMSSA-LABEL: @test_memphiopt( define void @test_memphiopt(i1 %c, i32* %p) { -; CHECK-LABEL: entry: -; CHECK-NOMEMSSA-LABEL: entry: +; CHECK-NOMEMSSA-LABEL: @test_memphiopt( +; CHECK-NOMEMSSA-NEXT: entry: +; CHECK-NOMEMSSA-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK-NOMEMSSA: then: +; CHECK-NOMEMSSA-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NOMEMSSA-NEXT: br label [[END]] +; CHECK-NOMEMSSA: end: +; CHECK-NOMEMSSA-NEXT: [[V2:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: [[SUM:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NOMEMSSA-NEXT: store i32 [[SUM]], i32* @G2, align 4 +; CHECK-NOMEMSSA-NEXT: ret void +; +; CHECK-LABEL: @test_memphiopt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[V1]], [[V1]] +; CHECK-NEXT: store i32 [[SUM]], i32* @G2, align 4 +; CHECK-NEXT: ret void +; entry: -; CHECK: load -; CHECK-NOMEMSSA: load %v1 = load i32, i32* @G1 br i1 %c, label %then, label %end -; CHECK-LABEL: then: -; CHECK-NOMEMSSA-LABEL: then: then: -; CHECK: load -; CHECK-NOMEMSSA: load %pv = load i32, i32* %p -; CHECK-NOT: store -; CHECK-NOMEMSSA-NOT: store store i32 %pv, i32* %p br label %end -; CHECK-LABEL: end: -; CHECK-NOMEMSSA-LABEL: end: end: -; CHECK-NOT: load -; CHECK-NOMEMSSA: load %v2 = load i32, i32* @G1 %sum = add i32 %v1, %v2 store i32 %sum, i32* @G2 @@ -72,36 +98,43 @@ end: ;; Check that MemoryPhi optimization and MemoryUse re-optimization ;; happens during EarlyCSE, enabling more load CSE opportunities. 
-; CHECK-LABEL: @test_memphiopt2( -; CHECK-NOMEMSSA-LABEL: @test_memphiopt2( define void @test_memphiopt2(i1 %c, i32* %p) { -; CHECK-LABEL: entry: -; CHECK-NOMEMSSA-LABEL: entry: +; CHECK-NOMEMSSA-LABEL: @test_memphiopt2( +; CHECK-NOMEMSSA-NEXT: entry: +; CHECK-NOMEMSSA-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: store i32 [[V1]], i32* @G2, align 4 +; CHECK-NOMEMSSA-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK-NOMEMSSA: then: +; CHECK-NOMEMSSA-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NOMEMSSA-NEXT: br label [[END]] +; CHECK-NOMEMSSA: end: +; CHECK-NOMEMSSA-NEXT: [[V2:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NOMEMSSA-NEXT: store i32 [[V2]], i32* @G3, align 4 +; CHECK-NOMEMSSA-NEXT: ret void +; +; CHECK-LABEL: @test_memphiopt2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V1:%.*]] = load i32, i32* @G1, align 4 +; CHECK-NEXT: store i32 [[V1]], i32* @G2, align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: store i32 [[V1]], i32* @G3, align 4 +; CHECK-NEXT: ret void +; entry: -; CHECK: load -; CHECK-NOMEMSSA: load %v1 = load i32, i32* @G1 -; CHECK: store -; CHECK-NOMEMSSA: store store i32 %v1, i32* @G2 br i1 %c, label %then, label %end -; CHECK-LABEL: then: -; CHECK-NOMEMSSA-LABEL: then: then: -; CHECK: load -; CHECK-NOMEMSSA: load %pv = load i32, i32* %p -; CHECK-NOT: store -; CHECK-NOMEMSSA-NOT: store store i32 %pv, i32* %p br label %end -; CHECK-LABEL: end: -; CHECK-NOMEMSSA-LABEL: end: end: -; CHECK-NOT: load -; CHECK-NOMEMSSA: load %v2 = load i32, i32* @G1 store i32 %v2, i32* @G3 ret void @@ -109,39 +142,69 @@ end: ;; Check that we respect lifetime.start/lifetime.end intrinsics when deleting ;; stores that, without the lifetime calls, would be writebacks. 
-; CHECK-LABEL: @test_writeback_lifetimes( -; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes( define void @test_writeback_lifetimes(i32* %p) { +; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes( +; CHECK-NOMEMSSA-NEXT: entry: +; CHECK-NOMEMSSA-NEXT: [[Q:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 1 +; CHECK-NOMEMSSA-NEXT: [[PV:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NOMEMSSA-NEXT: [[QV:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NOMEMSSA-NEXT: call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]]) +; CHECK-NOMEMSSA-NEXT: call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]]) +; CHECK-NOMEMSSA-NEXT: store i32 [[PV]], i32* [[P]], align 4 +; CHECK-NOMEMSSA-NEXT: store i32 [[QV]], i32* [[Q]], align 4 +; CHECK-NOMEMSSA-NEXT: ret void +; +; CHECK-LABEL: @test_writeback_lifetimes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[Q:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[PV:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[QV:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]]) +; CHECK-NEXT: store i32 [[PV]], i32* [[P]], align 4 +; CHECK-NEXT: store i32 [[QV]], i32* [[Q]], align 4 +; CHECK-NEXT: ret void +; entry: %q = getelementptr i32, i32* %p, i64 1 %pv = load i32, i32* %p %qv = load i32, i32* %q call void @llvm.lifetime.end.p0i8(i64 8, i32* %p) call void @llvm.lifetime.start.p0i8(i64 8, i32* %p) - ; CHECK: store i32 %pv - ; CHECK-NOMEMSSA-LABEL: store i32 %pv store i32 %pv, i32* %p - ; CHECK: store i32 %qv, i32* %q - ; CHECK-NOMEMSSA-LABEL: store i32 %qv, i32* %q store i32 %qv, i32* %q ret void } ;; Check that we respect lifetime.start/lifetime.end intrinsics when deleting ;; stores that, without the lifetime calls, would be writebacks. 
-; CHECK-LABEL: @test_writeback_lifetimes_multi_arg( -; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes_multi_arg( define void @test_writeback_lifetimes_multi_arg(i32* %p, i32* %q) { +; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes_multi_arg( +; CHECK-NOMEMSSA-NEXT: entry: +; CHECK-NOMEMSSA-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NOMEMSSA-NEXT: [[QV:%.*]] = load i32, i32* [[Q:%.*]], align 4 +; CHECK-NOMEMSSA-NEXT: call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]]) +; CHECK-NOMEMSSA-NEXT: call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]]) +; CHECK-NOMEMSSA-NEXT: store i32 [[PV]], i32* [[P]], align 4 +; CHECK-NOMEMSSA-NEXT: store i32 [[QV]], i32* [[Q]], align 4 +; CHECK-NOMEMSSA-NEXT: ret void +; +; CHECK-LABEL: @test_writeback_lifetimes_multi_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[QV:%.*]] = load i32, i32* [[Q:%.*]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]]) +; CHECK-NEXT: store i32 [[PV]], i32* [[P]], align 4 +; CHECK-NEXT: store i32 [[QV]], i32* [[Q]], align 4 +; CHECK-NEXT: ret void +; entry: %pv = load i32, i32* %p %qv = load i32, i32* %q call void @llvm.lifetime.end.p0i8(i64 8, i32* %p) call void @llvm.lifetime.start.p0i8(i64 8, i32* %p) - ; CHECK: store i32 %pv - ; CHECK-NOMEMSSA-LABEL: store i32 %pv store i32 %pv, i32* %p - ; CHECK: store i32 %qv, i32* %q - ; CHECK-NOMEMSSA-LABEL: store i32 %qv, i32* %q store i32 %qv, i32* %q ret void } diff --git a/llvm/test/Transforms/EarlyCSE/pr33406.ll b/llvm/test/Transforms/EarlyCSE/pr33406.ll index 903b8bc9f2ace..e0d2cccb48ac1 100644 --- a/llvm/test/Transforms/EarlyCSE/pr33406.ll +++ b/llvm/test/Transforms/EarlyCSE/pr33406.ll @@ -1,18 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -early-cse-memssa -earlycse-debug-hash -S %s | FileCheck %s -; CHECK: define void 
@patatino() { -; CHECK: for.cond: -; CHECK-NEXT: br i1 true, label %if.end, label %for.inc -; CHECK: if.end: -; CHECK-NEXT: %tinkywinky = load i32, i32* @b -; CHECK-NEXT: br i1 true, label %for.inc, label %for.inc -; CHECK: for.inc: -; CHECK-NEXT: ret void - - @b = external global i32 define void @patatino() { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: for.cond: +; CHECK-NEXT: br i1 true, label [[IF_END:%.*]], label [[FOR_INC:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[TINKYWINKY:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: br i1 true, label [[FOR_INC]], label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: ret void +; for.cond: br i1 true, label %if.end, label %for.inc diff --git a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll index d83a42780c647..baa050a433d80 100644 --- a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll +++ b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll @@ -1,12 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s declare void @readnone_may_unwind() readnone define void @f(i32* %ptr) { ; CHECK-LABEL: @f( -; CHECK: store i32 100, i32* %ptr -; CHECK: call void @readnone_may_unwind() -; CHECK: store i32 200, i32* %ptr +; CHECK-NEXT: store i32 100, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: call void @readnone_may_unwind() +; CHECK-NEXT: store i32 200, i32* [[PTR]], align 4 +; CHECK-NEXT: ret void +; store i32 100, i32* %ptr call void @readnone_may_unwind() diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll index b28af8535083c..3c95efb012a86 100644 --- a/llvm/test/Transforms/EarlyCSE/writeonly.ll +++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s @var = global i32 undef @@ -5,11 +6,12 @@ 
declare void @foo() nounwind define void @test() { ; CHECK-LABEL: @test( -; CHECK-NOT: store +; CHECK-NEXT: call void @foo() #[[ATTR1:[0-9]+]] +; CHECK-NEXT: store i32 2, i32* @var, align 4 +; CHECK-NEXT: ret void +; store i32 1, i32* @var -; CHECK: call void @foo() call void @foo() writeonly -; CHECK: store i32 2, i32* @var store i32 2, i32* @var ret void } From 35737df4dcd28534bd3090157c224c19b501278a Mon Sep 17 00:00:00 2001 From: Mubashar Ahmad Date: Thu, 23 Dec 2021 16:37:44 +0000 Subject: [PATCH 002/946] [Clang][AArch64][ARM] Unaligned Access Warning Added Added warning for potential cases of unaligned access when option -mno-unaligned-access has been specified Differential Revision: https://reviews.llvm.org/D116221 --- .../include/clang/Basic/DiagnosticASTKinds.td | 5 + clang/include/clang/Basic/DiagnosticGroups.td | 1 + clang/lib/AST/RecordLayoutBuilder.cpp | 17 + clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 11 +- clang/lib/Driver/ToolChains/Arch/AArch64.h | 1 + clang/lib/Driver/ToolChains/Arch/ARM.cpp | 28 +- clang/lib/Driver/ToolChains/Clang.cpp | 3 +- clang/test/Sema/test-wunaligned-access.c | 516 ++++++++++++++++++ clang/test/Sema/test-wunaligned-access.cpp | 274 ++++++++++ 9 files changed, 846 insertions(+), 10 deletions(-) create mode 100644 clang/test/Sema/test-wunaligned-access.c create mode 100644 clang/test/Sema/test-wunaligned-access.cpp diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index d788c85179142..a89bdff1a10c2 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -590,4 +590,9 @@ def warn_padded_struct_size : Warning< InGroup, DefaultIgnore; def warn_unnecessary_packed : Warning< "packed attribute is unnecessary for %0">, InGroup, DefaultIgnore; + +// -Wunaligned-access +def warn_unaligned_access : Warning< + "field %1 within %0 is less aligned than %2 and is usually due to %0 being " + "packed, which can lead 
to unaligned accesses">, InGroup, DefaultIgnore; } diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 1bc879a68a8c7..608e16147b1cd 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -543,6 +543,7 @@ def ExplicitInitializeCall : DiagGroup<"explicit-initialize-call">; def OrderedCompareFunctionPointers : DiagGroup<"ordered-compare-function-pointers">; def Packed : DiagGroup<"packed">; def Padded : DiagGroup<"padded">; +def UnalignedAccess : DiagGroup<"unaligned-access">; def PessimizingMove : DiagGroup<"pessimizing-move">; def ReturnStdMove : DiagGroup<"return-std-move">; diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp index 3e39ec1c718d1..61a30ead165ef 100644 --- a/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/clang/lib/AST/RecordLayoutBuilder.cpp @@ -2021,6 +2021,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, CharUnits UnpackedFieldAlign = !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign; CharUnits UnpackedFieldOffset = FieldOffset; + CharUnits OriginalFieldAlign = UnpackedFieldAlign; if (FieldPacked) { FieldAlign = CharUnits::One(); @@ -2105,6 +2106,22 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, // Remember max struct/class ABI-specified alignment. UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign); UpdateAlignment(FieldAlign, UnpackedFieldAlign, PreferredAlign); + + // For checking the alignment of inner fields against + // the alignment of its parent record. + if (const RecordDecl *RD = D->getParent()) { + // Check if packed attribute or pragma pack is present. + if (RD->hasAttr() || !MaxFieldAlignment.isZero()) + if (FieldAlign < OriginalFieldAlign) + if (D->getType()->isRecordType()) { + // If the offset is not a multiple of the alignment of + // the type, raise the warning.
+ // TODO: Takes no account of the alignment of the outer struct + if (FieldOffset % OriginalFieldAlign != 0) + Diag(D->getLocation(), diag::warn_unaligned_access) + << Context.getTypeDeclType(RD) << D->getName() << D->getType(); + } + } } void ItaniumRecordLayoutBuilder::FinishLayout(const NamedDecl *D) { diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 9ffb5d73b2aad..89a77a368ef02 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -221,6 +221,7 @@ getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, void aarch64::getAArch64TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, std::vector &Features, bool ForAS) { Arg *A; @@ -464,10 +465,16 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access)) { - if (A->getOption().matches(options::OPT_mno_unaligned_access)) + if (A->getOption().matches(options::OPT_mno_unaligned_access)) { Features.push_back("+strict-align"); - } else if (Triple.isOSOpenBSD()) + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } + } else if (Triple.isOSOpenBSD()) { Features.push_back("+strict-align"); + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } if (Args.hasArg(options::OPT_ffixed_x1)) Features.push_back("+reserve-x1"); diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.h b/clang/lib/Driver/ToolChains/Arch/AArch64.h index d47c402d4a42d..0cdc2ec725e02 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.h +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.h @@ -22,6 +22,7 @@ namespace aarch64 { void getAArch64TargetFeatures(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, std::vector &Features, bool ForAS); diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 4013cf230026b..1055d7800b63e 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -769,10 +769,12 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, } // Kernel code has more strict alignment requirements. - if (KernelOrKext) + if (KernelOrKext) { Features.push_back("+strict-align"); - else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, - options::OPT_munaligned_access)) { + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, + options::OPT_munaligned_access)) { if (A->getOption().matches(options::OPT_munaligned_access)) { // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) @@ -781,8 +783,11 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, // access either. else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; - } else + } else { Features.push_back("+strict-align"); + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } } else { // Assume pre-ARMv6 doesn't support unaligned accesses. 
// @@ -801,14 +806,23 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, int VersionNum = getARMSubArchVersionNumber(Triple); if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { if (VersionNum < 6 || - Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) + Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) { Features.push_back("+strict-align"); + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } } else if (Triple.isOSLinux() || Triple.isOSNaCl() || Triple.isOSWindows()) { - if (VersionNum < 7) + if (VersionNum < 7) { Features.push_back("+strict-align"); - } else + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } + } else { Features.push_back("+strict-align"); + if (!ForAS) + CmdArgs.push_back("-Wunaligned-access"); + } } // llvm does not support reserving registers in general. There is support diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 253c52cf0ba85..96d949be17eea 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -346,7 +346,8 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: - aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS); + aarch64::getAArch64TargetFeatures(D, Triple, Args, CmdArgs, Features, + ForAS); break; case llvm::Triple::x86: case llvm::Triple::x86_64: diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c new file mode 100644 index 0000000000000..55c2149634d1a --- /dev/null +++ b/clang/test/Sema/test-wunaligned-access.c @@ -0,0 +1,516 @@ +// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm +// REQUIRES: arm-registered-target +// +// This test suite tests the warning triggered by the -Wunaligned-access option. 
+// The warning occurs when a struct or other type of record contains a field +// that is itself a record. The outer record must be a packed structure, while +// the inner record must be unpacked. This is the fundamental condition +// for the warning to be triggered. Some of these tests may have three layers. +// +// The command line option -fsyntax-only is not used as Clang needs to be +// forced to lay out the structs used in this test. +// The triple in the command line above is used for the assumptions about +// size and alignment of types. + +// Set 1 +struct T1 { + char a; + int b; +}; + +struct __attribute__((packed)) U1 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U1' is less aligned than 'struct T1' and is usually due to 'struct U1' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U2 { + char a; + struct T1 b __attribute__((aligned(2))); // expected-warning {{field b within 'struct U2' is less aligned than 'struct T1' and is usually due to 'struct U2' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U3 { + char a; + struct T1 b __attribute__((aligned(4))); + int c; +}; + +struct __attribute__((aligned(2))) U4 { + char a; + struct T1 b; + int c; +}; + +struct U5 { + char a; + struct T1 b; + int c; +}; + +struct U6 { + char a; + int b; + struct T1 c __attribute__((aligned(2))); +}; + +struct __attribute__((packed)) U7 { + short a; + short b; + char c; + struct T1 d; // expected-warning {{field d within 'struct U7' is less aligned than 'struct T1' and is usually due to 'struct U7' being packed, which can lead to unaligned accesses}} +}; + +struct U8 { + short a; + short b; + char c; + struct T1 d; +}; + +struct __attribute__((packed)) U9 { + short a; + short b; + char c; + struct T1 d __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) U10 { + short a; + short b; + char c; + struct T1 d
__attribute__((aligned(2))); // expected-warning {{field d within 'struct U10' is less aligned than 'struct T1' and is usually due to 'struct U10' being packed, which can lead to unaligned accesses}} +}; + +struct __attribute__((aligned(2))) U11 { + short a; + short b; + char c; + struct T1 d; +}; + +// Set 2 +#pragma pack(push, 1) + +struct U12 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U12' is less aligned than 'struct T1' and is usually due to 'struct U12' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U13 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U13' is less aligned than 'struct T1' and is usually due to 'struct U13' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U14 { + char a; + struct T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'struct U14' is less aligned than 'struct T1' and is usually due to 'struct U14' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((aligned(2))) U15 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U15' is less aligned than 'struct T1' and is usually due to 'struct U15' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct U16 { + char a; + char b; + short c; + struct T1 d; +}; + +struct U17 { + char a; + char b; + short c; + struct T1 d __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) U18 { + char a; + short b; + struct T1 c __attribute__((aligned(4))); // expected-warning {{field c within 'struct U18' is less aligned than 'struct T1' and is usually due to 'struct U18' being packed, which can lead to unaligned accesses}} +}; + +struct __attribute__((aligned(4))) U19 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U19' is less aligned than 'struct T1' and is usually due to 'struct U19' being packed, which can lead to 
unaligned accesses}} + int c; +}; + +struct __attribute__((aligned(4))) U20 { + char a[4]; + struct T1 b; + int c; +}; + +struct U21 { + char a; + short c; + struct T1 d; // expected-warning {{field d within 'struct U21' is less aligned than 'struct T1' and is usually due to 'struct U21' being packed, which can lead to unaligned accesses}} +}; + +struct U22 { + char a; + short c; + struct T1 d __attribute__((aligned(4))); // expected-warning {{field d within 'struct U22' is less aligned than 'struct T1' and is usually due to 'struct U22' being packed, which can lead to unaligned accesses}} +}; + +#pragma pack(pop) + +// Set 3 +#pragma pack(push, 2) + +struct __attribute__((packed)) U23 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U23' is less aligned than 'struct T1' and is usually due to 'struct U23' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct U24 { + char a; + struct T1 b; // expected-warning {{field b within 'struct U24' is less aligned than 'struct T1' and is usually due to 'struct U24' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct U25 { + char a; + char b; + short c; + struct T1 d; +}; + +struct U26 { + char a; + char b; + short c; + struct T1 d; +}; + +#pragma pack(pop) + +// Set 4 + +struct __attribute__((packed)) T2 { + char a; + struct T1 b; // expected-warning {{field b within 'struct T2' is less aligned than 'struct T1' and is usually due to 'struct T2' being packed, which can lead to unaligned accesses}} +}; + +struct T3 { + char a; + struct T1 b; +}; + +struct __attribute__((packed)) U27 { + char a; + struct T2 b; + int c; +}; + +struct U28 { + char a; + char _p[2]; + struct T2 b; + int c; +}; + +struct U29 { + char a; + struct T3 b; + int c; +}; + +struct __attribute__((packed)) U30 { + char a; + struct T3 b; // expected-warning {{field b within 'struct U30' is less aligned than 'struct T3' and is usually due to 'struct U30' being packed, which can lead to 
unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U31 { + char a; + struct T2 b __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) U32 { + char a; + char b; + char c; + char d; + struct T3 e; +}; + +struct __attribute__((packed)) U33 { + char a; + char b; + char c; + char d; + struct T2 e __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) U34 { + char a; + struct T1 b __attribute__((packed)); // expected-warning {{field b within 'struct U34' is less aligned than 'struct T1' and is usually due to 'struct U34' being packed, which can lead to unaligned accesses}} + struct T2 c; +}; + +struct __attribute__((packed)) U35 { + char a; + struct T4 { + char b; + struct T1 c; + } d; // expected-warning {{field d within 'struct U35' is less aligned than 'struct T4' and is usually due to 'struct U35' being packed, which can lead to unaligned accesses}} +}; + +// Set 5 + +#pragma pack(push, 1) +struct T5 { + char a; + struct T1 b; // expected-warning {{field b within 'struct T5' is less aligned than 'struct T1' and is usually due to 'struct T5' being packed, which can lead to unaligned accesses}} +}; +#pragma pack(pop) + +#pragma pack(push, 1) +struct U36 { + char a; + struct T5 b; + int c; +}; + +struct U37 { + char a; + struct T3 b; // expected-warning {{field b within 'struct U37' is less aligned than 'struct T3' and is usually due to 'struct U37' being packed, which can lead to unaligned accesses}} + int c; +}; +#pragma pack(pop) +struct U38 { + char a; + struct T5 b __attribute__((aligned(4))); + int c; +}; + +#pragma pack(push, 1) + +#pragma pack(push, 4) +struct U39 { + char a; + struct T5 b; + int c; +}; +#pragma pack(pop) + +#pragma pack(pop) + +// Set 6 + +struct __attribute__((packed)) A1 { + char a; + struct T1 b; // expected-warning {{field b within 'struct A1' is less aligned than 'struct T1' and is usually due to 'struct A1' being packed, which can lead to unaligned accesses}} +}; + +struct A2 { + char a; + struct 
T1 b; +}; + +struct __attribute__((packed)) A3 { + char a; + struct T1 b __attribute__((aligned(4))); +}; + +#pragma pack(push, 1) +struct A4 { + char a; + struct T1 b; // expected-warning {{field b within 'struct A4' is less aligned than 'struct T1' and is usually due to 'struct A4' being packed, which can lead to unaligned accesses}} +}; + +struct A5 { + char a; + struct T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'struct A5' is less aligned than 'struct T1' and is usually due to 'struct A5' being packed, which can lead to unaligned accesses}} +}; +#pragma pack(pop) + +struct __attribute__((packed)) A6 { + struct T1 a; +}; + +struct A7 { + char a; + struct T1 b __attribute__((packed)); +}; + +struct A8 { + char a; + char b; + short c; + struct T1 d; +}; + +struct A9 { + char a; + struct T2 b; +}; + +struct A10 { + char a; + struct T2 b __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) A11 { + char a; + struct T2 b; +}; + +struct __attribute__((packed)) U40 { + char a; + struct A1 b; + int c; +}; + +struct __attribute__((packed)) U41 { + char a; + struct A3 b; // expected-warning {{field b within 'struct U41' is less aligned than 'struct A3' and is usually due to 'struct U41' being packed, which can lead to unaligned accesses}} + int c; +}; + +#pragma pack(push, 1) +struct U42 { + char a; + struct A1 b; + int c; +}; +#pragma pack(pop) + +struct __attribute__((packed)) U43 { + char a; + struct A9 b; + int c; +}; + +struct __attribute__((packed)) U44 { + char a; + struct A10 b; // expected-warning {{field b within 'struct U44' is less aligned than 'struct A10' and is usually due to 'struct U44' being packed, which can lead to unaligned accesses}} + int c; +}; + +#pragma pack(push, 1) + +struct U45 { + char a; + struct A10 b; // expected-warning {{field b within 'struct U45' is less aligned than 'struct A10' and is usually due to 'struct U45' being packed, which can lead to unaligned accesses}} + int c; +}; + +#pragma 
pack(pop) + +struct __attribute__((packed)) U46 { + char a; + struct A2 b; // expected-warning {{field b within 'struct U46' is less aligned than 'struct A2' and is usually due to 'struct U46' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U47 { + char a; + struct A8 b; // expected-warning {{field b within 'struct U47' is less aligned than 'struct A8' and is usually due to 'struct U47' being packed, which can lead to unaligned accesses}} + int c; +}; + +#pragma pack(push, 1) +struct U48 { + char a; + struct A8 b; // expected-warning {{field b within 'struct U48' is less aligned than 'struct A8' and is usually due to 'struct U48' being packed, which can lead to unaligned accesses}} + int c; +}; +#pragma pack(pop) + +struct U49 { + char a; + struct A11 b; + int c; +}; + +struct U50 { + char a; + struct A1 b; + int c; +}; + +struct U51 { + char a; + struct A5 b; + int c; +}; + +struct __attribute__((packed)) U52 { + char a; + struct A6 b; +}; + +struct U53 { + char a; + struct A4 b; +}; + +struct U54 { + char b; + struct A7 c; +}; + +struct U1 s1; +struct U2 s2; +struct U3 s3; +struct U4 s4; +struct U5 s5; +struct U6 s6; +struct U7 s7; +struct U8 s8; +struct U9 s9; +struct U10 s10; +struct U11 s11; +struct U12 s12; +struct U13 s13; +struct U14 s14; +struct U15 s15; +struct U16 s16; +struct U17 s17; +struct U18 s18; +struct U19 s19; +struct U20 s20; +struct U21 s21; +struct U22 s22; +struct U23 s23; +struct U24 s24; +struct U25 s25; +struct U26 s26; +struct U27 s27; +struct U28 s28; +struct U29 s29; +struct U30 s30; +struct U31 s31; +struct U32 s32; +struct U33 s33; +struct U34 s34; +struct U35 s35; +struct U36 s36; +struct U37 s37; +struct U38 s38; +struct U39 s39; +struct U40 s40; +struct U41 s41; +struct U42 s42; +struct U43 s43; +struct U44 s44; +struct U45 s45; +struct U46 s46; +struct U47 s47; +struct U48 s48; +struct U49 s49; +struct U50 s50; +struct U51 s51; +struct U52 s52; +struct U53 s53; +struct U54 s54; 
diff --git a/clang/test/Sema/test-wunaligned-access.cpp b/clang/test/Sema/test-wunaligned-access.cpp new file mode 100644 index 0000000000000..33f518310b0b1 --- /dev/null +++ b/clang/test/Sema/test-wunaligned-access.cpp @@ -0,0 +1,274 @@ +// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t +// REQUIRES: arm-registered-target +// +// This test suite tests the warning triggered by the -Wunaligned-access option. +// The warning occurs when a struct or other type of record contains a field +// that is itself a record. The outer record must be a packed structure, while +// the inner record must be unpacked. This is the fundamental condition +// for the warning to be triggered. Some of these tests may have three layers. +// +// The command line option -fsyntax-only is not used as Clang needs to be +// forced to lay out the structs used in this test. +// The triple in the command line above is used for the assumptions about +// size and alignment of types.
+ +// Packed-Unpacked Tests (No Pragma) + +struct T1 { + char a; + int b; +}; + +struct __attribute__((packed)) U1 { + char a; + T1 b; // expected-warning {{field b within 'U1' is less aligned than 'T1' and is usually due to 'U1' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U2 { + char a; + T1 b __attribute__((aligned(4))); + int c; +}; + +struct __attribute__((packed)) U3 { + char a; + char b; + short c; + T1 d; +}; + +struct __attribute__((packed)) U4 { + T1 a; + int b; +}; + +struct __attribute__((aligned(4), packed)) U5 { + char a; + T1 b; // expected-warning {{field b within 'U5' is less aligned than 'T1' and is usually due to 'U5' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((aligned(4), packed)) U6 { + char a; + char b; + short c; + T1 d; +}; + +// Packed-Unpacked Tests with Pragma + +#pragma pack(push, 1) + +struct __attribute__((packed)) U7 { + char a; + T1 b; // expected-warning {{field b within 'U7' is less aligned than 'T1' and is usually due to 'U7' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((packed)) U8 { + char a; + T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'U8' is less aligned than 'T1' and is usually due to 'U8' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct __attribute__((aligned(4))) U9 { + char a; + T1 b; // expected-warning {{field b within 'U9' is less aligned than 'T1' and is usually due to 'U9' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct U10 { + char a; + T1 b; // expected-warning {{field b within 'U10' is less aligned than 'T1' and is usually due to 'U10' being packed, which can lead to unaligned accesses}} + int c; +}; + +#pragma pack(pop) + +// Packed-Packed Tests + +struct __attribute__((packed)) T2 { + char a; + int b; +}; + +struct __attribute__((packed)) U11 { + char a; + T2 b; + int c; +}; + +#pragma 
pack(push, 1) +struct U12 { + char a; + T2 b; + int c; +}; +#pragma pack(pop) + +// Unpacked-Packed Tests + +struct U13 { + char a; + T2 b; + int c; +}; + +struct U14 { + char a; + T2 b __attribute__((aligned(4))); + int c; +}; + +// Unpacked-Unpacked Test + +struct T3 { + char a; + int b; +}; + +struct U15 { + char a; + T3 b; + int c; +}; + +// Packed-Packed-Unpacked Test (No pragma) + +struct __attribute__((packed)) A1 { + char a; + T1 b; // expected-warning {{field b within 'A1' is less aligned than 'T1' and is usually due to 'A1' being packed, which can lead to unaligned accesses}} +}; + +struct __attribute__((packed)) U16 { + char a; + A1 b; + int c; +}; + +struct __attribute__((packed)) A2 { + char a; + T1 b __attribute__((aligned(4))); +}; + +struct __attribute__((packed)) U17 { + char a; + A2 b; // expected-warning {{field b within 'U17' is less aligned than 'A2' and is usually due to 'U17' being packed, which can lead to unaligned accesses}} + int c; +}; + +// Packed-Unpacked-Packed tests + +struct A3 { + char a; + T2 b; +}; + +struct __attribute__((packed)) U18 { + char a; + A3 b; + int c; +}; + +struct A4 { + char a; + T2 b; + int c; +}; + +#pragma pack(push, 1) +struct U19 { + char a; + A4 b; // expected-warning {{field b within 'U19' is less aligned than 'A4' and is usually due to 'U19' being packed, which can lead to unaligned accesses}} + int c; +}; +#pragma pack(pop) + +// Packed-Unpacked-Unpacked tests + +struct A5 { + char a; + T1 b; +}; + +struct __attribute__((packed)) U20 { + char a; + A5 b; // expected-warning {{field b within 'U20' is less aligned than 'A5' and is usually due to 'U20' being packed, which can lead to unaligned accesses}} + int c; +}; + +struct A6 { + char a; + T1 b; +}; + +#pragma pack(push, 1) +struct U21 { + char a; + A6 b; // expected-warning {{field b within 'U21' is less aligned than 'A6' and is usually due to 'U21' being packed, which can lead to unaligned accesses}} + int c; +}; +#pragma pack(pop) + +// 
Unpacked-Packed-Packed test + +struct __attribute__((packed)) A7 { + char a; + T2 b; +}; + +struct U22 { + char a; + A7 b; + int c; +}; + +// Unpacked-Packed-Unpacked tests + +struct __attribute__((packed)) A8 { + char a; + T1 b; // expected-warning {{field b within 'A8' is less aligned than 'T1' and is usually due to 'A8' being packed, which can lead to unaligned accesses}} +}; + +struct U23 { + char a; + A8 b; + int c; +}; + +struct __attribute__((packed)) A9 { + char a; + T1 b __attribute__((aligned(4))); +}; + +struct U24 { + char a; + A9 b; + int c; +}; + +struct U1 s1; +struct U2 s2; +struct U3 s3; +struct U4 s4; +struct U5 s5; +struct U6 s6; +struct U7 s7; +struct U8 s8; +struct U9 s9; +struct U10 s10; +struct U11 s11; +struct U12 s12; +struct U13 s13; +struct U14 s14; +struct U15 s15; +struct U16 s16; +struct U17 s17; +struct U18 s18; +struct U19 s19; +struct U20 s20; +struct U21 s21; +struct U22 s22; +struct U23 s23; +struct U24 s24; \ No newline at end of file From 010a10b738915568d9d04d1f0caa09b5d25dc7b3 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 20 Jan 2022 15:18:47 +0100 Subject: [PATCH 003/946] [flang][NFC] Remove extra braces Noticed during the upstreaming process. 
--- flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index 1e671dbb72093..f5616d21d1340 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -76,9 +76,8 @@ class TargetRewrite : public TargetRewriteBase { mlir::OpBuilder rewriter(&context); auto mod = getModule(); - if (!forcedTargetTriple.empty()) { + if (!forcedTargetTriple.empty()) setTargetTriple(mod, forcedTargetTriple); - } auto specifics = CodeGenSpecifics::get(getOperation().getContext(), getTargetTriple(getOperation()), From c95cb4de1b6674e52aebdb7d02c6431843001282 Mon Sep 17 00:00:00 2001 From: Stanislav Gatev Date: Thu, 20 Jan 2022 09:28:25 +0000 Subject: [PATCH 004/946] [clang][dataflow] Intersect ExprToLoc when joining environments This is part of the implementation of the dataflow analysis framework. See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev. 
Reviewed-by: xazax.hun Differential Revision: https://reviews.llvm.org/D117754 --- .../FlowSensitive/DataflowEnvironment.cpp | 5 +++ .../TypeErasedDataflowAnalysis.cpp | 1 + .../Analysis/FlowSensitive/TransferTest.cpp | 37 +++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index e1d420fb55b82..6fbc3ec42465c 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -83,6 +83,11 @@ LatticeJoinEffect Environment::join(const Environment &Other) { if (DeclToLocSizeBefore != DeclToLoc.size()) Effect = LatticeJoinEffect::Changed; + const unsigned ExprToLocSizeBefore = ExprToLoc.size(); + ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc); + if (ExprToLocSizeBefore != ExprToLoc.size()) + Effect = LatticeJoinEffect::Changed; + // FIXME: Add support for joining distinct values that are assigned to the // same storage locations in `LocToVal` and `Other.LocToVal`. 
const unsigned LocToValSizeBefore = LocToVal.size(); diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 538cdce206b2f..3782f0f5f69ac 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -23,6 +23,7 @@ #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/raw_ostream.h" diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 5979870f858b1..c1eaf281ddc49 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -1828,4 +1828,41 @@ TEST_F(TransferTest, VarDeclInitAssignConditionalOperator) { }); } +TEST_F(TransferTest, VarDeclInDoWhile) { + std::string Code = R"( + void target(int *Foo) { + do { + int Bar = *Foo; + } while (true); + (void)0; + /*[[p]]*/ + } + )"; + runDataflow(Code, + [](llvm::ArrayRef< + std::pair>> + Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results, ElementsAre(Pair("p", _))); + const Environment &Env = Results[0].second.Env; + + const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); + ASSERT_THAT(FooDecl, NotNull()); + + const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); + ASSERT_THAT(BarDecl, NotNull()); + + const auto *FooVal = + cast(Env.getValue(*FooDecl, SkipPast::None)); + const auto *FooPointeeVal = + cast(Env.getValue(FooVal->getPointeeLoc())); + + const auto *BarVal = dyn_cast_or_null( + Env.getValue(*BarDecl, SkipPast::None)); + ASSERT_THAT(BarVal, NotNull()); + + EXPECT_EQ(BarVal, FooPointeeVal); + }); +} + } // namespace From 
6d45284618f08fa28dc515cab96fa573c4c4479e Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Thu, 20 Jan 2022 14:02:04 +0100 Subject: [PATCH 005/946] [mlir][memref] Add better support for identity layouts in memref.collapse_shape canonicalizer When computing the new type of a collapse_shape operation, we need to at least take into account whether the type has an identity layout, in which case we can easily support dynamic strides. Otherwise, the canonicalizer creates invalid IR. Longer term, both the verifier and the canonicalizer need to be extended to support the general case. Differential Revision: https://reviews.llvm.org/D117772 --- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 16 ++++++++++++---- mlir/test/Dialect/MemRef/canonicalize.mlir | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 55499f63295f0..211af3045b9d8 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1334,6 +1334,7 @@ computeReshapeCollapsedType(MemRefType type, AffineExpr offset; SmallVector strides; auto status = getStridesAndOffset(type, strides, offset); + auto isIdentityLayout = type.getLayout().isIdentity(); (void)status; assert(succeeded(status) && "expected strided memref"); @@ -1350,12 +1351,19 @@ computeReshapeCollapsedType(MemRefType type, unsigned dim = m.getNumResults(); int64_t size = 1; AffineExpr stride = strides[currentDim + dim - 1]; - if (!isReshapableDimBand(currentDim, dim, sizes, strides)) { + if (isIdentityLayout || + isReshapableDimBand(currentDim, dim, sizes, strides)) { + for (unsigned d = 0; d < dim; ++d) { + int64_t currentSize = sizes[currentDim + d]; + if (ShapedType::isDynamic(currentSize)) { + size = ShapedType::kDynamicSize; + break; + } + size *= currentSize; + } + } else { size = ShapedType::kDynamicSize; stride = AffineExpr(); - } else { - for (unsigned d = 0; d < dim; ++d) - size *=
sizes[currentDim + d]; } newSizes.push_back(size); newStrides.push_back(stride); diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index bd7a8dd830a80..58083437ca47e 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -406,6 +406,8 @@ func @collapse_after_memref_cast_type_change(%arg0 : memref) -> m return %collapsed : memref } +// ----- + // CHECK-LABEL: func @collapse_after_memref_cast( // CHECK-SAME: %[[INPUT:.*]]: memref) -> memref { // CHECK: %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]] @@ -419,6 +421,21 @@ func @collapse_after_memref_cast(%arg0 : memref) -> memref) -> memref { +// CHECK: %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]] +// CHECK_SAME: {{\[\[}}0, 1, 2], [3]] : memref<1x1x1x?xi64> into memref<1x?xi64> +// CHECK: %[[DYNAMIC:.*]] = memref.cast %[[COLLAPSED]] : +// CHECK-SAME: memref<1x?xi64> to memref +// CHECK: return %[[DYNAMIC]] : memref +func @collapse_after_memref_cast_type_change_dynamic(%arg0: memref<1x1x1x?xi64>) -> memref { + %casted = memref.cast %arg0 : memref<1x1x1x?xi64> to memref<1x1x?x?xi64> + %collapsed = memref.collapse_shape %casted [[0, 1, 2], [3]] : memref<1x1x?x?xi64> into memref + return %collapsed : memref +} + +// ----- + func @reduced_memref(%arg0: memref<2x5x7x1xf32>, %arg1 :index) -> memref<1x4x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 35 + s0 + d1 * 7 + d2)>> { %c0 = arith.constant 0 : index From 9e24d14ac89f44bb6c9141561ca849ccdd09e6a8 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 20 Jan 2022 15:05:38 +0100 Subject: [PATCH 006/946] [llvm][vfs] NFC: Virtualize in-memory `getStatus` This patch virtualizes the `getStatus` function on `InMemoryNode` in LLVM VFS. Currently, this is implemented via top-level function `getNodeStatus` that tries to cast `InMemoryNode *` into each subtype. Virtual functions seem to be the simpler solution here. 
Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D117649 --- llvm/lib/Support/VirtualFileSystem.cpp | 29 +++++++++++--------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 7b752b557f8e6..1536f4de2e267 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -574,6 +574,11 @@ class InMemoryNode { } virtual ~InMemoryNode() = default; + /// Return the \p Status for this node. \p RequestedName should be the name + /// through which the caller referred to this node. It will override + /// \p Status::Name in the return value, to mimic the behavior of \p RealFile. + virtual Status getStatus(const Twine &RequestedName) const = 0; + /// Get the filename of this node (the name without the directory part). StringRef getFileName() const { return FileName; } InMemoryNodeKind getKind() const { return Kind; } @@ -589,10 +594,7 @@ class InMemoryFile : public InMemoryNode { : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)), Buffer(std::move(Buffer)) {} - /// Return the \p Status for this node. \p RequestedName should be the name - /// through which the caller referred to this node. It will override - /// \p Status::Name in the return value, to mimic the behavior of \p RealFile. 
- Status getStatus(const Twine &RequestedName) const { + Status getStatus(const Twine &RequestedName) const override { return Status::copyWithNewName(Stat, RequestedName); } llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); } @@ -616,6 +618,10 @@ class InMemoryHardLink : public InMemoryNode { : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {} const InMemoryFile &getResolvedFile() const { return ResolvedFile; } + Status getStatus(const Twine &RequestedName) const override { + return ResolvedFile.getStatus(RequestedName); + } + std::string toString(unsigned Indent) const override { return std::string(Indent, ' ') + "HardLink to -> " + ResolvedFile.toString(0); @@ -668,7 +674,7 @@ class InMemoryDirectory : public InMemoryNode { /// Return the \p Status for this node. \p RequestedName should be the name /// through which the caller referred to this node. It will override /// \p Status::Name in the return value, to mimic the behavior of \p RealFile. - Status getStatus(const Twine &RequestedName) const { + Status getStatus(const Twine &RequestedName) const override { return Status::copyWithNewName(Stat, RequestedName); } @@ -704,17 +710,6 @@ class InMemoryDirectory : public InMemoryNode { } }; -namespace { -Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) { - if (auto Dir = dyn_cast(Node)) - return Dir->getStatus(RequestedName); - if (auto File = dyn_cast(Node)) - return File->getStatus(RequestedName); - if (auto Link = dyn_cast(Node)) - return Link->getResolvedFile().getStatus(RequestedName); - llvm_unreachable("Unknown node type"); -} -} // namespace } // namespace detail // The UniqueID of in-memory files is derived from path and content. 
@@ -923,7 +918,7 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath, llvm::ErrorOr InMemoryFileSystem::status(const Twine &Path) { auto Node = lookupInMemoryNode(*this, Root.get(), Path); if (Node) - return detail::getNodeStatus(*Node, Path); + return (*Node)->getStatus(Path); return Node.getError(); } From 9011903e3613af360c3a1d05c106e464538efd08 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 20 Jan 2022 15:07:39 +0100 Subject: [PATCH 007/946] [llvm][vfs] Abstract in-memory node creation The creation of in-memory VFS nodes happens in a single function that deduces what kind of node to create from the arguments. This leads to complicated if-then-else logic that's difficult to cleanly extend. This patch abstracts away in-memory node creation via a type-erased factory function that's passed instead. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D117648 --- llvm/include/llvm/Support/VirtualFileSystem.h | 24 +++++-- llvm/lib/Support/VirtualFileSystem.cpp | 65 +++++++++++-------- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 9b5ff8f20ae2d..305096dff67eb 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -419,6 +419,21 @@ namespace detail { class InMemoryDirectory; class InMemoryFile; +class InMemoryNode; + +struct NewInMemoryNodeInfo { + llvm::sys::fs::UniqueID DirUID; + StringRef Path; + StringRef Name; + time_t ModificationTime; + std::unique_ptr Buffer; + uint32_t User; + uint32_t Group; + llvm::sys::fs::file_type Type; + llvm::sys::fs::perms Perms; + + Status makeStatus() const; +}; } // namespace detail @@ -428,14 +443,15 @@ class InMemoryFileSystem : public FileSystem { std::string WorkingDirectory; bool UseNormalizedPaths = true; - /// If HardLinkTarget is non-null, a hardlink is created to the To path which - /// must be a file. 
If it is null then it adds the file as the public addFile. + using MakeNodeFn = llvm::function_ref( + detail::NewInMemoryNodeInfo)>; + + /// Create node with \p MakeNode and add it into this filesystem at \p Path. bool addFile(const Twine &Path, time_t ModificationTime, std::unique_ptr Buffer, Optional User, Optional Group, Optional Type, - Optional Perms, - const detail::InMemoryFile *HardLinkTarget); + Optional Perms, MakeNodeFn MakeNode); public: explicit InMemoryFileSystem(bool UseNormalizedPaths = true); diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 1536f4de2e267..a963beb180bae 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -729,6 +729,16 @@ static sys::fs::UniqueID getDirectoryID(sys::fs::UniqueID Parent, return getUniqueID(llvm::hash_combine(Parent.getFile(), Name)); } +Status detail::NewInMemoryNodeInfo::makeStatus() const { + UniqueID UID = + (Type == sys::fs::file_type::directory_file) + ? getDirectoryID(DirUID, Name) + : getFileID(DirUID, Name, Buffer ? Buffer->getBuffer() : ""); + + return Status(Path, UID, llvm::sys::toTimePoint(ModificationTime), User, + Group, Buffer ? 
Buffer->getBufferSize() : 0, Type, Perms); +} + InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths) : Root(new detail::InMemoryDirectory( Status("", getDirectoryID(llvm::sys::fs::UniqueID(), ""), @@ -749,7 +759,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, Optional Group, Optional Type, Optional Perms, - const detail::InMemoryFile *HardLinkTarget) { + MakeNodeFn MakeNode) { SmallString<128> Path; P.toVector(Path); @@ -770,7 +780,6 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, const auto ResolvedGroup = Group.getValueOr(0); const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file); const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all); - assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer"); // Any intermediate directories we create should be accessible by // the owner, even if Perms says otherwise for the final path. const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all; @@ -781,27 +790,10 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, if (!Node) { if (I == E) { // End of the path. - std::unique_ptr Child; - if (HardLinkTarget) - Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget)); - else { - // Create a new file or directory. - Status Stat( - P.str(), - (ResolvedType == sys::fs::file_type::directory_file) - ? 
getDirectoryID(Dir->getUniqueID(), Name) - : getFileID(Dir->getUniqueID(), Name, Buffer->getBuffer()), - llvm::sys::toTimePoint(ModificationTime), ResolvedUser, - ResolvedGroup, Buffer->getBufferSize(), ResolvedType, - ResolvedPerms); - if (ResolvedType == sys::fs::file_type::directory_file) { - Child.reset(new detail::InMemoryDirectory(std::move(Stat))); - } else { - Child.reset( - new detail::InMemoryFile(std::move(Stat), std::move(Buffer))); - } - } - Dir->addChild(Name, std::move(Child)); + Dir->addChild( + Name, MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime, + std::move(Buffer), ResolvedUser, ResolvedGroup, + ResolvedType, ResolvedPerms})); return true; } @@ -845,7 +837,15 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime, Optional Type, Optional Perms) { return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type, - Perms, /*HardLinkTarget=*/nullptr); + Perms, + [](detail::NewInMemoryNodeInfo NNI) + -> std::unique_ptr { + Status Stat = NNI.makeStatus(); + if (Stat.getType() == sys::fs::file_type::directory_file) + return std::make_unique(Stat); + return std::make_unique( + Stat, std::move(NNI.Buffer)); + }); } bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime, @@ -856,7 +856,15 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime, Optional Perms) { return addFile(P, ModificationTime, llvm::MemoryBuffer::getMemBuffer(Buffer), std::move(User), std::move(Group), std::move(Type), - std::move(Perms)); + std::move(Perms), + [](detail::NewInMemoryNodeInfo NNI) + -> std::unique_ptr { + Status Stat = NNI.makeStatus(); + if (Stat.getType() == sys::fs::file_type::directory_file) + return std::make_unique(Stat); + return std::make_unique( + Stat, std::move(NNI.Buffer)); + }); } static ErrorOr @@ -911,8 +919,11 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath, // before. Resolved ToPath must be a File. 
if (!ToNode || FromNode || !isa(*ToNode)) return false; - return this->addFile(FromPath, 0, nullptr, None, None, None, None, - cast(*ToNode)); + return addFile(FromPath, 0, nullptr, None, None, None, None, + [&](detail::NewInMemoryNodeInfo NNI) { + return std::make_unique( + NNI.Path.str(), *cast(*ToNode)); + }); } llvm::ErrorOr InMemoryFileSystem::status(const Twine &Path) { From 14c5fd920b0e4e05c5deed358af47707e579dd6d Mon Sep 17 00:00:00 2001 From: eopXD Date: Sat, 6 Nov 2021 07:54:58 -0700 Subject: [PATCH 008/946] [Clang][RISCV] Change TARGET_BUILTIN to require zve32x for vector instruction According to v-spec v1.0, `zve-32x` is the new minimum extension to include to have vector instructions. Reviewed By: kito-cheng Differential Revision: https://reviews.llvm.org/D112613 --- clang/utils/TableGen/RISCVVEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 08724fb353972..d3f1d63185f4a 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -1027,7 +1027,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) { OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n"; OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, " - "ATTRS, \"experimental-v\")\n"; + "ATTRS, \"experimental-zve32x\")\n"; OS << "#endif\n"; for (auto &Def : Defs) { auto P = From 4130357f96bf44e73032ada6d86acd1e8f9f74a1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 20 Jan 2022 14:58:23 +0000 Subject: [PATCH 009/946] [X86] Fix v16f32 ADDSUB test This was supposed to ensure we're not generating 512-bit ADDSUB nodes, but cut+paste typos meant we weren't generating a full 512-bit pattern --- llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 61 ++++++++++++++++------ 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll 
index 95587ee7ae6b0..1087ef9193d79 100644 --- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -519,14 +519,14 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) { ; SSE: # %bb.0: ; SSE-NEXT: addsubps %xmm4, %xmm0 ; SSE-NEXT: addsubps %xmm5, %xmm1 -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movaps %xmm1, %xmm3 +; SSE-NEXT: addsubps %xmm6, %xmm2 +; SSE-NEXT: addsubps %xmm7, %xmm3 ; SSE-NEXT: retq ; ; AVX1-LABEL: test17: ; AVX1: # %bb.0: ; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vmovaps %ymm0, %ymm1 +; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX512-LABEL: test17: @@ -543,9 +543,39 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) { ; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3] ; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[3,3,3,3] ; AVX512-NEXT: vaddss %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX512-NEXT: vinsertps {{.*#+}} xmm8 = xmm2[0,1,2],xmm3[0] +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX512-NEXT: vsubss %xmm4, %xmm3, %xmm5 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] +; AVX512-NEXT: vsubss %xmm7, %xmm6, %xmm6 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm4[1,1,3,3] +; AVX512-NEXT: vaddss %xmm2, %xmm7, %xmm2 +; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,3,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3] +; AVX512-NEXT: vaddss %xmm4, %xmm3, %xmm3 +; AVX512-NEXT: vinsertps {{.*#+}} xmm9 = xmm2[0,1,2],xmm3[0] +; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 +; AVX512-NEXT: vextractf32x4 $2, 
%zmm1, %xmm4 +; AVX512-NEXT: vsubss %xmm4, %xmm2, %xmm5 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0] +; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] +; AVX512-NEXT: vsubss %xmm7, %xmm6, %xmm6 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm2[1,1,3,3] +; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm4[1,1,3,3] +; AVX512-NEXT: vaddss %xmm3, %xmm7, %xmm3 +; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3] +; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm6[0],xmm3[3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3] +; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3] +; AVX512-NEXT: vaddss %xmm4, %xmm2, %xmm2 +; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] +; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 ; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm3 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] @@ -560,7 +590,8 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) { ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] ; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512-NEXT: vinsertf128 $1, %xmm9, %ymm8, %ymm1 +; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512-NEXT: retq %1 = extractelement <16 x float> %A, i32 0 %2 = extractelement <16 x float> %B, i32 0 @@ -588,28 +619,28 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) { %add4 = fadd float %15, %16 %17 = extractelement <16 x float> %A, i32 8 %18 = extractelement <16 x float> %B, i32 8 - %sub5 = fsub float %1, %2 + %sub5 = fsub float %17, %18 %19 = extractelement <16 x float> %A, i32 10 %20 = extractelement <16 x float> %B, i32 10 - %sub6 = fsub float %3, %4 + %sub6 = fsub float %19, %20 %21 = extractelement <16 x float> %A, i32 9 %22 = extractelement <16 x float> %B, i32 9 - %add5 = fadd float %5, %6 + %add5 = fadd float 
%21, %22 %23 = extractelement <16 x float> %A, i32 11 %24 = extractelement <16 x float> %B, i32 11 - %add6 = fadd float %7, %8 + %add6 = fadd float %23, %24 %25 = extractelement <16 x float> %A, i32 12 %26 = extractelement <16 x float> %B, i32 12 - %sub7 = fsub float %9, %10 + %sub7 = fsub float %25, %26 %27 = extractelement <16 x float> %A, i32 14 %28 = extractelement <16 x float> %B, i32 14 - %sub8 = fsub float %11, %12 + %sub8 = fsub float %27, %28 %29 = extractelement <16 x float> %A, i32 13 %30 = extractelement <16 x float> %B, i32 13 - %add7 = fadd float %13, %14 + %add7 = fadd float %29, %30 %31 = extractelement <16 x float> %A, i32 15 %32 = extractelement <16 x float> %B, i32 15 - %add8 = fadd float %15, %16 + %add8 = fadd float %31, %32 %vecinsert1 = insertelement <16 x float> undef, float %add, i32 1 %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add2, i32 3 %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub, i32 0 From 81d35f27ddec579f616e3f5f15b25fe4cc92236e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 18 Jan 2022 17:38:08 +0100 Subject: [PATCH 010/946] [DebugInstrRef] Memoize variable order during sorting (NFC) Instead of constructing DebugVariables and looking up the order in the comparison function, compute the order upfront and then sort a vector of (order, instr). This improves compile-time by -0.4% geomean on CTMark ReleaseLTO-g. 
Differential Revision: https://reviews.llvm.org/D117575 --- .../LiveDebugValues/InstrRefBasedImpl.cpp | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 849f2895b4a58..8a190e7699414 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2806,31 +2806,28 @@ void InstrRefBasedLDV::emitLocations( } } - // We have to insert DBG_VALUEs in a consistent order, otherwise they appeaer - // in DWARF in different orders. Use the order that they appear when walking - // through each block / each instruction, stored in AllVarsNumbering. - auto OrderDbgValues = [&](const MachineInstr *A, - const MachineInstr *B) -> bool { - DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(), - A->getDebugLoc()->getInlinedAt()); - DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(), - B->getDebugLoc()->getInlinedAt()); - return AllVarsNumbering.find(VarA)->second < - AllVarsNumbering.find(VarB)->second; - }; - // Go through all the transfers recorded in the TransferTracker -- this is // both the live-ins to a block, and any movements of values that happen // in the middle. - for (auto &P : TTracker->Transfers) { - // Sort them according to appearance order. - llvm::sort(P.Insts, OrderDbgValues); + for (const auto &P : TTracker->Transfers) { + // We have to insert DBG_VALUEs in a consistent order, otherwise they + // appear in DWARF in different orders. Use the order that they appear + // when walking through each block / each instruction, stored in + // AllVarsNumbering. 
+ SmallVector> Insts; + for (MachineInstr *MI : P.Insts) { + DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(), + MI->getDebugLoc()->getInlinedAt()); + Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI); + } + llvm::sort(Insts, + [](const auto &A, const auto &B) { return A.first < B.first; }); + // Insert either before or after the designated point... if (P.MBB) { MachineBasicBlock &MBB = *P.MBB; - for (auto *MI : P.Insts) { - MBB.insert(P.Pos, MI); - } + for (const auto &Pair : Insts) + MBB.insert(P.Pos, Pair.second); } else { // Terminators, like tail calls, can clobber things. Don't try and place // transfers after them. @@ -2838,9 +2835,8 @@ void InstrRefBasedLDV::emitLocations( continue; MachineBasicBlock &MBB = *P.Pos->getParent(); - for (auto *MI : P.Insts) { - MBB.insertAfterBundle(P.Pos, MI); - } + for (const auto &Pair : Insts) + MBB.insertAfterBundle(P.Pos, Pair.second); } } } From f29256a64ac11cf59cea878c8d1ba9537db4f523 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 19 Jan 2022 21:19:53 -0800 Subject: [PATCH 011/946] [MLGO] Improved support for AOT cross-targeting scenarios The tensorflow AOT compiler can cross-target, but it can't run on (for example) arm64. We added earlier support where the AOT-ed header and object would be built on a separate builder and then passed at build time to a build host where the AOT compiler can't run, but clang can be otherwise built. To simplify such scenarios given we now support more than one AOT-able case (regalloc and inliner), we make the AOT scenario centered on whether files are generated, case by case (this includes the "passed from a different builder" scenario). This means we shouldn't need an 'umbrella' LLVM_HAVE_TF_AOT, in favor of case by case control. A builder can opt out of an AOT case by passing that case's model path as `none`. Note that the overrides still take precedence. 
This patch controls conditional compilation with case-specific flags, which can be enabled locally, for the component where those are available. We still keep an overall flag for some tests. The 'development/training' mode is unchanged, because there the model is passed from the command line and interpreted. Differential Revision: https://reviews.llvm.org/D117752 --- llvm/CMakeLists.txt | 8 +- llvm/cmake/modules/TensorFlowCompile.cmake | 100 +++++++++--------- llvm/include/llvm/Analysis/InlineAdvisor.h | 4 - llvm/include/llvm/Config/llvm-config.h.cmake | 3 - llvm/lib/Analysis/InlineAdvisor.cpp | 3 + llvm/lib/Analysis/MLInlineAdvisor.cpp | 26 +++-- llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 4 +- llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 3 + llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 5 - .../CodeGen/MLRegalloc/dev-rel-equivalence.ll | 1 - .../Transforms/Inline/ML/bounds-checks.ll | 1 - .../Inline/ML/ml-test-release-mode.ll | 1 - 12 files changed, 80 insertions(+), 79 deletions(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 4f248c58e1822..687bc6489b4ac 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -891,13 +891,17 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "") ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT tf_xla_runtime) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS tf_xla_runtime) # Once we add more modules, we should handle this more automatically. 
- if (NOT DEFINED LLVM_INLINER_MODEL_PATH + if (DEFINED LLVM_OVERRIDE_MODEL_HEADER_INLINERSIZEMODEL) + set(LLVM_INLINER_MODEL_PATH "none") + elseif(NOT DEFINED LLVM_INLINER_MODEL_PATH OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL "" OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL "autogenerate") set(LLVM_INLINER_MODEL_PATH "autogenerate") set(LLVM_INLINER_MODEL_AUTOGENERATED 1) endif() - if (NOT DEFINED LLVM_RAEVICT_MODEL_PATH + if (DEFINED LLVM_OVERRIDE_MODEL_HEADER_REGALLOCEVICTMODEL) + set(LLVM_RAEVICT_MODEL_PATH "none") + elseif(NOT DEFINED LLVM_RAEVICT_MODEL_PATH OR "${LLVM_RAEVICT_MODEL_PATH}" STREQUAL "" OR "${LLVM_RAEVICT_MODEL_PATH}" STREQUAL "autogenerate") set(LLVM_RAEVICT_MODEL_PATH "autogenerate") diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index 9427a26de8be6..4f79f6653c88f 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -44,35 +44,21 @@ endfunction() # Produce a pair of files called ${fname}.h and ${fname}.o in the # ${CMAKE_CURRENT_BINARY_DIR}. The generated header will define a C++ class # called ${cpp_class} - which may be a namespace-qualified class name. 
-function(tfcompile model tag_set signature_def_key fname cpp_class) - set(prefix ${CMAKE_CURRENT_BINARY_DIR}/${fname}) - set(obj_file ${prefix}.o) - set(hdr_file ${prefix}.h) - string(TOUPPER ${fname} fname_allcaps) - set(override_header ${LLVM_OVERRIDE_MODEL_HEADER_${fname_allcaps}}) - set(override_object ${LLVM_OVERRIDE_MODEL_OBJECT_${fname_allcaps}}) - if (EXISTS "${override_header}" AND EXISTS "${override_object}") - configure_file(${override_header} ${hdr_file} COPYONLY) - configure_file(${override_object} ${obj_file} COPYONLY) - message("Using provided header " - ${hdr_file} " and object " ${obj_file} - " files for model " ${model}) - else() - tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE) - message("Using model at " ${LLVM_ML_MODELS_ABSOLUTE}) - add_custom_command(OUTPUT ${obj_file} ${hdr_file} - COMMAND ${TENSORFLOW_AOT_COMPILER} aot_compile_cpu - --multithreading false - --dir ${LLVM_ML_MODELS_ABSOLUTE} - --tag_set ${tag_set} - --signature_def_key ${signature_def_key} - --output_prefix ${prefix} - --cpp_class ${cpp_class} - --target_triple ${LLVM_HOST_TRIPLE} - ) - endif() +function(tf_compile model tag_set signature_def_key fname cpp_class hdr_file obj_file) + tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE) + message("Using model at " ${LLVM_ML_MODELS_ABSOLUTE}) + add_custom_command(OUTPUT ${obj_file} ${hdr_file} + COMMAND ${TENSORFLOW_AOT_COMPILER} aot_compile_cpu + --multithreading false + --dir ${LLVM_ML_MODELS_ABSOLUTE} + --tag_set ${tag_set} + --signature_def_key ${signature_def_key} + --output_prefix ${prefix} + --cpp_class ${cpp_class} + --target_triple ${LLVM_HOST_TRIPLE} + ) - # Aggregate the objects so that results of different tfcompile calls may be + # Aggregate the objects so that results of different tf_compile calls may be # grouped into one target. 
set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file} PARENT_SCOPE) set_source_files_properties(${obj_file} PROPERTIES @@ -82,36 +68,50 @@ function(tfcompile model tag_set signature_def_key fname cpp_class) set_source_files_properties(${hdr_file} PROPERTIES GENERATED 1) -endfunction() + endfunction() function(tf_find_and_compile model default_url default_path test_model_generator tag_set signature_def_key fname cpp_class) - if ("${model}" STREQUAL "download") - # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" - if ("${default_url}" STREQUAL "TO_BE_UPDATED") - message(FATAL_ERROR "Default URL was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") + set(prefix ${CMAKE_CURRENT_BINARY_DIR}/${fname}) + set(obj_file ${prefix}.o) + set(hdr_file ${prefix}.h) + string(TOUPPER ${fname} fname_allcaps) + set(override_header ${LLVM_OVERRIDE_MODEL_HEADER_${fname_allcaps}}) + set(override_object ${LLVM_OVERRIDE_MODEL_OBJECT_${fname_allcaps}}) + # If the user specified overrides, that indicates intent to use AOT and we + # don't care what the model path is + if (EXISTS "${override_header}" AND EXISTS "${override_object}") + configure_file(${override_header} ${hdr_file} COPYONLY) + configure_file(${override_object} ${obj_file} COPYONLY) + message(STATUS "Using provided header " ${hdr_file} " and object " ${obj_file} " + files for model " ${fname}) + set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file}) + set(GENERATED_HEADERS ${GENERATED_HEADERS} ${hdr_file}) + elseif("${model}" STREQUAL "none") + message(STATUS "Will skip enabling mlgo for ${fname}") + return() + else() + if ("${model}" STREQUAL "download") + # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" + if ("${default_url}" STREQUAL "TO_BE_UPDATED") + message(FATAL_ERROR "Default URL was set to 'download' but there is no" + " model url currently 
specified in cmake - likely, the model interface" + " recently changed, and so there is not a released model available.") + endif() + + set(model ${default_url}) endif() - set(model ${default_url}) - endif() + if ("${model}" STREQUAL "autogenerate") + set(model ${default_path}-autogenerated) + generate_mock_model(${test_model_generator} ${model}) + endif() - if ("${model}" STREQUAL "autogenerate") - set(model ${default_path}-autogenerated) - generate_mock_model(${test_model_generator} ${model}) + tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE) + tf_compile(${LLVM_ML_MODELS_ABSOLUTE} ${tag_set} ${signature_def_key} ${fname} ${cpp_class} ${hdr_file} ${obj_file}) endif() - tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE) - tfcompile(${LLVM_ML_MODELS_ABSOLUTE} ${tag_set} ${signature_def_key} ${fname} ${cpp_class}) - - set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file} PARENT_SCOPE) - set_source_files_properties(${obj_file} PROPERTIES - GENERATED 1 EXTERNAL_OBJECT 1) - - set(GENERATED_HEADERS ${GENERATED_HEADERS} ${hdr_file} PARENT_SCOPE) - set_source_files_properties(${hdr_file} PROPERTIES - GENERATED 1) - set(GeneratedMLSources ${GeneratedMLSources} ${GENERATED_HEADERS} PARENT_SCOPE) set(MLDeps ${MLDeps} tf_xla_runtime PARENT_SCOPE) set(MLLinkDeps ${MLLinkDeps} tf_xla_runtime ${GENERATED_OBJS} PARENT_SCOPE) - + add_definitions(-DLLVM_HAVE_TF_AOT_${fname_allcaps}) endfunction() diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 9403f207e7055..0103ee7f83863 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -246,16 +246,12 @@ class InlineAdvisorAnalysisPrinterPass PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); }; -#ifdef LLVM_HAVE_TF_AOT std::unique_ptr getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM); -#endif -#ifdef LLVM_HAVE_TF_API std::unique_ptr getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM, std::function 
GetDefaultAdvice); -#endif // Default (manual policy) decision making helper APIs. Shared with the legacy // pass manager inliner. diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index a5edc2084a8a5..ec18b40fe04d9 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -91,9 +91,6 @@ /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */ #cmakedefine LLVM_HAVE_TF_API -/* Define if LLVM was built with a dependency to the tensorflow compiler */ -#cmakedefine LLVM_HAVE_TF_AOT - /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H} diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 53fa7a6824d30..f6e3dd354ff8e 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -27,6 +27,9 @@ using namespace llvm; #define DEBUG_TYPE "inline" +#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL +#define LLVM_HAVE_TF_AOT +#endif // This weirdly named statistic tracks the number of times that, when attempting // to inline a function A into B, we analyze the callers of B in order to see diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index c6ad1e05dd72c..203e0b025e6c6 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -11,36 +11,38 @@ // 'release' mode) or a runtime-loaded model (the 'development' case). 
// //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) - -#include -#include -#include - +#include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/MLInlineAdvisor.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ReleaseModeModelRunner.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Config/config.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Path.h" +#include +#include +#include + using namespace llvm; -#ifdef LLVM_HAVE_TF_AOT -#include "llvm/Analysis/ReleaseModeModelRunner.h" +#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL +#define LLVM_HAVE_TF_AOT +#endif + +#if defined(LLVM_HAVE_TF_AOT) // codegen-ed file #include "InlinerSizeModel.h" // NOLINT -#include "llvm/Analysis/InlineModelFeatureMaps.h" std::unique_ptr llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { @@ -53,6 +55,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) { #define DEBUG_TYPE "inline-ml" +#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API) + static cl::opt SizeIncreaseThreshold( "ml-advisor-size-increase-threshold", cl::Hidden, cl::desc("Maximum factor by which expected native size may increase before " diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 01c843724fe1b..848f63da288de 100644 --- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ 
b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -42,7 +42,9 @@ using namespace llvm; #define DEBUG_TYPE "ml-regalloc" - +#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL +#define LLVM_HAVE_TF_AOT +#endif // Generated header in release (AOT) mode #if defined LLVM_HAVE_TF_AOT #include "RegallocEvictModel.h" diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index d64e8cd06492b..87df7bb4a6896 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -43,6 +43,9 @@ static cl::opt EnableLocalReassignment( cl::init(false)); #define DEBUG_TYPE "regalloc" +#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL +#define LLVM_HAVE_TF_AOT +#endif char RegAllocEvictionAdvisorAnalysis::ID = 0; INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict", diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index 8ad45c02285c4..33e03aed81a77 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -192,14 +192,9 @@ class RegAllocEvictionAdvisorAnalysis : public ImmutablePass { /// an instance of the eviction advisor. template <> Pass *callDefaultCtor(); -// TODO(mtrofin): implement these. 
-#ifdef LLVM_HAVE_TF_AOT RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor(); -#endif -#ifdef LLVM_HAVE_TF_API RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); -#endif // TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation // out of RegAllocGreedy.cpp diff --git a/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll b/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll index cedf19964996d..6f1b265480b42 100644 --- a/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll +++ b/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll @@ -1,5 +1,4 @@ ; REQUIRES: have_tf_api -; REQUIRES: have_tf_aot ; REQUIRES: llvm_raevict_model_autogenerated ; REQUIRES: x86_64-linux ; diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks.ll b/llvm/test/Transforms/Inline/ML/bounds-checks.ll index a462c6b4722fb..7270fa3a93b1e 100644 --- a/llvm/test/Transforms/Inline/ML/bounds-checks.ll +++ b/llvm/test/Transforms/Inline/ML/bounds-checks.ll @@ -2,7 +2,6 @@ ; In all cases, the end result is the same: mandatory inlinings must happen. ; However, when we discover we 'trip' over the artificially-low size increase ; factor, we don't inline anymore. 
-; REQUIRES: have_tf_aot ; REQUIRES: llvm_inliner_model_autogenerated ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS diff --git a/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll index 7b0a253b0509b..9cbbfdd124e1f 100644 --- a/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll +++ b/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll @@ -5,7 +5,6 @@ ; This test uses Inputs/test-module.ll, as it will share it with a similar test ; for the 'development' mode. ; -; REQUIRES: have_tf_aot ; REQUIRES: llvm_inliner_model_autogenerated ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT From 866311e71c8f1f3724754d006eb50299424d5b1b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 20 Jan 2022 15:15:46 +0000 Subject: [PATCH 012/946] [X86] lowerToAddSubOrFMAddSub - lower 512-bit ADDSUB patterns to blend(fsub,fadd) AVX512 doesn't provide a ADDSUB instruction, but if we've built this from a build vector of scalar fsub/fadd elements we can still lower to blend(fsub,fadd) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++-- llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 98 ++-------------------- 2 files changed, 20 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0d12cf519a8b1..71c80d518f998 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ 
b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10079,13 +10079,18 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, if (IsSubAdd) return SDValue(); - // Do not generate X86ISD::ADDSUB node for 512-bit types even though - // the ADDSUB idiom has been successfully recognized. There are no known - // X86 targets with 512-bit ADDSUB instructions! - // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom - // recognition. - if (VT.is512BitVector()) - return SDValue(); + // There are no known X86 targets with 512-bit ADDSUB instructions! + // Convert to blend(fsub,fadd). + if (VT.is512BitVector()) { + SmallVector Mask; + for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) { + Mask.push_back(I); + Mask.push_back(I + E + 1); + } + SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1); + return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask); + } return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); } diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll index 1087ef9193d79..3ae8a1fd6659a 100644 --- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -531,67 +531,11 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) { ; ; AVX512-LABEL: test17: ; AVX512: # %bb.0: -; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0] -; AVX512-NEXT: vsubss %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] -; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] -; AVX512-NEXT: vaddss %xmm5, %xmm4, %xmm4 -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[3,3,3,3] -; 
AVX512-NEXT: vaddss %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vinsertps {{.*#+}} xmm8 = xmm2[0,1,2],xmm3[0] -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX512-NEXT: vsubss %xmm4, %xmm3, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; AVX512-NEXT: vsubss %xmm7, %xmm6, %xmm6 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3] -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm4[1,1,3,3] -; AVX512-NEXT: vaddss %xmm2, %xmm7, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,3,3,3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3] -; AVX512-NEXT: vaddss %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vinsertps {{.*#+}} xmm9 = xmm2[0,1,2],xmm3[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm4 -; AVX512-NEXT: vsubss %xmm4, %xmm2, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm2[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; AVX512-NEXT: vsubss %xmm7, %xmm6, %xmm6 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm7 = xmm2[1,1,3,3] -; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm4[1,1,3,3] -; AVX512-NEXT: vaddss %xmm3, %xmm7, %xmm3 -; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm6[0],xmm3[3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3] -; AVX512-NEXT: vaddss %xmm4, %xmm2, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] -; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 -; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm3 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] -; AVX512-NEXT: vsubss %xmm5, %xmm4, %xmm4 -; AVX512-NEXT: vmovshdup 
{{.*#+}} xmm5 = xmm0[1,1,3,3] -; AVX512-NEXT: vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3] -; AVX512-NEXT: vaddss %xmm6, %xmm5, %xmm5 -; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3] -; AVX512-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3] -; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3] -; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] -; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; AVX512-NEXT: vinsertf128 $1, %xmm9, %ymm8, %ymm1 -; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA +; AVX512-NEXT: kmovw %eax, %k1 +; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} +; AVX512-NEXT: vmovaps %zmm2, %zmm0 ; AVX512-NEXT: retq %1 = extractelement <16 x float> %A, i32 0 %2 = extractelement <16 x float> %B, i32 0 @@ -677,35 +621,9 @@ define <8 x double> @test18(<8 x double> %A, <8 x double> %B) { ; ; AVX512-LABEL: test18: ; AVX512: # %bb.0: -; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm4 -; AVX512-NEXT: vsubsd %xmm4, %xmm3, %xmm5 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm7 = xmm1[1,0] -; AVX512-NEXT: vaddsd %xmm7, %xmm6, %xmm6 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm6[0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; AVX512-NEXT: vaddsd %xmm4, %xmm3, %xmm3 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0] -; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm4 -; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm5 -; AVX512-NEXT: vsubsd %xmm5, %xmm4, %xmm6 -; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm1 -; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm7 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] 
-; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0] -; AVX512-NEXT: vaddsd %xmm5, %xmm4, %xmm4 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm6[0],xmm4[0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm7[0],xmm0[0] -; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0 -; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1 -; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7] ; AVX512-NEXT: retq %1 = extractelement <8 x double> %A, i32 0 %2 = extractelement <8 x double> %B, i32 0 From 0f283de9d195e873006ae522660698b10bd81452 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 20 Jan 2022 16:25:22 +0100 Subject: [PATCH 013/946] [InstSimplify] Add test for reinterpret load of pointer type (NFC) --- llvm/test/Transforms/InstSimplify/ConstProp/loads.ll | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll index afdc9980ab4f9..fb56cb342f4a3 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll @@ -336,3 +336,14 @@ define i32 @load_all_undef() { %v = load i32, i32* getelementptr (i32, i32* bitcast ({ i32, [4 x i8] }* @g_all_undef to i32*), i64 1) ret i32 %v } + +@g_i8_data = constant [16 x i8] c"\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" + +define i64* @load_ptr_from_i8_data() { +; CHECK-LABEL: @load_ptr_from_i8_data( +; CHECK-NEXT: [[V:%.*]] = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**), align 8 +; CHECK-NEXT: ret i64* [[V]] +; + %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**) + ret i64* %v +} From 
3da69fb5a26c7bf83320b3b4ad0859be9b47b3fb Mon Sep 17 00:00:00 2001 From: Mubashar Ahmad Date: Thu, 20 Jan 2022 15:20:57 +0000 Subject: [PATCH 014/946] [Clang][AArch64][ARM] Unaligned Access Test Fix Test fixed for the unaligned access warning. --- clang/test/Sema/test-wunaligned-access.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c index 55c2149634d1a..909cda45f489b 100644 --- a/clang/test/Sema/test-wunaligned-access.c +++ b/clang/test/Sema/test-wunaligned-access.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm +// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t // REQUIRES: arm-registered-target // // This test suite tests the warning triggered by the -Wunaligned-access option. From 805bc24868670309187d49907ce12583902db47d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 20 Jan 2022 16:48:19 +0100 Subject: [PATCH 015/946] [InstSimplify] Add test for load of non-integral pointer (NFC) --- .../test/Transforms/InstSimplify/ConstProp/loads.ll | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll index fb56cb342f4a3..65daa324f7468 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,LE -; RUN: opt < %s -data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" 
-instsimplify -S | FileCheck %s --check-prefixes=CHECK,BE +; RUN: opt < %s -data-layout="e-p:64:64:64-p1:16:16:16-p2:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-ni:2" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,LE +; RUN: opt < %s -data-layout="E-p:64:64:64-p1:16:16:16-p2:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-ni:2" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,BE ; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE} @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 } @@ -347,3 +347,12 @@ define i64* @load_ptr_from_i8_data() { %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**) ret i64* %v } + +define i64 addrspace(2)* @load_non_integral_ptr_from_i8_data() { +; CHECK-LABEL: @load_non_integral_ptr_from_i8_data( +; CHECK-NEXT: [[V:%.*]] = load i64 addrspace(2)*, i64 addrspace(2)** bitcast ([16 x i8]* @g_i8_data to i64 addrspace(2)**), align 8 +; CHECK-NEXT: ret i64 addrspace(2)* [[V]] +; + %v = load i64 addrspace(2)*, i64 addrspace(2)** bitcast ([16 x i8]* @g_i8_data to i64 addrspace(2)**) + ret i64 addrspace(2)* %v +} From 91eca967b9eb115afda52e3a7d158a97a24bfe7f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 20 Jan 2022 11:02:06 -0500 Subject: [PATCH 016/946] [gn build] (manually) port f29256a64a --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 489d5abec9e27..3efbdde7d85b4 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -335,7 +335,6 @@ write_cmake_config("llvm-config") { "LLVM_ENABLE_NEW_PASS_MANAGER=1", "LLVM_FORCE_ENABLE_STATS=", "LLVM_HAS_ATOMICS=1", - "LLVM_HAVE_TF_AOT=", "LLVM_HAVE_TF_API=", 
"LLVM_HOST_TRIPLE=$llvm_current_triple", "LLVM_NATIVE_ARCH=$native_target", From 616f77172f0a48ef02c1700882ca0ba2215066d1 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 20 Jan 2022 08:18:08 -0600 Subject: [PATCH 017/946] [OpenMPIRBuilder] Detect and fix ambiguous InsertPoints for createParallel. When a Builder method accepts multiple InsertPoints and both point to the same position, inserting instructions at one position will "move" the other after the inserted position since the InsertPoint is pegged to the instruction following the intended InsertPoint. For instance, when creating a parallel region at Loc and passing the same position as AllocaIP, creating instructions at Loc will "move" the AllocaIP behind the Loc position. To avoid this ambiguity, add an assertion checking this condition and fix the unittests. In case of AllocaIP, an alternative solution could be to implicitly split BasicBlock at InsertPoint, using the first as AllocaIP, the second for inserting the instructions themselves. However, this solution is specific to AllocaIP since AllocaIP will always have to be first. Hence, this is an argument for generally handling ambiguous InsertPoints as an API usage error. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D117226 --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 16 +++++++++++++ .../Frontend/OpenMPIRBuilderTest.cpp | 23 +++++++++++++++++++ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 13 +++++++++++ 3 files changed, 52 insertions(+) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 62bef5259877a..0bf0c832d5ddf 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -60,6 +60,20 @@ static cl::opt UnrollThresholdFactor( "simplifications still taking place"), cl::init(1.5)); +#ifndef NDEBUG +/// Return whether IP1 and IP2 are ambiguous, i.e.
that inserting instructions +/// at position IP1 may change the meaning of IP2 or vice-versa. This is because +/// an InsertPoint stores the instruction before something is inserted. For +/// instance, if both point to the same instruction, two IRBuilders alternating +/// creating instruction will cause the instructions to be interleaved. +static bool isConflictIP(IRBuilder<>::InsertPoint IP1, + IRBuilder<>::InsertPoint IP2) { + if (!IP1.isSet() || !IP2.isSet()) + return false; + return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint(); +} +#endif + void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) { LLVMContext &Ctx = Fn.getContext(); @@ -527,6 +541,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { + assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); + if (!updateToLocation(Loc)) return Loc.IP; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index d00f799d30c0e..bc2d3ec8e7abe 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -483,6 +483,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { F->setName("func"); IRBuilder<> Builder(BB); + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); AllocaInst *PrivAI = nullptr; @@ -589,6 +592,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { F->setName("func"); IRBuilder<> Builder(BB); + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); unsigned 
NumInnerBodiesGenerated = 0; @@ -682,6 +688,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { F->setName("func"); IRBuilder<> Builder(BB); + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); unsigned NumInnerBodiesGenerated = 0; @@ -790,6 +799,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { F->setName("func"); IRBuilder<> Builder(BB); + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); AllocaInst *PrivAI = nullptr; @@ -913,6 +925,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { F->setName("func"); IRBuilder<> Builder(BB); + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); unsigned NumBodiesGenerated = 0; @@ -3108,6 +3123,10 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); + + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); // Create variables to be reduced. @@ -3345,6 +3364,10 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); + + BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F); + Builder.CreateBr(EnterBB); + Builder.SetInsertPoint(EnterBB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); // Create variables to be reduced. 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 14d38d44f6ab8..d1f261e52bf2e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -249,6 +249,19 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: Is the Parallel construct cancellable? bool isCancellable = false; + // Ensure that the BasicBlock for the parallel region is separate from the + // function entry, into which we may need to insert allocas. + if (builder.GetInsertBlock() == + &builder.GetInsertBlock()->getParent()->getEntryBlock()) { + assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && + "Assuming end of basic block"); + llvm::BasicBlock *entryBB = + llvm::BasicBlock::Create(builder.getContext(), "parallel.entry", + builder.GetInsertBlock()->getParent(), + builder.GetInsertBlock()->getNextNode()); + builder.CreateBr(entryBB); + builder.SetInsertPoint(entryBB); + } llvm::OpenMPIRBuilder::LocationDescription ompLoc( builder.saveIP(), builder.getCurrentDebugLocation()); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel( From 3c422cbe6b7e2ea70fdd97883a9cd4d7d6a2efb2 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 20 Jan 2022 08:23:51 -0800 Subject: [PATCH 018/946] [SLP] Add an assert to make a non-obvious precondition clear [NFC] --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 015973d65f4ff..37e4a4e5732fd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7539,6 +7539,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, ScheduleData *DepDest = BundleMember->NextLoadStore; if
(DepDest) { Instruction *SrcInst = BundleMember->Inst; + assert(SrcInst->mayReadOrWriteMemory() && + "NextLoadStore list for non memory effecting bundle?"); MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA); bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); unsigned numAliased = 0; From 54ba376d0802e74c84d0bf79c0a919c040884398 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 20 Jan 2022 08:33:08 -0800 Subject: [PATCH 019/946] Add missing include to fix modular build --- lld/Common/Timer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/Common/Timer.cpp b/lld/Common/Timer.cpp index 40fecd8892c18..29838c9720b73 100644 --- a/lld/Common/Timer.cpp +++ b/lld/Common/Timer.cpp @@ -9,6 +9,7 @@ #include "lld/Common/Timer.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Format.h" +#include using namespace lld; using namespace llvm; From c0957bd61794cc37012e7a5f7d89f00fe02ff2ef Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 20 Jan 2022 08:35:33 -0800 Subject: [PATCH 020/946] Add missing include to fix modular build --- lldb/include/lldb/Target/Statistics.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index dbf3554986aae..185389b2eeafe 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -14,6 +14,7 @@ #include "llvm/Support/JSON.h" #include #include +#include #include #include From 990bab89fff75b9afb98762f6e90eb634afc6d42 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Thu, 20 Jan 2022 15:13:32 +0000 Subject: [PATCH 021/946] [ScalableVectors] Warn instead of error for invalid size requests. This was intended to be fixed by D98856, but that only seemed to have the desired behaviour when compiling to assembly using `-S`, not when compiling into an object file or executable. Given that this was not the intention of D98856, this patch fixes the behaviour. 
--- clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- clang/test/Driver/fsanitize-coverage.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 96d949be17eea..c48c6fd59bec3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5318,7 +5318,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // as errors, but until then, we can live with a warning being emitted by the // compiler. This way, Clang can be used to compile code with scalable vectors // and identify possible issues. - if (isa(JA)) { + if (isa(JA) || isa(JA) || + isa(JA)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-treat-scalable-fixed-error-as-warning"); } diff --git a/clang/test/Driver/fsanitize-coverage.c b/clang/test/Driver/fsanitize-coverage.c index ab8a8871877e6..23953af6e6697 100644 --- a/clang/test/Driver/fsanitize-coverage.c +++ b/clang/test/Driver/fsanitize-coverage.c @@ -106,7 +106,7 @@ // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-8bit-counters %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE8BIT // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=bb,inline-8bit-counters %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE8BIT -// CHECK_INLINE8BIT-NOT: warning +// CHECK_INLINE8BIT-NOT: warning: // CHECK_INLINE8BIT: -fsanitize-coverage-inline-8bit-counters // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-8bit-counters,pc-table %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_PC_TABLE_FOR_INLINE8BIT @@ -115,7 +115,7 @@ // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-bool-flag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE_BOOL_FLAG // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=bb,inline-bool-flag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE_BOOL_FLAG -// CHECK_INLINE_BOOL_FLAG-NOT: warning +// CHECK_INLINE_BOOL_FLAG-NOT: 
warning: // CHECK_INLINE_BOOL_FLAG: -fsanitize-coverage-inline-bool-flag // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-bool-flag,pc-table %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_PC_TABLE_FOR_INLINEBOOL From c43ebae838de35ddcbccd1af7a94ddadd8dfd7f9 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 20 Jan 2022 08:46:06 -0800 Subject: [PATCH 022/946] [SLP] Reduce nesting depth in calculateDependencies via for loop and early continue [NFC] --- .../Transforms/Vectorize/SLPVectorizer.cpp | 192 +++++++++--------- 1 file changed, 95 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 37e4a4e5732fd..5dbd975f58dac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7491,113 +7491,111 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, while (!WorkList.empty()) { ScheduleData *SD = WorkList.pop_back_val(); - - ScheduleData *BundleMember = SD; - while (BundleMember) { + for (ScheduleData *BundleMember = SD; BundleMember; + BundleMember = BundleMember->NextInBundle) { assert(isInSchedulingRegion(BundleMember)); - if (!BundleMember->hasValidDependencies()) { - - LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember - << "\n"); - BundleMember->Dependencies = 0; - BundleMember->resetUnscheduledDeps(); + if (BundleMember->hasValidDependencies()) + continue; - // Handle def-use chain dependencies. 
- if (BundleMember->OpValue != BundleMember->Inst) { - ScheduleData *UseSD = getScheduleData(BundleMember->Inst); - if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { - BundleMember->Dependencies++; - ScheduleData *DestBundle = UseSD->FirstInBundle; - if (!DestBundle->IsScheduled) - BundleMember->incrementUnscheduledDeps(1); - if (!DestBundle->hasValidDependencies()) - WorkList.push_back(DestBundle); - } - } else { - for (User *U : BundleMember->Inst->users()) { - if (isa(U)) { - ScheduleData *UseSD = getScheduleData(U); - if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { - BundleMember->Dependencies++; - ScheduleData *DestBundle = UseSD->FirstInBundle; - if (!DestBundle->IsScheduled) - BundleMember->incrementUnscheduledDeps(1); - if (!DestBundle->hasValidDependencies()) - WorkList.push_back(DestBundle); - } - } else { - // I'm not sure if this can ever happen. But we need to be safe. - // This lets the instruction/bundle never be scheduled and - // eventually disable vectorization. - BundleMember->Dependencies++; - BundleMember->incrementUnscheduledDeps(1); - } - } + LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember + << "\n"); + BundleMember->Dependencies = 0; + BundleMember->resetUnscheduledDeps(); + + // Handle def-use chain dependencies. + if (BundleMember->OpValue != BundleMember->Inst) { + ScheduleData *UseSD = getScheduleData(BundleMember->Inst); + if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { + BundleMember->Dependencies++; + ScheduleData *DestBundle = UseSD->FirstInBundle; + if (!DestBundle->IsScheduled) + BundleMember->incrementUnscheduledDeps(1); + if (!DestBundle->hasValidDependencies()) + WorkList.push_back(DestBundle); } - - // Handle the memory dependencies. 
- ScheduleData *DepDest = BundleMember->NextLoadStore; - if (DepDest) { - Instruction *SrcInst = BundleMember->Inst; - assert(SrcInst->mayReadOrWriteMemory() && - "NextLoadStore list for non memory effecting bundle?"); - MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA); - bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); - unsigned numAliased = 0; - unsigned DistToSrc = 1; - - while (DepDest) { - assert(isInSchedulingRegion(DepDest)); - - // We have two limits to reduce the complexity: - // 1) AliasedCheckLimit: It's a small limit to reduce calls to - // SLP->isAliased (which is the expensive part in this loop). - // 2) MaxMemDepDistance: It's for very large blocks and it aborts - // the whole loop (even if the loop is fast, it's quadratic). - // It's important for the loop break condition (see below) to - // check this limit even between two read-only instructions. - if (DistToSrc >= MaxMemDepDistance || - ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) && - (numAliased >= AliasedCheckLimit || - SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) { - - // We increment the counter only if the locations are aliased - // (instead of counting all alias checks). This gives a better - // balance between reduced runtime and accurate dependencies. 
- numAliased++; - - DepDest->MemoryDependencies.push_back(BundleMember); + } else { + for (User *U : BundleMember->Inst->users()) { + if (isa(U)) { + ScheduleData *UseSD = getScheduleData(U); + if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { BundleMember->Dependencies++; - ScheduleData *DestBundle = DepDest->FirstInBundle; - if (!DestBundle->IsScheduled) { + ScheduleData *DestBundle = UseSD->FirstInBundle; + if (!DestBundle->IsScheduled) BundleMember->incrementUnscheduledDeps(1); - } - if (!DestBundle->hasValidDependencies()) { + if (!DestBundle->hasValidDependencies()) WorkList.push_back(DestBundle); - } } - DepDest = DepDest->NextLoadStore; - - // Example, explaining the loop break condition: Let's assume our - // starting instruction is i0 and MaxMemDepDistance = 3. - // - // +--------v--v--v - // i0,i1,i2,i3,i4,i5,i6,i7,i8 - // +--------^--^--^ - // - // MaxMemDepDistance let us stop alias-checking at i3 and we add - // dependencies from i0 to i3,i4,.. (even if they are not aliased). - // Previously we already added dependencies from i3 to i6,i7,i8 - // (because of MaxMemDepDistance). As we added a dependency from - // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8 - // and we can abort this loop at i6. - if (DistToSrc >= 2 * MaxMemDepDistance) - break; - DistToSrc++; + } else { + // I'm not sure if this can ever happen. But we need to be safe. + // This lets the instruction/bundle never be scheduled and + // eventually disable vectorization. + BundleMember->Dependencies++; + BundleMember->incrementUnscheduledDeps(1); } } } - BundleMember = BundleMember->NextInBundle; + + // Handle the memory dependencies (if any). 
+ ScheduleData *DepDest = BundleMember->NextLoadStore; + if (!DepDest) + continue; + Instruction *SrcInst = BundleMember->Inst; + assert(SrcInst->mayReadOrWriteMemory() && + "NextLoadStore list for non memory effecting bundle?"); + MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA); + bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); + unsigned numAliased = 0; + unsigned DistToSrc = 1; + + while (DepDest) { + assert(isInSchedulingRegion(DepDest)); + + // We have two limits to reduce the complexity: + // 1) AliasedCheckLimit: It's a small limit to reduce calls to + // SLP->isAliased (which is the expensive part in this loop). + // 2) MaxMemDepDistance: It's for very large blocks and it aborts + // the whole loop (even if the loop is fast, it's quadratic). + // It's important for the loop break condition (see below) to + // check this limit even between two read-only instructions. + if (DistToSrc >= MaxMemDepDistance || + ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) && + (numAliased >= AliasedCheckLimit || + SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) { + + // We increment the counter only if the locations are aliased + // (instead of counting all alias checks). This gives a better + // balance between reduced runtime and accurate dependencies. + numAliased++; + + DepDest->MemoryDependencies.push_back(BundleMember); + BundleMember->Dependencies++; + ScheduleData *DestBundle = DepDest->FirstInBundle; + if (!DestBundle->IsScheduled) { + BundleMember->incrementUnscheduledDeps(1); + } + if (!DestBundle->hasValidDependencies()) { + WorkList.push_back(DestBundle); + } + } + DepDest = DepDest->NextLoadStore; + + // Example, explaining the loop break condition: Let's assume our + // starting instruction is i0 and MaxMemDepDistance = 3. + // + // +--------v--v--v + // i0,i1,i2,i3,i4,i5,i6,i7,i8 + // +--------^--^--^ + // + // MaxMemDepDistance let us stop alias-checking at i3 and we add + // dependencies from i0 to i3,i4,.. 
(even if they are not aliased). + // Previously we already added dependencies from i3 to i6,i7,i8 + // (because of MaxMemDepDistance). As we added a dependency from + // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8 + // and we can abort this loop at i6. + if (DistToSrc >= 2 * MaxMemDepDistance) + break; + DistToSrc++; + } } if (InsertInReadyList && SD->isReady()) { ReadyInsts.push_back(SD); From c104fca36b96203a36ef339e7548f14888013a2e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 20 Jan 2022 08:53:01 -0800 Subject: [PATCH 023/946] [SLP] Delete dead code in favor of proper assert [NFC] --- .../Transforms/Vectorize/SLPVectorizer.cpp | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5dbd975f58dac..43119b9c80cb3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7515,22 +7515,16 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, } } else { for (User *U : BundleMember->Inst->users()) { - if (isa(U)) { - ScheduleData *UseSD = getScheduleData(U); - if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { - BundleMember->Dependencies++; - ScheduleData *DestBundle = UseSD->FirstInBundle; - if (!DestBundle->IsScheduled) - BundleMember->incrementUnscheduledDeps(1); - if (!DestBundle->hasValidDependencies()) - WorkList.push_back(DestBundle); - } - } else { - // I'm not sure if this can ever happen. But we need to be safe. - // This lets the instruction/bundle never be scheduled and - // eventually disable vectorization.
+ assert(isa(U) && + "user of instruction must be instruction"); + ScheduleData *UseSD = getScheduleData(U); + if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) { BundleMember->Dependencies++; - BundleMember->incrementUnscheduledDeps(1); + ScheduleData *DestBundle = UseSD->FirstInBundle; + if (!DestBundle->IsScheduled) + BundleMember->incrementUnscheduledDeps(1); + if (!DestBundle->hasValidDependencies()) + WorkList.push_back(DestBundle); } } } From fabf1de13202030151899786c21d624b96605ca3 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 20 Jan 2022 16:56:31 +0000 Subject: [PATCH 024/946] [FuncSpec] Add a reference, and some other clarifying comments. NFC. --- .../Transforms/IPO/FunctionSpecialization.cpp | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 2425646455bd9..6c3cc39143372 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -6,15 +6,24 @@ // //===----------------------------------------------------------------------===// // -// This specialises functions with constant parameters (e.g. functions, -// globals). Constant parameters like function pointers and constant globals -// are propagated to the callee by specializing the function. +// This specialises functions with constant parameters. Constant parameters +// like function pointers and constant globals are propagated to the callee by +// specializing the function. The main benefit of this pass at the moment is +// that indirect calls are transformed into direct calls, which provides inline +// opportunities that the inliner would not have been able to achieve. That's +// why function specialisation is run before the inliner in the optimisation +// pipeline; that is by design. 
Otherwise, we would only benefit from constant +// passing, which is a valid use-case too, but hasn't been explored much in +// terms of performance uplifts, cost-model and compile-time impact. // // Current limitations: -// - It does not yet handle integer ranges. +// - It does not yet handle integer ranges. We do support "literal constants", +// but that's off by default under an option. // - Only 1 argument per function is specialised, -// - The cost-model could be further looked into, -// - We are not yet caching analysis results. +// - The cost-model could be further looked into (it mainly focuses on inlining +// benefits), +// - We are not yet caching analysis results, but profiling and checking where +// extra compile time is spent didn't suggest this to be a problem. // // Ideas: // - With a function specialization attribute for arguments, we could have @@ -30,8 +39,12 @@ // https://reviews.llvm.org/D106426 for details. Perhaps there is a // compile-time friendlier way to control/limit the number of specialisations // for recursive functions. -// - Don't transform the function if there is no function specialization -// happens. +// - Don't transform the function if function specialization does not trigger; +// the SCCPSolver may make IR changes. 
+// +// References: +// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable +// it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q // //===----------------------------------------------------------------------===// From 283f5a198a0e3c9978ca426e64a3011b566c2581 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Mon, 10 Jan 2022 10:19:27 +0000 Subject: [PATCH 025/946] [GlobalISel] Fix incorrect sign extension when combining G_INTTOPTR and G_PTR_ADD The GlobalISel combiner currently uses sign extension when manipulating the LHS constant when combining the following sequence of machine instructions into a single constant: ``` %0:_(s32) = G_CONSTANT i32 %1:_(p0) = G_INTTOPTR %0:_(s32) %2:_(s64) = G_CONSTANT i64 %3:_(p0) = G_PTR_ADD %1:_, %2:_(s64) ``` This causes an issue when the bit width of the first constant and the target pointer size are different, as G_INTTOPTR has no sign extension semantics. This patch fixes this by capturing an arbitrary-precision integer when matching the constant, allowing the matching function to correctly zero-extend it.
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D116941 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 +-- .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 31 ++++++++++++++++--- .../include/llvm/Target/GlobalISel/Combine.td | 2 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 13 +++++--- .../GlobalISel/combine-ptradd-int2ptr.mir | 15 +++++++++ .../AArch64/GlobalISel/inttoptr_add.ll | 15 +++++++++ 6 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index f3fa652b01754..1d07d7d6e7aed 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -353,8 +353,8 @@ class CombinerHelper { std::pair &PtrRegAndCommute); // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2 - bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); - void applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); + bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst); + void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst); /// Transform anyext(trunc(x)) to x. 
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 28bb8de117628..daf1ff052983f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -13,6 +13,7 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_MIPATTERNMATCH_H #define LLVM_CODEGEN_GLOBALISEL_MIPATTERNMATCH_H +#include "llvm/ADT/APInt.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/InstrTypes.h" @@ -59,11 +60,26 @@ inline OneNonDBGUse_match m_OneNonDBGUse(const SubPat &SP) { return SP; } -struct ConstantMatch { - int64_t &CR; - ConstantMatch(int64_t &C) : CR(C) {} +template +inline Optional matchConstant(Register, const MachineRegisterInfo &); + +template <> +inline Optional matchConstant(Register Reg, + const MachineRegisterInfo &MRI) { + return getIConstantVRegVal(Reg, MRI); +} + +template <> +inline Optional matchConstant(Register Reg, + const MachineRegisterInfo &MRI) { + return getIConstantVRegSExtVal(Reg, MRI); +} + +template struct ConstantMatch { + ConstT &CR; + ConstantMatch(ConstT &C) : CR(C) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) { + if (auto MaybeCst = matchConstant(Reg, MRI)) { CR = *MaybeCst; return true; } @@ -71,7 +87,12 @@ struct ConstantMatch { } }; -inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } +inline ConstantMatch m_ICst(APInt &Cst) { + return ConstantMatch(Cst); +} +inline ConstantMatch m_ICst(int64_t &Cst) { + return ConstantMatch(Cst); +} struct GCstAndRegMatch { Optional &ValReg; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 1d189c6dea6d8..9736e52a7b5bb 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ 
b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -391,7 +391,7 @@ def add_p2i_to_ptradd : GICombineRule< >; // Fold (ptr_add (int2ptr C1), C2) -> C1 + C2 -def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"int64_t">; +def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"APInt">; def const_ptradd_to_i2p: GICombineRule< (defs root:$root, const_ptradd_to_i2p_matchinfo:$info), (match (wip_match_opcode G_PTR_ADD):$root, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9ba8cf0cd7c25..ed1aa9d80840c 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2025,16 +2025,19 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd( } bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { + APInt &NewCst) { auto &PtrAdd = cast(MI); Register LHS = PtrAdd.getBaseReg(); Register RHS = PtrAdd.getOffsetReg(); MachineRegisterInfo &MRI = Builder.getMF().getRegInfo(); - if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) { - int64_t Cst; + if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) { + APInt Cst; if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) { - NewCst = Cst + *RHSCst; + auto DstTy = MRI.getType(PtrAdd.getReg(0)); + // G_INTTOPTR uses zero-extension + NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits()); + NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits()); return true; } } @@ -2043,7 +2046,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, } void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, - int64_t &NewCst) { + APInt &NewCst) { auto &PtrAdd = cast(MI); Register Dst = PtrAdd.getReg(0); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir index c6ee994b21b38..40e5e8ebb7731 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir @@ -50,3 +50,18 @@ body: | %4:_(s64) = G_PTRTOINT %3 $x0 = COPY %4(s64) ... +--- +name: test_combine_zero_extend +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test_combine_zero_extend + ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 4291891236 + ; CHECK-NEXT: $x0 = COPY [[C]](p0) + %0:_(s32) = G_CONSTANT i32 -3076096 + %1:_(p0) = G_INTTOPTR %0:_(s32) + %2:_(s64) = G_CONSTANT i64 36 + %3:_(p0) = G_PTR_ADD %1:_, %2:_(s64) + $x0 = COPY %3 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll new file mode 100644 index 0000000000000..b7349b1484248 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +define dso_local void @fn() { +; CHECK-LABEL: fn: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, #4132 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: movk x8, #65489, lsl #16 +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: ret +entry: + store i32 1, i32* bitcast (i8* getelementptr inbounds (i8, i8* inttoptr (i32 -3076096 to i8*), i64 36) to i32*), align 4 + ret void +} From ee198df2e14c139340421d4f5310dbaf9f725518 Mon Sep 17 00:00:00 2001 From: Random06457 <28494085+Random06457@users.noreply.github.com> Date: Thu, 20 Jan 2022 20:03:49 +0300 Subject: [PATCH 026/946] [mips] Improve vr4300 mulmul bugfix pass When compiling with dwarf info, the mfix4300 flag introduced in https://reviews.llvm.org/D116238 can miss some occurrences of the vr4300 mulmul bug if a debug instruction happens to be between two `muls` instructions. This change skips debug instructions in order to fix the mulmul bug detection. 
Fixes https://github.com/llvm/llvm-project/issues/53094 Differential Revision: https://reviews.llvm.org/D117615 --- llvm/lib/Target/Mips/MipsMulMulBugPass.cpp | 8 ++++--- llvm/test/CodeGen/Mips/vr4300-mulmul.ll | 25 ++++++++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp index cb112ca1dfffe..daaf1135c2b13 100644 --- a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp +++ b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp @@ -110,17 +110,19 @@ bool MipsMulMulBugFix::fixMulMulBB(MachineBasicBlock &MBB, const MipsInstrInfo &MipsII) { bool Modified = false; + MachineBasicBlock::instr_iterator NextMII; + // Iterate through the instructions in the basic block for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); - MII != E; ++MII) { + MII != E; MII = NextMII) { - MachineBasicBlock::instr_iterator NextMII = std::next(MII); + NextMII = next_nodbg(MII, E); // Trigger when the current instruction is a mul and the next instruction // is either a mul or a branch in case the branch target start with a mul if (NextMII != E && isFirstMul(*MII) && isSecondMulOrBranch(*NextMII)) { - LLVM_DEBUG(dbgs() << "Found mulmul!"); + LLVM_DEBUG(dbgs() << "Found mulmul!\n"); const MCInstrDesc &NewMCID = MipsII.get(Mips::NOP); BuildMI(MBB, NextMII, DebugLoc(), NewMCID); diff --git a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll index f20cc169825ee..33d909c7b4369 100644 --- a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll +++ b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll @@ -1,13 +1,14 @@ ; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn -define dso_local float @fun_s(float %x) local_unnamed_addr #0 { +define dso_local float @fun_s(float %x) local_unnamed_addr !dbg !7 { entry: ; CHECK-LABEL: fun_s ; CHECK: mul.s +; CHECK-NEXT: 
#DEBUG_VALUE: i <- 1 ; CHECK-NEXT: nop -; CHECK: mul.s %mul = fmul float %x, %x + call void @llvm.dbg.value(metadata i32 1, metadata !13, metadata !DIExpression()), !dbg !17 %mul1 = fmul float %mul, %x ret float %mul1 } @@ -22,3 +23,23 @@ entry: %mul1 = fmul double %mul, %x ret double %mul1 } + + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "vr4300-mulmul.ll", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DISubprogram(name: "fun_s", linkageName: "fun_s", scope: !1, file: !1, line: 1, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !10} +!10 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!13 = !DILocalVariable(name: "i", scope: !14, file: !1, line: 3, type: !15) +!14 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 5) +!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !DILocation(line: 0, scope: !14) From 89c447e4e6b564636bbf32b15d67e40cf6c60387 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 15 Jan 2022 19:16:03 -0500 Subject: [PATCH 027/946] AMDGPU: Stop reserving 36-bytes before kernel arguments for amdpal This was inheriting the mesa behavior, and as far as I know nobody is using opencl kernels with amdpal. The isMesaKernel check was irrelevant because this property needs to be held for all functions. 
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 14 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 2 +- .../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 272 ++++---- .../AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll | 162 ++--- .../AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll | 184 +++--- .../AMDGPU/GlobalISel/shl-ext-reduce.ll | 16 +- .../AMDGPU/GlobalISel/store-local.128.ll | 72 +-- .../AMDGPU/GlobalISel/store-local.96.ll | 72 +-- llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll | 12 +- llvm/test/CodeGen/AMDGPU/ds_read2.ll | 170 ++--- llvm/test/CodeGen/AMDGPU/ds_write2.ll | 96 +-- llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 20 +- .../AMDGPU/memory-legalizer-flat-agent.ll | 352 +++++----- .../memory-legalizer-flat-nontemporal.ll | 8 +- .../memory-legalizer-flat-singlethread.ll | 352 +++++----- .../AMDGPU/memory-legalizer-flat-system.ll | 352 +++++----- .../AMDGPU/memory-legalizer-flat-volatile.ll | 14 +- .../AMDGPU/memory-legalizer-flat-wavefront.ll | 348 +++++----- .../AMDGPU/memory-legalizer-flat-workgroup.ll | 336 +++++----- .../AMDGPU/memory-legalizer-global-agent.ll | 348 +++++----- .../memory-legalizer-global-nontemporal.ll | 8 +- .../memory-legalizer-global-singlethread.ll | 352 +++++----- .../AMDGPU/memory-legalizer-global-system.ll | 332 +++++----- .../memory-legalizer-global-volatile.ll | 14 +- .../memory-legalizer-global-wavefront.ll | 352 +++++----- .../memory-legalizer-global-workgroup.ll | 352 +++++----- .../AMDGPU/memory-legalizer-local-agent.ll | 604 ++++++++---------- .../memory-legalizer-local-nontemporal.ll | 16 +- .../memory-legalizer-local-singlethread.ll | 604 ++++++++---------- .../AMDGPU/memory-legalizer-local-system.ll | 604 ++++++++---------- .../AMDGPU/memory-legalizer-local-volatile.ll | 20 +- .../memory-legalizer-local-wavefront.ll | 604 ++++++++---------- .../memory-legalizer-local-workgroup.ll | 604 ++++++++---------- .../memory-legalizer-private-nontemporal.ll | 16 +- .../memory-legalizer-private-volatile.ll | 16 +- 
llvm/test/CodeGen/AMDGPU/store-local.128.ll | 96 +-- llvm/test/CodeGen/AMDGPU/store-local.96.ll | 96 +-- 37 files changed, 3802 insertions(+), 4090 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 88ed4b2b7a24e..7f1b94be4ffea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -212,7 +212,19 @@ class AMDGPUSubtarget { /// Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const Function &F) const { - return isAmdHsaOrMesa(F) ? 0 : 36; + switch (TargetTriple.getOS()) { + case Triple::AMDHSA: + case Triple::AMDPAL: + case Triple::Mesa3D: + return 0; + case Triple::UnknownOS: + default: + // For legacy reasons unknown/other is treated as a different version of + // mesa. + return 36; + } + + llvm_unreachable("invalid triple OS"); } /// \returns Maximum number of work groups per compute unit supported by the diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 452616e9ca3cf..baf0b4d29d819 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -5183,7 +5183,7 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C1]], [[C2]](s64) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 
01d5307693ad2..3adb7550d233f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -211,11 +211,11 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dword s5, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dword s5, s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -231,11 +231,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; ; GFX8-LABEL: test_div_fmas_f32: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 +; GFX8-NEXT: s_load_dword s5, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -252,11 +252,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX10_W32-LABEL: test_div_fmas_f32: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x4 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: 
s_load_dword s5, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s7, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 @@ -270,11 +270,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX10_W64-LABEL: test_div_fmas_f32: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x4 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s7, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 @@ -292,10 +292,10 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x25 -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c +; GFX7-NEXT: s_load_dword s4, 
s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -310,10 +310,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x94 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -329,10 +329,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -345,10 +345,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: 
s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -365,10 +365,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0xb -; GFX7-NEXT: s_load_dword s3, s[0:1], 0xd -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x16 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0xd +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -383,10 +383,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x34 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x8 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x10 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -402,10 +402,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x34 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x2c -; 
GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x10 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -418,10 +418,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x58 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x34 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x2c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x10 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -438,10 +438,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x2e -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 ; GFX7-NEXT: v_mov_b32_e32 v1, s3 @@ -456,10 +456,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; ; 
GFX8-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -475,10 +475,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s4 @@ -491,10 +491,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0xb8 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s4 @@ -511,8 +511,8 @@ 
define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x11 +; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x8 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s6 ; GFX7-NEXT: v_mov_b32_e32 v2, s8 @@ -531,8 +531,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; ; GFX8-LABEL: test_div_fmas_f64: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 -; GFX8-NEXT: s_load_dword s0, s[0:1], 0x44 +; GFX8-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 +; GFX8-NEXT: s_load_dword s0, s[0:1], 0x20 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s6 ; GFX8-NEXT: v_mov_b32_e32 v2, s8 @@ -552,8 +552,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX10_W32-LABEL: test_div_fmas_f64: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x44 -; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s2, s[0:1], 0x20 +; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_and_b32 s0, 1, s2 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8 @@ -569,8 +569,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX10_W64-LABEL: test_div_fmas_f64: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x44 -; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s2, s[0:1], 0x20 +; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_and_b32 s0, 1, s2 ; GFX10_W64-NEXT: 
v_mov_b32_e32 v0, s8 @@ -590,8 +590,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_cmp_eq_u32 s7, 0 @@ -609,8 +609,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_cmp_eq_u32 s7, 0 ; GFX8-NEXT: s_cselect_b32 s2, 1, 0 @@ -629,8 +629,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x1 -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0 ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 @@ -646,8 +646,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x1 -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX10_W64-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0 ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 @@ -668,10 +668,10 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -685,11 +685,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, 0 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -703,10 +703,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 @@ -719,10 +719,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 @@ -739,10 +739,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 -; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x25 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa +; GFX7-NEXT: s_load_dword s3, s[0:1], 0x13 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x1c +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b64 vcc, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s2 @@ -756,11 +756,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; ; GFX8-LABEL: 
test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x4c -; GFX8-NEXT: s_load_dword s3, s[0:1], 0x70 -; GFX8-NEXT: s_load_dword s4, s[0:1], 0x94 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x28 +; GFX8-NEXT: s_load_dword s3, s[0:1], 0x4c +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x70 ; GFX8-NEXT: s_mov_b64 vcc, -1 -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 @@ -774,10 +774,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W32: ; %bb.0: ; GFX10_W32-NEXT: s_clause 0x3 -; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W32-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 @@ -790,10 +790,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX10_W64: ; %bb.0: ; GFX10_W64-NEXT: s_clause 0x3 -; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x70 -; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x94 -; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x4c -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dword s4, s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dword s5, s[0:1], 0x70 +; GFX10_W64-NEXT: s_load_dword s6, s[0:1], 0x28 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_mov_b64 vcc, -1 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: 
v_mov_b32_e32 v0, s4 @@ -810,8 +810,8 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX7-NEXT: s_load_dword s8, s[0:1], 0x15 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s8, s[0:1], 0xc ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 @@ -838,8 +838,8 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX8-NEXT: s_load_dword s2, s[0:1], 0x54 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX8-NEXT: s_load_dword s2, s[0:1], 0x30 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v1, s6 @@ -873,9 +873,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W32: ; %bb.0: -; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x54 +; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc @@ -896,9 +896,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX10_W64: ; %bb.0: -; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 
v1, 2, v0 -; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x54 +; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc @@ -938,20 +938,20 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, [8 x i32], float addrspace(1)* %in, [8 x i32], i32 addrspace(1)* %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX7-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX7-NEXT: s_cbranch_execz .LBB13_2 ; GFX7-NEXT: ; %bb.1: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x1d +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x14 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -971,7 +971,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: s_mov_b32 s6, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -980,12 +980,12 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc ; GFX8-NEXT: 
flat_load_dwordx3 v[1:3], v[1:2] -; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX8-NEXT: s_cbranch_execz .LBB13_2 ; GFX8-NEXT: ; %bb.1: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1008,18 +1008,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W32: ; %bb.0: ; %entry -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10_W32-NEXT: s_mov_b32 s5, 0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3] ; GFX10_W32-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W32-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W32-NEXT: s_and_saveexec_b32 s4, vcc_lo ; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W32-NEXT: ; %bb.1: ; %bb -; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W32-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) @@ -1038,18 +1038,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX10_W64: ; %bb.0: ; %entry -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x28 ; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX10_W64-NEXT: s_mov_b32 s6, 0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[2:3] ; GFX10_W64-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX10_W64-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 ; GFX10_W64-NEXT: ; %bb.1: ; %bb -; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x74 +; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x50 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 2ece383a03247..fa0d86214e65e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_arg_imm: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -64,9 +64,9 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_arg_imm_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -84,9 +84,9 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_imm_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -105,10 +105,10 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 { ; GFX6-LABEL: v_bfe_print_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80002 @@ -125,9 +125,9 @@ define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -144,8 +144,8 @@ 
define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 8 @@ -161,10 +161,10 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -183,10 +183,10 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -205,10 +205,10 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_8: ; GFX6: 
; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -227,10 +227,10 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1001f @@ -247,10 +247,10 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x1f0001 @@ -267,10 +267,10 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) 
; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180008 @@ -287,10 +287,10 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80018 @@ -307,10 +307,10 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s3, s3, 31 @@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_i32_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 
; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, 31 @@ -349,7 +349,7 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -365,7 +365,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0x302e, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -381,7 +381,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -397,7 +397,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, 1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -413,7 +413,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) define amdgpu_kernel 
void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -429,7 +429,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x10007 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -446,7 +446,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_i32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -463,7 +463,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -480,7 +480,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80006 ; GFX6-NEXT: s_bfe_i32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -497,7 +497,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80010 ; GFX6-NEXT: s_bfe_i32 s2, 0x10000, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -514,7 +514,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_i32 s2, 0xffff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -531,7 +531,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x40004 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -548,7 +548,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -565,7 +565,7 @@ define amdgpu_kernel void 
@bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_i32 s2, 0x1fffe, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -582,7 +582,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -599,7 +599,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_15: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 ; GFX6-NEXT: s_bfe_i32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -616,7 +616,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_i32 s2, -1, 0x70001 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -632,7 +632,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_17: ; 
GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -649,7 +649,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_18: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_i32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -666,10 +666,10 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_sext_in_reg_i24: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x180000 @@ -690,8 +690,8 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 ; GFX6-LABEL: simplify_demanded_bfe_sdiv: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0 -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 @@ -731,10 +731,10 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* 
%ptr) #0 { ; GFX6-LABEL: bfe_0_width: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 8 @@ -751,11 +751,11 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s4, 0x80000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, s4 @@ -774,10 +774,10 @@ define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x80000 @@ -797,10 +797,10 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1) define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_16_bfe_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x100000 @@ -820,9 +820,9 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1) define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, s4 @@ -843,9 +843,9 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, s4 @@ -866,12 +866,12 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; 
GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -890,12 +890,12 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 @@ -914,10 +914,10 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000 @@ -937,10 +937,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 @@ -960,10 +960,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index 821d0fdd435c3..782d0b970a4ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s2, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb +; GFX6-NEXT: s_load_dword s2, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -65,9 +65,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void 
@bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_imm: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -85,9 +85,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_imm_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 @@ -105,9 +105,9 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_imm_arg_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -126,9 +126,9 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset: ; GFX6: ; 
%bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xc -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x3 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s3, s3, 63 @@ -145,8 +145,8 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 8 @@ -162,12 +162,12 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zextload_i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -184,10 +184,10 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -208,10 +208,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -232,10 +232,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -256,10 +256,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -280,10 +280,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -304,10 +304,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s3, 1 @@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000 @@ -348,10 +348,10 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -370,10 +370,10 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -392,10 +392,10 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x10000 @@ -415,10 +415,10 @@ define amdgpu_kernel 
void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s3, s3, 0x10000 @@ -438,10 +438,10 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -460,10 +460,10 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -482,10 +482,10 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) 
#0 { ; GFX6-LABEL: bfe_u32_test_8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s3, s3, 31 @@ -504,10 +504,10 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1001f @@ -524,10 +524,10 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace( define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x1f0001 @@ -544,10 +544,10 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x180008 @@ -564,10 +564,10 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x80018 @@ -585,10 +585,10 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s3, s3, 31 @@ -606,10 +606,10 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; GFX6-LABEL: bfe_u32_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s3, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, 31 @@ -627,7 +627,7 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_0: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -643,7 +643,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0x302e, 0 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -659,7 +659,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -675,7 +675,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_3: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, 1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -691,7 +691,7 @@ define amdgpu_kernel void 
@bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -707,7 +707,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_5: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x10007 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -724,7 +724,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_6: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_u32 s2, 0x80, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -741,7 +741,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_7: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80000 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -758,7 +758,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_8: ; GFX6: ; %bb.0: -; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80006 ; GFX6-NEXT: s_bfe_u32 s2, 0x7f, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -775,7 +775,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_9: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x80010 ; GFX6-NEXT: s_bfe_u32 s2, 0x10000, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -792,7 +792,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_10: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_u32 s2, 0xffff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -809,7 +809,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_11: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x40004 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -826,7 +826,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_12: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: 
v_mov_b32_e32 v0, s2 @@ -843,7 +843,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_13: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x100010 ; GFX6-NEXT: s_bfe_u32 s2, 0x1fffe, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -860,7 +860,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_14: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1e0002 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -877,7 +877,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_15: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1c0004 ; GFX6-NEXT: s_bfe_u32 s2, 0xa0, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -894,7 +894,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_bfe_u32 s2, -1, 0x70001 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -910,7 +910,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 
addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_17: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1f0001 ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -927,7 +927,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_18: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, 0x1001f ; GFX6-NEXT: s_bfe_u32 s2, 0xff, s2 ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -948,13 +948,13 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s8, s[2:3], 0x0 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; GFX6-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_and_b32 s8, s8, 63 @@ -977,8 +977,8 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: lshr_and: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -995,9 +995,9 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: v_lshr_and: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dword s4, s[0:1], 0xc -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x3 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshr_b32 s3, s3, s4 @@ -1015,8 +1015,8 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -1033,8 +1033,8 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x30006 @@ -1051,8 +1051,8 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-LABEL: shl_lshr: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s3, s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 +; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x150002 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll index 34db791ac9b5c..7e5ecaac8d9c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -213,7 +213,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) { define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; GFX7-LABEL: mulu24_shl64: ; GFX7: ; %bb.0: ; %bb -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX7-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX7-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, 0 @@ -226,7 +226,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX8-LABEL: mulu24_shl64: ; GFX8: ; %bb.0: ; %bb -; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX8-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -241,7 +241,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX9-LABEL: mulu24_shl64: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 7, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -256,7 +256,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { ; ; GFX10-LABEL: mulu24_shl64: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-NEXT: v_and_b32_e32 v0, 6, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mul_u32_u24_e32 v0, 7, v0 @@ -281,7 +281,7 @@ bb: define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* 
nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) { ; GFX7-LABEL: muli24_shl64: ; GFX7: ; %bb.0: ; %bb -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s6, 0 @@ -303,7 +303,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX8-LABEL: muli24_shl64: ; GFX8: ; %bb.0: ; %bb -; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 3, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -326,7 +326,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX9-LABEL: muli24_shl64: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -341,7 +341,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad ; ; GFX10-LABEL: muli24_shl64: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll index 244325b25302a..b662b219abdb4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll @@ -9,8 +9,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], 
s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -22,8 +22,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX7-LABEL: store_lds_v4i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -37,8 +37,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX10-LABEL: store_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -54,8 +54,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -111,8 +111,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s1, 0x80008 ; GFX7-NEXT: 
s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -166,8 +166,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -227,8 +227,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshr_b32 s0, s4, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -255,8 +255,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_lshr_b32 s1, s4, 16 @@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, 
s2 @@ -317,8 +317,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -331,8 +331,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -347,8 +347,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -365,8 +365,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, 
s5 @@ -378,8 +378,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -393,8 +393,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -423,8 +423,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -438,8 +438,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* 
%out, ; GFX10-LABEL: store_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll index 7246858843ac7..d19684bcbff9f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll @@ -9,8 +9,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-NEXT: v_mov_b32_e32 v1, s13 @@ -21,8 +21,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX7-LABEL: store_lds_v3i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s12 @@ -35,8 +35,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX10-LABEL: store_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s12 ; 
GFX10-NEXT: v_mov_b32_e32 v1, s13 @@ -51,8 +51,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000 @@ -96,8 +96,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s1, 0x80008 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_lshr_b32 s1, s4, 16 @@ -189,8 +189,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
s_lshr_b32 s0, s4, 16 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -212,8 +212,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_lshr_b32 s1, s4, 16 @@ -237,8 +237,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -264,8 +264,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -277,8 +277,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -292,8 +292,8 @@ define amdgpu_kernel 
void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 @@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -337,8 +337,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 @@ -354,8 +354,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, 
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-NEXT: v_mov_b32_e32 v1, s13 @@ -366,8 +366,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s12 @@ -380,8 +380,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s12 ; GFX10-NEXT: v_mov_b32_e32 v1, s13 diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll index bd173aaed0a07..41d33f96fc218 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -44,7 +44,7 @@ entry: define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 { ; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; CI: ; %bb.0: ; %entry -; CI-NEXT: s_load_dword s0, s[0:1], 0x9 +; CI-NEXT: s_load_dword s0, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 ; CI-NEXT: s_mov_b64 vcc, 0 
@@ -64,7 +64,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0, v0 @@ -81,7 +81,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v ; ; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7b @@ -291,7 +291,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 { ; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s0, s[0:1], 0x9 +; CI-NEXT: s_load_dword s0, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_sub_i32_e32 v0, vcc, 0x3fb, v0 ; CI-NEXT: s_mov_b64 vcc, 0 @@ -312,7 +312,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-NEXT: s_mov_b64 vcc, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v3, 0x3fb, v0 @@ -330,7 +330,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_ ; ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; 
GFX10-NEXT: v_mov_b32_e32 v3, 0x7b diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 260c6385c4781..c44c597775966 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -15,7 +15,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -28,7 +28,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -51,7 +51,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:255 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -64,7 +64,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:255 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -88,7 +88,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; CI-NEXT: s_mov_b32 
m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:1028 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -102,7 +102,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:1028 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -126,7 +126,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -142,7 +142,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset1:8 ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -184,7 +184,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_barrier ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: v_add_f32_e32 v1, v1, v2 ; CI-NEXT: s_mov_b32 s2, 0 @@ -202,7 +202,7 @@ define 
amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_barrier ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -245,7 +245,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset0:2 offset1:8 ; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:11 offset1:27 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -261,7 +261,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v4 offset0:2 offset1:8 ; GFX9-NEXT: ds_read2_b32 v[2:3], v4 offset0:11 offset1:27 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_add_f32_e32 v1, v2, v3 @@ -301,9 +301,9 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: read2_ptr_is_subreg_arg_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 @@ -320,9 +320,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, ; ; 
GFX9-LABEL: read2_ptr_is_subreg_arg_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, s3 @@ -354,9 +354,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: read2_ptr_is_subreg_arg_offset_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s2 @@ -373,9 +373,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1) ; ; GFX9-LABEL: read2_ptr_is_subreg_arg_offset_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, s3 @@ -410,7 +410,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[1:2], v0 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -423,7 +423,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float 
addrspace(1)* %out) #0 ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -453,7 +453,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:32 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -467,7 +467,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -491,7 +491,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b32 v1, v0 ; CI-NEXT: ds_read_b32 v2, v0 offset:32 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -505,7 +505,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ds_read_b32 v1, v0 ; GFX9-NEXT: ds_read_b32 v2, v0 offset:32 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] @@ -526,10 
+526,10 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -565,8 +565,8 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a ; ; GFX9-ALIGNED-LABEL: unaligned_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s4, v0 @@ -594,9 +594,9 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a ; ; GFX9-UNALIGNED-LABEL: unaligned_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add_u32_e32 v0, s2, v2 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 @@ -619,10 +619,10 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_offset_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: 
s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -658,8 +658,8 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, ; ; GFX9-ALIGNED-LABEL: unaligned_offset_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s4, v0 @@ -687,9 +687,9 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, ; ; GFX9-UNALIGNED-LABEL: unaligned_offset_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add3_u32 v0, s2, v2, 5 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 @@ -715,10 +715,10 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_2_simple_read2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v1, vcc, s2, v0 @@ -741,9 +741,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou ; ; GFX9-ALIGNED-LABEL: misaligned_2_simple_read2_f32: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-ALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_add_u32_e32 v1, s2, v0 ; GFX9-ALIGNED-NEXT: ds_read_u16 v2, v1 @@ -760,9 +760,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou ; ; GFX9-UNALIGNED-LABEL: misaligned_2_simple_read2_f32: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-UNALIGNED-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_add_u32_e32 v0, s2, v2 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:8 @@ -788,7 +788,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_mov_b32_e32 v5, 0 @@ -801,7 +801,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:8 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -824,7 +824,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; CI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_mov_b32_e32 v5, 0 @@ -837,7 +837,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read2_b64 v[0:3], v4 offset1:255 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -861,7 +861,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[1:2], v0 ; CI-NEXT: ds_read_b64 v[3:4], v0 offset:2056 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -875,7 +875,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: ds_read_b64 v[0:1], v4 ; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:2056 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] ; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] @@ -896,10 +896,10 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, double 
addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_read2_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v3, vcc, s2, v0 @@ -914,9 +914,9 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl ; ; GFX9-LABEL: misaligned_read2_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, s2, v4 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:1 @@ -945,7 +945,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[0:1], v0 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -957,7 +957,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: ds_read_b64 v[0:1], v2 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -975,7 +975,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read2_b32 v[0:1], v0 offset1:2 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -987,7 +987,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:2 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v2, v0, s[0:1] @@ -1007,7 +1007,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b128 v[0:3], v0 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1020,7 +1020,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: ds_read_b128 v[0:3], v4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -1042,7 +1042,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: ds_read_b64 v[0:1], v2 offset:16384 ; CI-NEXT: ds_read_b64 v[2:3], v2 offset:32760 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1056,7 +1056,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: ds_read_b64 v[0:1], v4 offset:16384 ; GFX9-NEXT: ds_read_b64 v[2:3], v4 offset:32760 -; GFX9-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc @@ -1075,7 +1075,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { ; CI-LABEL: sgemm_inner_loop_read2_sequence: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_lshl_b32 s0, s2, 2 ; CI-NEXT: s_add_i32 s1, s0, 0xc20 ; CI-NEXT: s_addk_i32 s0, 0xc60 @@ -1123,7 +1123,7 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* % ; GFX9-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX9-NEXT: s_waitcnt lgkmcnt(2) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_f32_e32 v0, v0, v6 @@ -1179,8 +1179,8 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* % define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { ; CI-LABEL: misaligned_read2_v2i32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1193,8 +1193,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, ; ; GFX9-LABEL: misaligned_read2_v2i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 
0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -1210,8 +1210,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { ; CI-LABEL: misaligned_read2_i64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dword s2, s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dword s2, s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -1224,8 +1224,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr ; ; GFX9-LABEL: misaligned_read2_i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -1241,8 +1241,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr define amdgpu_kernel void @ds_read_diff_base_interleaving( ; CI-LABEL: ds_read_diff_base_interleaving: ; CI: ; %bb.0: ; %bb -; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; CI-NEXT: v_lshlrev_b32_e32 v4, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 @@ -1272,10 +1272,10 @@ define amdgpu_kernel void @ds_read_diff_base_interleaving( ; ; GFX9-LABEL: ds_read_diff_base_interleaving: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v2, s4, v1 @@ -1348,10 +1348,10 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_add_u32 s40, s40, s11 ; CI-NEXT: s_mov_b64 s[10:11], s[6:7] -; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; CI-NEXT: s_load_dword s6, s[4:5], 0xb +; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 +; CI-NEXT: s_load_dword s6, s[4:5], 0x2 ; CI-NEXT: s_addc_u32 s41, s41, 0 -; CI-NEXT: s_add_u32 s8, s4, 48 +; CI-NEXT: s_add_u32 s8, s4, 12 ; CI-NEXT: s_addc_u32 s9, s5, 0 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 @@ -1393,9 +1393,9 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa ; GFX9-NEXT: s_add_u32 s36, s36, s11 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX9-NEXT: s_load_dword s6, s[4:5], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 -; GFX9-NEXT: s_add_u32 s8, s4, 48 +; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 +; GFX9-NEXT: s_add_u32 s8, s4, 12 ; GFX9-NEXT: s_addc_u32 s9, s5, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4 @@ -1487,7 +1487,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; CI-NEXT: ds_read_u8 v6, v0 offset:66 ; CI-NEXT: ds_read_u8 v0, v0 offset:65 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: v_or_b32_e32 v1, v2, v1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v6 @@ -1514,7 +1514,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; GFX9-ALIGNED-NEXT: ds_read_u8 v8, v2 offset:71 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(7) ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 8, v0 -; 
GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v7 @@ -1531,7 +1531,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)* ; GFX9-UNALIGNED-LABEL: read2_v2i32_align1_odd_offset: ; GFX9-UNALIGNED: ; %bb.0: ; %entry ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x41 -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: ds_read2_b32 v[0:1], v0 offset1:1 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index 501282d7f5573..268e8bbeb8a12 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -9,7 +9,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_one_val_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -23,7 +23,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo ; ; GFX9-LABEL: simple_write2_one_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -44,7 +44,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_f32: ; CI: ; %bb.0: -; CI-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -60,7 +60,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo ; ; GFX9-LABEL: simple_write2_two_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -85,7 +85,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_volatile_0: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -105,7 +105,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_f32_volatile_0: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -131,7 +131,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_volatile_1: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -151,7 +151,7 @@ define 
amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_f32_volatile_1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -182,7 +182,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg2_mixed_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 @@ -199,7 +199,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa ; ; GFX9-LABEL: simple_write2_two_val_subreg2_mixed_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: ; kill: killed $vgpr4 @@ -229,7 +229,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg2_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 3, v0 @@ -244,7 +244,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_subreg2_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; 
GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -268,7 +268,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_subreg4_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v1, 4, v0 @@ -283,7 +283,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_subreg4_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -307,7 +307,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_max_offset_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -323,7 +323,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( ; ; GFX9-LABEL: simple_write2_two_val_max_offset_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] glc @@ -348,7 +348,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( define amdgpu_kernel void 
@simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_too_far_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -368,7 +368,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* ; ; GFX9-LABEL: simple_write2_two_val_too_far_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -394,7 +394,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_x2: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, ; ; GFX9-LABEL: simple_write2_two_val_f32_x2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -450,7 +450,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { ; CI-LABEL: simple_write2_two_val_f32_x2_nonzero_base: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb +; 
CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -469,7 +469,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs ; ; GFX9-LABEL: simple_write2_two_val_f32_x2_nonzero_base: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[0:1] @@ -506,8 +506,8 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 { ; CI-LABEL: write2_ptr_subreg_arg_two_val_f32: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb -; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xf +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x6 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -529,8 +529,8 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* ; ; GFX9-LABEL: write2_ptr_subreg_arg_two_val_f32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x3c +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x18 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v1, v0, s[4:5] @@ -566,7 +566,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -580,7 +580,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do ; ; GFX9-LABEL: simple_write2_one_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] @@ -601,8 +601,8 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { ; CI-LABEL: misaligned_simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; CI-NEXT: s_load_dword s0, s[0:1], 0xd +; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; CI-NEXT: s_load_dword s0, s[0:1], 0x4 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -618,8 +618,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace ; ; GFX9-LABEL: misaligned_simple_write2_one_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -642,8 +642,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { ; CI-LABEL: unaligned_offset_simple_write2_one_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; CI-NEXT: s_load_dword s0, s[0:1], 0xd +; 
CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; CI-NEXT: s_load_dword s0, s[0:1], 0x4 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -675,8 +675,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add ; ; GFX9-ALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -702,8 +702,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add ; ; GFX9-UNALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x34 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX9-UNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] @@ -731,7 +731,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_two_val_f64: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 @@ -747,7 +747,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do ; ; GFX9-LABEL: simple_write2_two_val_f64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c +; 
GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] glc @@ -873,7 +873,7 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 { ; CI-LABEL: write2_sgemm_sequence: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x4 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -895,7 +895,7 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld ; ; GFX9-LABEL: write2_sgemm_sequence: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x10 ; GFX9-NEXT: s_lshl_b32 s2, s2, 2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -950,8 +950,8 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { ; CI-LABEL: simple_write2_v4f32_superreg_align4: ; CI: ; %bb.0: -; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; CI-NEXT: s_load_dword s4, s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; CI-NEXT: s_load_dword s4, s[0:1], 0x0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -968,8 +968,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs ; ; GFX9-ALIGNED-LABEL: simple_write2_v4f32_superreg_align4: ; GFX9-ALIGNED: ; %bb.0: -; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-ALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0 ; 
GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-ALIGNED-NEXT: v_lshl_add_u32 v0, v0, 4, s4 ; GFX9-ALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -984,8 +984,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs ; ; GFX9-UNALIGNED-LABEL: simple_write2_v4f32_superreg_align4: ; GFX9-UNALIGNED: ; %bb.0: -; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c -; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x24 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8 +; GFX9-UNALIGNED-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-UNALIGNED-NEXT: v_lshl_add_u32 v4, v0, 4, s4 ; GFX9-UNALIGNED-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 09e2e90feed96..abb1204512db0 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -262,7 +262,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -289,7 +289,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15 @@ -893,7 +893,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: 
s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -923,7 +923,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX1010-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) @@ -951,7 +951,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX1030-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -1665,7 +1665,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -1695,7 +1695,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX1010-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-PAL-NEXT: 
s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1010-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) @@ -1723,7 +1723,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX1030-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX1030-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX1030-PAL-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 15 @@ -2329,7 +2329,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX9-PAL-NEXT: s_mov_b32 s4, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX9-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s5, s5, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s4, s3 @@ -2354,7 +2354,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) { ; GFX10-PAL-NEXT: s_addc_u32 s5, s5, 0 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 -; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX10-PAL-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-PAL-NEXT: v_add_nc_u32_e32 v0, s0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index ff90763f51e9c..4ef2355f2bd03 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -320,7 +320,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -406,8 +406,8 @@ define amdgpu_kernel void @flat_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -478,8 +478,8 @@ define amdgpu_kernel void @flat_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -555,8 +555,8 @@ define amdgpu_kernel void @flat_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -635,8 +635,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -710,8 +710,8 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -792,8 +792,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -874,8 +874,8 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -964,8 +964,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1059,8 +1059,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw( ; ; 
SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1150,8 +1150,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1347,8 +1347,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 
v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1440,8 +1440,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1535,8 +1535,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1630,8 +1630,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1733,8 +1733,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1841,8 +1841,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1944,8 +1944,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2044,8 +2044,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2149,8 +2149,8 @@ define amdgpu_kernel void 
@flat_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2257,8 +2257,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2365,8 +2365,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2473,8 +2473,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2581,8 +2581,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2689,8 +2689,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2905,8 +2905,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -3010,8 +3010,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3122,8 +3122,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3236,8 +3236,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; 
SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3356,8 +3356,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3478,8 +3478,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3595,8 +3595,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3709,8 +3709,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3828,8 +3828,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3950,8 +3950,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4072,8 +4072,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: 
s_addc_u32 s5, s3, 0 @@ -4194,8 +4194,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4316,8 +4316,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4438,8 +4438,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4560,8 +4560,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4682,8 +4682,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4777,7 +4777,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4862,7 +4862,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4955,7 +4955,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5057,7 +5057,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5145,8 +5145,8 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5294,8 +5294,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, 
s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5374,8 +5374,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5449,8 +5449,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5529,8 +5529,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5611,8 +5611,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5699,8 +5699,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5792,8 +5792,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5886,8 +5886,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5987,8 +5987,8 @@ define amdgpu_kernel void 
@flat_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6091,8 +6091,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6185,8 +6185,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6278,8 +6278,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6373,8 +6373,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6474,8 +6474,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6580,8 +6580,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6681,8 +6681,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6779,8 +6779,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6882,8 +6882,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6988,8 +6988,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7200,8 +7200,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7306,8 +7306,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7412,8 +7412,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7518,8 +7518,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7624,8 +7624,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7729,8 +7729,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7844,8 +7844,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7960,8 +7960,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8083,8 +8083,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8210,8 +8210,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8332,8 +8332,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8451,8 +8451,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8575,8 +8575,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8702,8 +8702,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8829,8 +8829,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8956,8 +8956,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9083,8 +9083,8 @@ define amdgpu_kernel void 
@flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9210,8 +9210,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9337,8 +9337,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9464,8 +9464,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll index f8b7bd9adb737..ab79a4cb73af5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -137,7 +137,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -230,7 +230,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -319,7 +319,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll index ce46d51bbbb8d..49f2733fa2acd 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_store: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s3 @@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1891,8 +1891,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg( ; 
; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3439,8 +3439,8 @@ define amdgpu_kernel void 
@flat_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 
+; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4171,7 +4171,7 @@ define 
amdgpu_kernel void @flat_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4795,8 +4795,8 @@ define amdgpu_kernel void 
@flat_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
flat_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5504,8 +5504,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5929,8 +5929,8 @@ define amdgpu_kernel void 
@flat_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6099,8 +6099,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: 
s_addc_u32 s3, s3, 0 @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; 
; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8204,8 +8204,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 
s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 200037724ef17..b9a307074492b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -322,7 +322,7 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -410,8 +410,8 @@ define amdgpu_kernel 
void @flat_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -482,8 +482,8 @@ define amdgpu_kernel void @flat_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -559,8 +559,8 @@ define amdgpu_kernel void @flat_system_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -641,8 +641,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -718,8 +718,8 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -800,8 +800,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -884,8 +884,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -976,8 +976,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: 
s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1075,8 +1075,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1170,8 +1170,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1269,8 +1269,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1373,8 +1373,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1470,8 +1470,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1565,8 +1565,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1662,8 +1662,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1767,8 +1767,8 @@ 
define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1879,8 +1879,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1986,8 +1986,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2088,8 +2088,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2195,8 +2195,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2307,8 +2307,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2419,8 +2419,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2531,8 +2531,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_system_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2643,8 +2643,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2755,8 +2755,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2867,8 +2867,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2979,8 +2979,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -3088,8 +3088,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3200,8 +3200,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3316,8 +3316,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3438,8 +3438,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3564,8 +3564,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3685,8 +3685,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: 
s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3801,8 +3801,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3922,8 +3922,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4048,8 +4048,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4174,8 +4174,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 
-; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4300,8 +4300,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4426,8 +4426,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4552,8 +4552,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ 
-4678,8 +4678,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4804,8 +4804,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4903,7 +4903,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4988,7 +4988,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5081,7 +5081,7 
@@ define amdgpu_kernel void @flat_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5185,7 +5185,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -5275,8 +5275,8 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5347,8 +5347,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5424,8 +5424,8 @@ define amdgpu_kernel void @flat_system_one_as_release_store( ; ; 
SKIP-CACHE-INV-LABEL: flat_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5506,8 +5506,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5583,8 +5583,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5663,8 +5663,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5747,8 +5747,8 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5837,8 +5837,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5934,8 +5934,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6032,8 +6032,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6135,8 +6135,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6243,8 +6243,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -6341,8 +6341,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6434,8 +6434,8 @@ define amdgpu_kernel void 
@flat_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6531,8 +6531,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6634,8 +6634,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6744,8 +6744,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6849,8 +6849,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6949,8 +6949,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7054,8 +7054,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7164,8 +7164,8 @@ define amdgpu_kernel void 
@flat_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7274,8 +7274,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7384,8 +7384,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7494,8 +7494,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7604,8 +7604,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7714,8 +7714,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7824,8 +7824,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -7933,8 +7933,8 @@ define amdgpu_kernel void 
@flat_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8048,8 +8048,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8166,8 +8166,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8291,8 +8291,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8422,8 +8422,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8548,8 +8548,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8669,8 +8669,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: 
s_addc_u32 s5, s3, 0 @@ -8795,8 +8795,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8926,8 +8926,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9057,8 +9057,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9188,8 +9188,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9319,8 +9319,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9450,8 +9450,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9581,8 +9581,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 
s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -9712,8 +9712,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll index 397ab8b5448e0..88a5ccb5de9d1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll @@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -116,7 +116,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -183,7 +183,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -250,7 +250,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 @@ -317,7 +317,7 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -377,8 +377,8 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll index 47c0cbc57a835..1569af9d257f2 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: 
flat_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_wavefront_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: 
; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 
s3 @@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1891,8 +1891,8 @@ define amdgpu_kernel void 
@flat_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3439,8 +3439,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4171,7 +4171,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: 
flat_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_store: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4795,8 +4795,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 
0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5504,8 +5504,8 @@ define amdgpu_kernel void 
@flat_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5929,8 +5929,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6099,8 
+6099,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, 
s3, 0 @@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index 179de4dc05ffa..ead08ed9fb2bd 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -221,7 +221,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -316,7 +316,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -403,8 +403,8 @@ define amdgpu_kernel void @flat_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -475,8 +475,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -551,8 +551,8 @@ define amdgpu_kernel void @flat_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -630,8 +630,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -705,8 +705,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -782,8 +782,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -862,8 +862,8 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -946,8 +946,8 @@ define amdgpu_kernel void 
@flat_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1034,8 +1034,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1120,8 +1120,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1211,8 +1211,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1305,8 +1305,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -1397,8 +1397,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1487,8 +1487,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1580,8 +1580,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1677,8 +1677,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1778,8 +1778,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1875,8 +1875,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 
s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -1969,8 +1969,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2067,8 +2067,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2168,8 +2168,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2269,8 +2269,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2370,8 +2370,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -2474,8 +2474,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2584,8 +2584,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: 
s_addc_u32 s5, s3, 0 @@ -2697,8 +2697,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2814,8 +2814,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -2933,8 +2933,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3048,8 +3048,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3160,8 +3160,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3276,8 +3276,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3395,8 +3395,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 
s5, s3, 0 @@ -3514,8 +3514,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3633,8 +3633,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3752,8 +3752,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3871,8 +3871,8 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -3990,8 +3990,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4109,8 +4109,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -4204,7 +4204,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4289,7 +4289,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: 
flat_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4376,7 +4376,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4466,7 +4466,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1 @@ -4549,8 +4549,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4621,8 +4621,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4695,8 +4695,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4770,8 +4770,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4843,8 +4843,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4917,8 +4917,8 @@ define amdgpu_kernel void 
@flat_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -4993,8 +4993,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5070,8 +5070,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5149,8 +5149,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword 
s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5232,8 +5232,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5322,8 +5322,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5413,8 +5413,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s3 @@ -5503,8 +5503,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5590,8 +5590,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5679,8 +5679,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5769,8 +5769,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5861,8 +5861,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -5951,8 +5951,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6040,8 +6040,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6131,8 +6131,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
flat_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6223,8 +6223,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6315,8 +6315,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6407,8 +6407,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6499,8 +6499,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6591,8 +6591,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6683,8 +6683,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6775,8 +6775,8 @@ define amdgpu_kernel void 
@flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s2, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s3, s3, 0 @@ -6875,8 +6875,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -6984,8 +6984,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7319,8 +7319,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7430,8 +7430,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; 
SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7540,8 +7540,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7652,8 +7652,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7765,8 +7765,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7878,8 +7878,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry 
-; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -7991,8 +7991,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8104,8 +8104,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8217,8 +8217,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8330,8 +8330,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 @@ -8443,8 +8443,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s2, 16 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s3, 0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index d57b42dcc8f04..43120f5132077 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -244,7 +244,7 @@ define amdgpu_kernel void @global_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -346,7 +346,7 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -435,8 +435,8 @@ define amdgpu_kernel void @global_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -516,8 +516,8 @@ define amdgpu_kernel void @global_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -603,8 +603,8 @@ define amdgpu_kernel void @global_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -693,8 +693,8 @@ define amdgpu_kernel void @global_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -777,8 +777,8 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -868,8 +868,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -960,8 +960,8 @@ define amdgpu_kernel void @global_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1060,8 +1060,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1165,8 +1165,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1268,8 +1268,8 @@ define 
amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1378,8 +1378,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1491,8 +1491,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1590,8 +1590,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1689,8 +1689,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1789,8 +1789,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1897,8 +1897,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2010,8 +2010,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2117,8 +2117,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2221,8 +2221,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2331,8 +2331,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) @@ -2444,8 +2444,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2557,8 +2557,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2670,8 +2670,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2783,8 +2783,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2896,8 +2896,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3009,8 +3009,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3122,8 +3122,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3229,8 +3229,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3342,8 +3342,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3579,8 +3579,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3703,8 +3703,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3936,8 +3936,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4057,8 +4057,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
global_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4181,8 +4181,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4305,8 +4305,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4429,8 +4429,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4677,8 +4677,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4801,8 +4801,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4925,8 +4925,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry 
-; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5025,7 +5025,7 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5115,7 +5115,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5211,7 +5211,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5313,7 +5313,7 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5483,8 +5483,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5835,8 +5835,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6027,8 +6027,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
global_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6132,8 +6132,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6235,8 +6235,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6345,8 +6345,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6557,8 +6557,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6656,8 +6656,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6756,8 +6756,8 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6864,8 +6864,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6977,8 +6977,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7084,8 +7084,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7188,8 +7188,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7298,8 +7298,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7411,8 +7411,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7524,8 +7524,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7637,8 +7637,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7750,8 +7750,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7863,8 +7863,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7976,8 +7976,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8089,8 +8089,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8196,8 +8196,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8309,8 +8309,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8430,8 +8430,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8554,8 +8554,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8672,8 +8672,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 
s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8787,8 +8787,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8908,8 +8908,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9032,8 +9032,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9156,8 +9156,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9280,8 +9280,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9404,8 +9404,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9528,8 +9528,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9652,8 +9652,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9776,8 +9776,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll index d7be355e61a06..02616719c90c6 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) @@ -155,7 +155,7 @@ define amdgpu_kernel void @global_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 0 @@ -251,7 +251,7 @@ define amdgpu_kernel void @global_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -345,7 +345,7 @@ define amdgpu_kernel void @global_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll index d881b3d0e16b2..949b5a5ce4761 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -238,7 +238,7 @@ define amdgpu_kernel void @global_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -328,7 +328,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -414,8 +414,8 @@ define amdgpu_kernel void @global_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; 
%bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -576,8 +576,8 @@ define amdgpu_kernel void @global_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -657,8 +657,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -738,8 +738,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -819,8 +819,8 @@ define 
amdgpu_kernel void @global_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -900,8 +900,8 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -981,8 +981,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2327,8 +2327,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2996,8 +2996,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3745,8 +3745,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 
0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: 
global_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: 
global_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 
0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; 
%bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
global_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 
0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7420,8 +7420,8 @@ define amdgpu_kernel 
void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7741,8 +7741,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb 
-; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: 
global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index da895c684c430..93a58c17153ae 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -244,7 +244,7 @@ define amdgpu_kernel void @global_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -348,7 +348,7 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -439,8 +439,8 @@ define amdgpu_kernel void @global_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -520,8 +520,8 @@ define amdgpu_kernel void @global_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -607,8 +607,8 @@ define amdgpu_kernel void @global_system_release_store( ; ; SKIP-CACHE-INV-LABEL: global_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -699,8 +699,8 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -785,8 +785,8 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -876,8 +876,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -970,8 +970,8 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1072,8 +1072,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1181,8 +1181,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1288,8 +1288,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1400,8 +1400,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1517,8 +1517,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1620,8 +1620,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1719,8 +1719,8 @@ define amdgpu_kernel void 
@global_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1821,8 +1821,8 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1931,8 +1931,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2048,8 +2048,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2159,8 +2159,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2265,8 +2265,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2377,8 +2377,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2494,8 +2494,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg: 
; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2611,8 +2611,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2728,8 +2728,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2839,8 +2839,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2952,8 +2952,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3075,8 +3075,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3203,8 +3203,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3325,8 +3325,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3442,8 +3442,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3693,8 +3693,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3949,8 +3949,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4077,8 +4077,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4205,8 +4205,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_relese_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4333,8 +4333,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4461,8 +4461,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4565,7 +4565,7 @@ define amdgpu_kernel void @global_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4655,7 +4655,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: 
global_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4751,7 +4751,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4855,7 +4855,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4946,8 +4946,8 @@ define amdgpu_kernel void @global_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5027,8 +5027,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5114,8 +5114,8 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5206,8 +5206,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5292,8 +5292,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5383,8 +5383,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
global_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5477,8 +5477,8 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5579,8 +5579,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5688,8 +5688,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5795,8 +5795,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5907,8 +5907,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6024,8 +6024,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6127,8 +6127,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6226,8 +6226,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6328,8 +6328,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6438,8 +6438,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6666,8 +6666,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6772,8 +6772,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6884,8 +6884,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7001,8 +7001,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7118,8 +7118,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7235,8 +7235,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) @@ -7352,8 +7352,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7469,8 +7469,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7586,8 +7586,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7703,8 +7703,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7814,8 +7814,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7927,8 +7927,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8044,8 +8044,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8168,8 +8168,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8296,8 +8296,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8418,8 +8418,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8535,8 +8535,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8658,8 +8658,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8786,8 +8786,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8914,8 +8914,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 
0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9042,8 +9042,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9170,8 +9170,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9298,8 +9298,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9426,8 +9426,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9554,8 +9554,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll index b2ee94f20983e..dacc965f269c1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll @@ -56,7 +56,7 @@ define amdgpu_kernel void @global_volatile_load_0( ; ; SKIP-CACHE-INV-LABEL: global_volatile_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -132,7 +132,7 @@ define amdgpu_kernel void @global_volatile_load_1( ; ; SKIP-CACHE-INV-LABEL: global_volatile_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 
0xf000 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, 0 @@ -210,7 +210,7 @@ define amdgpu_kernel void @global_volatile_store_0( ; ; SKIP-CACHE-INV-LABEL: global_volatile_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -287,7 +287,7 @@ define amdgpu_kernel void @global_volatile_store_1( ; ; SKIP-CACHE-INV-LABEL: global_volatile_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, 0 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -361,7 +361,7 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -432,8 +432,8 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll index 5c674278ae3cd..3fe2c7bbc5e30 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -238,7 +238,7 @@ define amdgpu_kernel void @global_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -328,7 +328,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -414,8 +414,8 @@ define amdgpu_kernel void @global_wavefront_unordered_store( ; 
; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -495,8 +495,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -576,8 +576,8 @@ define amdgpu_kernel void @global_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -657,8 +657,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -738,8 +738,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -819,8 +819,8 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -900,8 +900,8 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -981,8 +981,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2327,8 +2327,8 
@@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2996,8 +2996,8 @@ define amdgpu_kernel void 
@global_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3745,8 +3745,8 @@ define amdgpu_kernel void 
@global_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
global_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: 
; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: 
s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7420,8 +7420,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7741,8 +7741,8 @@ define amdgpu_kernel void 
@global_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index 2f36a6d914a1e..339cb981b3f2c 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -58,7 +58,7 @@ define amdgpu_kernel void @global_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -148,7 +148,7 @@ define amdgpu_kernel void 
@global_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -239,7 +239,7 @@ define amdgpu_kernel void @global_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -334,7 +334,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -422,8 +422,8 @@ define amdgpu_kernel void @global_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -503,8 +503,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -589,8 +589,8 @@ define amdgpu_kernel void @global_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -678,8 +678,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -762,8 +762,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -845,8 +845,8 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
global_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -933,8 +933,8 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1024,8 +1024,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1117,8 +1117,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1212,8 +1212,8 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1315,8 +1315,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1421,8 +1421,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1519,8 +1519,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1610,8 +1610,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1706,8 +1706,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1805,8 +1805,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1906,8 +1906,8 @@ define amdgpu_kernel void 
@global_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2002,8 +2002,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2095,8 +2095,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2193,8 +2193,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], 
s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2294,8 +2294,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2395,8 +2395,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2496,8 +2496,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2597,8 +2597,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
global_workgroup_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2698,8 +2698,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2799,8 +2799,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2900,8 +2900,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3004,8 +3004,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3225,8 +3225,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3341,8 +3341,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
global_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3570,8 +3570,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3679,8 +3679,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3793,8 +3793,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3910,8 +3910,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4027,8 +4027,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4144,8 +4144,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
global_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4261,8 +4261,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4378,8 +4378,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4495,8 +4495,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4612,8 +4612,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4711,7 +4711,7 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4801,7 +4801,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4892,7 +4892,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4985,7 +4985,7 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s7, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s6, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5072,8 +5072,8 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5153,8 +5153,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5236,8 +5236,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5320,8 +5320,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5485,8 +5485,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5656,8 +5656,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5837,8 +5837,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -5937,8 
+5937,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6038,8 +6038,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dword s4, s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6134,8 +6134,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6225,8 +6225,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6318,8 +6318,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6412,8 +6412,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6508,8 +6508,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ 
-6602,8 +6602,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6695,8 +6695,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6790,8 +6790,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6886,8 +6886,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7078,8 +7078,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7174,8 +7174,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7270,8 +7270,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7366,8 +7366,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7462,8 +7462,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7564,8 +7564,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7672,8 +7672,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7782,8 +7782,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -7893,8 +7893,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8005,8 +8005,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8115,8 +8115,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8224,8 +8224,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8335,8 +8335,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; 
SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8447,8 +8447,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8559,8 +8559,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8671,8 +8671,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8783,8 +8783,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -8895,8 +8895,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9007,8 +9007,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -9119,8 +9119,8 @@ define 
amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll index b3a3059a3be31..a328fd34ba1f1 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_agent_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_agent_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_agent_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_agent_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_agent_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_agent_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_agent_release_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 
offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, 
v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_agent_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 
; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel 
void @local_agent_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_agent_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ 
-5619,7 +5589,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; 
SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_agent_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ 
define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, 
s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void 
@local_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll index 664d69d2d0cec..df3e5226c539f 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -64,8 +64,8 @@ define amdgpu_kernel void @local_nontemporal_load_0( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -165,8 +165,8 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 @@ -265,8 +265,8 @@ define amdgpu_kernel void @local_nontemporal_store_0( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; 
SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -363,8 +363,8 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll index 846245094b629..67ce190072b5b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_singlethread_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -233,7 +233,7 @@ define amdgpu_kernel void @local_singlethread_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -321,7 +321,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -401,7 +401,7 @@ define amdgpu_kernel void @local_singlethread_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -474,7 +474,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -547,7 +547,7 @@ define amdgpu_kernel void @local_singlethread_release_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -620,7 +620,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: 
local_singlethread_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -693,7 +693,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -766,7 +766,7 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -839,7 +839,7 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -912,7 +912,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) 
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -985,7 +985,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1736,13 +1731,12 @@ define amdgpu_kernel void 
@local_singlethread_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1818,13 +1812,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 
offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 
+; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2760,13 +2743,12 @@ define amdgpu_kernel void 
@local_singlethread_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2858,13 +2840,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3642,13 +3616,12 @@ define amdgpu_kernel void 
@local_singlethread_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3740,13 +3713,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4118,7 +4088,7 @@ define amdgpu_kernel void 
@local_singlethread_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4812,7 +4782,7 @@ define amdgpu_kernel void 
@local_singlethread_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_endpgm ; @@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_singlethread_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, 
v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6733,13 +6686,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6831,13 +6783,12 @@ define amdgpu_kernel void 
@local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg ; ; SKIP-CACHE-INV-LABEL: 
local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll index 64129cdd666bd..4efd46d271600 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_system_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_system_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_system_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_system_acquire_load( ; ; SKIP-CACHE-INV-LABEL: 
local_system_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_system_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_system_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_system_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_system_release_store( ; ; SKIP-CACHE-INV-LABEL: local_system_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel 
void @local_system_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 
v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void 
@local_system_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, 
s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 
-; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: 
ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_load: ; 
SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ 
-4851,7 +4821,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_system_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void 
@local_system_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void 
@local_system_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, 
-1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: 
ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void 
@local_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 
+; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 
@@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index 2f47903916565..97aced3707afb 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -62,8 +62,8 @@ define amdgpu_kernel void @local_volatile_load_0( ; ; SKIP-CACHE-INV-LABEL: local_volatile_load_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -139,8 +139,8 @@ define amdgpu_kernel void @local_volatile_load_1( ; ; SKIP-CACHE-INV-LABEL: local_volatile_load_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_mov_b32 s3, 0xf000 @@ -215,8 +215,8 
@@ define amdgpu_kernel void @local_volatile_store_0( ; ; SKIP-CACHE-INV-LABEL: local_volatile_store_0: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -289,8 +289,8 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; SKIP-CACHE-INV-LABEL: local_volatile_store_1: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -360,7 +360,7 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -423,7 +423,7 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll index 3fde622dee480..046325f5437f8 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @local_wavefront_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -233,7 +233,7 @@ define amdgpu_kernel void @local_wavefront_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -321,7 +321,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -401,7 +401,7 @@ define amdgpu_kernel void @local_wavefront_unordered_store( ; ; 
SKIP-CACHE-INV-LABEL: local_wavefront_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -474,7 +474,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -547,7 +547,7 @@ define amdgpu_kernel void @local_wavefront_release_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -620,7 +620,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -693,7 +693,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 @@ -766,7 +766,7 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -839,7 +839,7 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -912,7 +912,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -985,7 +985,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_wavefront_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1736,13 +1731,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1818,13 +1812,12 @@ define 
amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; 
SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 
s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2760,13 +2743,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2858,13 +2840,12 @@ define amdgpu_kernel void 
@local_wavefront_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; 
SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 
-; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; 
SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3642,13 +3616,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3740,13 +3713,12 @@ define amdgpu_kernel void 
@local_wavefront_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4118,7 +4088,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load( ; ; 
SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4812,7 +4782,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
local_wavefront_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb 
+; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_wavefront_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_endpgm ; @@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6733,13 +6686,12 @@ define amdgpu_kernel 
void @local_wavefront_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6831,13 +6783,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: 
local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll index 7094634268eed..580d7a814a7dd 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -57,7 +57,7 @@ define 
amdgpu_kernel void @local_workgroup_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -145,7 +145,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -234,7 +234,7 @@ define amdgpu_kernel void @local_workgroup_acquire_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -329,7 +329,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -413,7 +413,7 @@ define amdgpu_kernel void @local_workgroup_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: 
s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -486,7 +486,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -564,7 +564,7 @@ define amdgpu_kernel void @local_workgroup_release_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -645,7 +645,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -721,7 +721,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -799,7 +799,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -880,7 +880,7 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -966,7 +966,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; ; 
SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 
m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -1977,13 +1971,12 @@ define amdgpu_kernel void 
@local_workgroup_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_endpgm @@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) @@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -3580,13 +3558,12 @@ define amdgpu_kernel void 
@local_workgroup_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: 
s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; 
SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) @@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load( ; ; 
SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_load: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_store: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s1 @@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: 
local_workgroup_one_as_acq_rel_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 @@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; 
%entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6277,13 +6240,12 @@ define amdgpu_kernel void 
@local_workgroup_one_as_release_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; 
SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 
v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_b32 v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_endpgm ; @@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 
-; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7529,13 +7478,12 @@ define 
amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: 
s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; 
SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: 
v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 @@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; SKIP-CACHE-INV: ; %bb.0: ; %entry -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; SKIP-CACHE-INV-NEXT: s_mov_b32 m0, -1 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s0 -; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s3 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s0 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s2 +; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v2, s1 ; SKIP-CACHE-INV-NEXT: ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16 ; SKIP-CACHE-INV-NEXT: s_waitcnt 
lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll index 2af31ad68e050..0fd1e7a04e4b5 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -81,8 +81,8 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 @@ -208,8 +208,8 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_add_i32_e32 v0, vcc, s2, v0 @@ -336,8 +336,8 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; 
SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -462,8 +462,8 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll index c4c60fe16fb38..f605514382075 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll @@ -81,8 +81,8 @@ define amdgpu_kernel void @private_volatile_load_0( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v0, s2 ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 @@ -178,8 +178,8 @@ define amdgpu_kernel void @private_volatile_load_1( ; SKIP-CACHE-INV-NEXT: s_getpc_b64 s[4:5] ; SKIP-CACHE-INV-NEXT: s_mov_b32 s4, s0 ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dword s2, s[0:1], 0x0 +; 
SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: v_add_i32_e32 v0, vcc, s2, v0 @@ -280,8 +280,8 @@ define amdgpu_kernel void @private_volatile_store_0( ; SKIP-CACHE-INV-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 @@ -381,8 +381,8 @@ define amdgpu_kernel void @private_volatile_store_1( ; SKIP-CACHE-INV-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_add_u32 s4, s4, s3 -; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0xb +; SKIP-CACHE-INV-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; SKIP-CACHE-INV-NEXT: s_load_dword s0, s[0:1], 0x2 ; SKIP-CACHE-INV-NEXT: s_addc_u32 s5, s5, 0 ; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0) ; SKIP-CACHE-INV-NEXT: s_load_dword s1, s[2:3], 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll index c432d9b4d46d4..d63c7890bc06a 100644 --- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll @@ -7,8 +7,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: 
v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -20,8 +20,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX7-LABEL: store_lds_v4i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 @@ -34,8 +34,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; ; GFX6-LABEL: store_lds_v4i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -49,8 +49,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX10-LABEL: store_lds_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 @@ -66,8 +66,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 @@ -110,8 +110,8 @@ define amdgpu_kernel void 
@store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -163,8 +163,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -217,8 +217,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s7 @@ -265,8 +265,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s7 @@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align2: ; GFX7: 
; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -314,8 +314,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align2: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -344,8 +344,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s7 @@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 @@ -382,8 +382,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd 
+; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -397,8 +397,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -413,8 +413,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 @@ -431,8 +431,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -444,8 +444,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; 
GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 @@ -458,8 +458,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -473,8 +473,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 @@ -490,8 +490,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s2 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 @@ -503,8 +503,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v4i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v4, s4 
@@ -517,8 +517,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v4i32_align16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s6 @@ -532,8 +532,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v4i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v4, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll index 913b7e46f4edc..816ab02bcff20 100644 --- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll @@ -7,8 +7,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -19,8 +19,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX7-LABEL: store_lds_v3i32: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -32,8 +32,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; ; GFX6-LABEL: store_lds_v3i32: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -47,8 +47,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX10-LABEL: store_lds_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 @@ -63,8 +63,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 @@ -98,8 +98,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align1: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x 
i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align1: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -183,8 +183,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s6 @@ -222,8 +222,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s6 @@ -239,8 +239,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align2: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -263,8 +263,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align2: ; GFX6: ; %bb.0: -; GFX6-NEXT: 
s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -288,8 +288,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s6 @@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s4 @@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -336,8 +336,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align4: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: 
s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s4 @@ -351,8 +351,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align4: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s4 @@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s6 @@ -381,8 +381,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, s4 @@ -395,8 +395,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align8: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 
m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s6 @@ -427,8 +427,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX9-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 @@ -439,8 +439,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX7-LABEL: store_lds_v3i32_align16: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd -; GFX7-NEXT: s_load_dword s0, s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x4 +; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 m0, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 @@ -452,8 +452,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; ; GFX6-LABEL: store_lds_v3i32_align16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x4 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, s4 @@ -467,8 +467,8 @@ 
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX10-LABEL: store_lds_v3i32_align16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 -; GFX10-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 From 8ff3c9e0be7ad4b50edfd30710b9c6eaf36713ff Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 18 Jan 2022 21:08:01 -0500 Subject: [PATCH 028/946] AMDGPU/GlobalISel: Fix selection of gfx90a FP atomics The struct/raw forms for the buffer atomics now work as expected. However, we're incorrectly handling the legacy form (which we probably shouldn't handle at all). We also are not diagnosing the use of the return value on gfx908. These will be addressed separately. --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 12 + llvm/lib/Target/AMDGPU/FLATInstructions.td | 2 +- .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 477 +++++++++++++++++- 3 files changed, 487 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ec86c446f564e..cc0c6b5ecf441 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1296,6 +1296,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.hasAtomicFaddInsts()) Atomic.legalFor({{S32, GlobalPtr}}); + if (ST.hasGFX90AInsts()) { + // These are legal with some caveats, and should have undergone expansion in + // the IR in most situations + // TODO: Move atomic expansion into legalizer + // TODO: Also supports <2 x f16> + Atomic.legalFor({ + {S32, GlobalPtr}, + {S64, GlobalPtr}, + {S64, FlatPtr} + }); + } + // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output // demarshalling 
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index c7ec5308e6d09..c530d3cb49f04 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -915,7 +915,7 @@ class FlatSignedAtomicPatNoRtn : GCNPat < (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), - (inst $vaddr, $data, $offset) + (inst VReg_64:$vaddr, getVregSrcForVT.ret:$data, $offset) >; class ScratchLoadSignedPat : GCNPat < diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index dcd447eae9766..be4737d46c5a4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1) declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) @@ -408,6 +408,102 @@ main_body: ret void } +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* %ptr) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 
+; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB24_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + ret void +} + +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspace(1)* %ptr) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + ret void +} + +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrspace(1)* %ptr) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; 
GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB26_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + ret void +} + +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspace(1)* %ptr) #0 { +; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + ret void +} + define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body @@ -420,6 +516,84 @@ main_body: ret double %ret } +define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double %data) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX90A-NEXT: ; 
=>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] +; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB29_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v2 +; GFX90A-NEXT: v_mov_b32_e32 v1, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + ret double %ret +} + +define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, double %data) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + ret double %ret +} + +define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, double %data) #1 { +; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner 
Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] +; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB31_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v2 +; GFX90A-NEXT: v_mov_b32_e32 v1, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + ret double %ret +} + define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body @@ -444,6 +618,195 @@ main_body: ret double %ret } +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double addrspace(1)* %ptr) { +; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: v_mov_b32_e32 v4, 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] +; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 
vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB34_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB35_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_mov_b32_e32 
v3, 0x40100000 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + ret void +} + +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB37_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + ret void +} + +define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] +; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB38_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v2 +; GFX90A-NEXT: v_mov_b32_e32 v1, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + ret double %ret +} + +define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v2, 0 +; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + ret double %ret +} + +define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { +; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; 
GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] +; GFX90A-NEXT: v_add_f64 v[2:3], v[4:5], 4.0 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB40_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v2 +; GFX90A-NEXT: v_mov_b32_e32 v1, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + ret double %ret +} + define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body @@ -470,6 +833,35 @@ main_body: ret double %ret } +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %ptr) { +; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[2:3], 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 +; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], 
v[0:3] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] +; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] +; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] +; GFX90A-NEXT: s_cbranch_execnz .LBB43_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + ret void +} + define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body @@ -551,7 +943,7 @@ main_body: ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -568,7 +960,52 @@ main_body: ret void } -define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspace(3)* %ptr) #0 { +; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 +; GFX90A-NEXT: v_mov_b32_e32 v0, 0 +; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v2, s0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v2, v[0:1] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + ret void +} + +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) #4 { +; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: +; GFX90A: ; %bb.0: ; %main_body +; 
GFX90A-NEXT: s_load_dword s2, s[0:1], 0x24 +; GFX90A-NEXT: s_mov_b64 s[0:1], 0 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, s2 +; GFX90A-NEXT: ds_read_b64 v[0:1], v0 +; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 +; GFX90A-NEXT: v_mov_b32_e32 v4, s2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1] +; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX90A-NEXT: s_cbranch_execnz .LBB52_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_endpgm +main_body: + %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + ret void +} + +define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -582,3 +1019,37 @@ main_body: %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst ret double %ret } + +define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 { +; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + ret double %ret +} + +define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 { +; 
GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: +; GFX90A: ; %bb.0: ; %main_body +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v4, v1 +; GFX90A-NEXT: v_mov_b32_e32 v5, v2 +; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5] +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +main_body: + %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + ret double %ret +} + +attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" } +attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" } +attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" } +attributes #3 = { "denormal-fp-math"="ieee,ieee" } +attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" } From be7e938e2712f3d84d1775ecdb1bc6784ea114f7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 10:45:37 -0500 Subject: [PATCH 029/946] AMDGPU/GlobalISel: Stop handling llvm.amdgcn.buffer.atomic.fadd This code is not structured to handle the legacy buffer intrinsics and was miscompiling them. 
--- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 - .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll | 95 +++++-------------- 2 files changed, 22 insertions(+), 75 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index cc0c6b5ecf441..93bddd9681ed7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4167,7 +4167,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP; - case Intrinsic::amdgcn_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_buffer_atomic_fadd: case Intrinsic::amdgcn_struct_buffer_atomic_fadd: return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD; @@ -5186,7 +5185,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: - case Intrinsic::amdgcn_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_buffer_atomic_fmin: case Intrinsic::amdgcn_struct_buffer_atomic_fmin: case Intrinsic::amdgcn_raw_buffer_atomic_fmax: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index be4737d46c5a4..f6a84f553f3b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A -declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1) declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare 
double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) @@ -16,56 +15,6 @@ declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1) -define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { -; GFX90A-LABEL: buffer_atomic_add_noret_f64: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 -; GFX90A-NEXT: s_load_dword s8, s[0:1], 0x3c -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc -; GFX90A-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) - ret void -} - -define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { -; GFX90A-LABEL: buffer_atomic_add_rtn_f64: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; GFX90A-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) - store double %ret, double* undef - ret void -} - -define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { -; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc: -; GFX90A: ; %bb.0: ; %main_body -; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 
0x34 -; GFX90A-NEXT: s_load_dword s10, s[0:1], 0x3c -; GFX90A-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x44 -; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: v_mov_b32_e32 v2, s10 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc scc /* unexpected cache policy bit */ -; GFX90A-NEXT: v_mov_b32_e32 v2, 0 -; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] -; GFX90A-NEXT: s_endpgm -main_body: - %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1) - store double %ret, double addrspace(1)* %out, align 8 - ret void -} - define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: ; GFX90A: ; %bb.0: ; %main_body @@ -418,7 +367,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB21_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 @@ -431,7 +380,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB24_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB21_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -466,7 +415,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX90A-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB23_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: buffer_wbl2 @@ -479,7 +428,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB26_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB23_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -522,7 +471,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off ; GFX90A-NEXT: s_mov_b64 s[4:5], 0 -; GFX90A-NEXT: .LBB29_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] @@ -536,7 +485,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_cbranch_execnz .LBB29_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB26_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: v_mov_b32_e32 v0, v2 @@ -569,7 +518,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off ; GFX90A-NEXT: s_mov_b64 s[4:5], 0 -; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB28_1: ; %atomicrmw.start ; GFX90A-NEXT: ; 
=>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] @@ -583,7 +532,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_cbranch_execnz .LBB31_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB28_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: v_mov_b32_e32 v0, v2 @@ -628,7 +577,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] -; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -639,7 +588,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB34_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB31_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -655,7 +604,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] -; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB32_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 @@ -670,7 +619,7 @@ define amdgpu_kernel void 
@flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB35_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB32_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -704,7 +653,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) # ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] -; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 @@ -720,7 +669,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) # ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB37_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB34_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -734,7 +683,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] ; GFX90A-NEXT: s_mov_b64 s[4:5], 0 -; GFX90A-NEXT: .LBB38_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] @@ -748,7 +697,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_cbranch_execnz .LBB38_1 
+; GFX90A-NEXT: s_cbranch_execnz .LBB35_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: v_mov_b32_e32 v0, v2 @@ -781,7 +730,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] ; GFX90A-NEXT: s_mov_b64 s[4:5], 0 -; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] @@ -796,7 +745,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] ; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_cbranch_execnz .LBB40_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB37_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: v_mov_b32_e32 v0, v2 @@ -841,7 +790,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] -; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 @@ -854,7 +803,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt ; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] -; GFX90A-NEXT: s_cbranch_execnz .LBB43_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB40_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: @@ -985,7 +934,7 @@ define 
amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, s2 ; GFX90A-NEXT: ds_read_b64 v[0:1], v0 -; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start +; GFX90A-NEXT: .LBB49_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0 @@ -997,7 +946,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add ; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1] -; GFX90A-NEXT: s_cbranch_execnz .LBB52_1 +; GFX90A-NEXT: s_cbranch_execnz .LBB49_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: From 75017db08cd3d138aa46b996ce03394d02632ef5 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 20 Jan 2022 17:07:11 +0000 Subject: [PATCH 030/946] [RISCV] Add tests for commuted vector/scalar VP patterns This patch adds a variety of tests checking that we can match vector/scalar instructions against masked VP intrinsics when the splat is on the LHS. At this stage, we can't, despite us having ostensibly-commutable ISel patterns for them. The use of V0 as the mask operand interferes with the auto-generated ISel table. 
--- .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 62 ++++++++++++------- .../RISCV/rvv/fixed-vectors-vand-vp.ll | 28 +++++++++ .../RISCV/rvv/fixed-vectors-vfadd-vp.ll | 28 +++++++++ .../RISCV/rvv/fixed-vectors-vmul-vp.ll | 14 +++++ .../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll | 14 +++++ .../RISCV/rvv/fixed-vectors-vxor-vp.ll | 14 +++++ llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 54 ++++++++++------ llvm/test/CodeGen/RISCV/rvv/vand-vp.ll | 14 +++++ llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 30 +++++++++ llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 14 +++++ llvm/test/CodeGen/RISCV/rvv/vor-vp.ll | 28 +++++++++ llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll | 14 +++++ 12 files changed, 270 insertions(+), 44 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 3f89d62501b8e..bd29f51a93919 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -128,6 +128,20 @@ define <4 x i8> @vadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl ret <4 x i8> %v } +define <4 x i8> @vadd_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + define <4 x i8> @vadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vx_v4i8_unmasked: ; CHECK: # %bb.0: @@ -407,17 +421,17 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: addi a3, a1, -128 ; CHECK-NEXT: vmv1r.v v24, v0 
; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: bltu a1, a3, .LBB31_2 +; CHECK-NEXT: bltu a1, a3, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a3 -; CHECK-NEXT: .LBB31_2: +; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a1, a2, .LBB31_4 +; CHECK-NEXT: bltu a1, a2, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: .LBB31_4: +; CHECK-NEXT: .LBB32_4: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t @@ -433,17 +447,17 @@ define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, -128 ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bltu a0, a1, .LBB32_2 +; CHECK-NEXT: bltu a0, a1, .LBB33_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: .LBB33_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vadd.vi v16, v16, -1 -; CHECK-NEXT: bltu a0, a1, .LBB32_4 +; CHECK-NEXT: bltu a0, a1, .LBB33_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: .LBB33_4: ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret @@ -1528,17 +1542,17 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB107_2 +; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB107_2: +; RV32-NEXT: .LBB108_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: bltu a0, a1, .LBB107_4 +; RV32-NEXT: bltu a0, a1, .LBB108_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB107_4: +; RV32-NEXT: .LBB108_4: ; RV32-NEXT: 
vsetvli zero, a0, e64, m8, ta, mu ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t @@ -1551,17 +1565,17 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64-NEXT: addi a2, a0, -16 ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: bltu a0, a2, .LBB107_2 +; RV64-NEXT: bltu a0, a2, .LBB108_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB107_2: +; RV64-NEXT: .LBB108_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: bltu a0, a1, .LBB107_4 +; RV64-NEXT: bltu a0, a1, .LBB108_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB107_4: +; RV64-NEXT: .LBB108_4: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t @@ -1580,17 +1594,17 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: bltu a0, a2, .LBB108_2 +; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB108_2: +; RV32-NEXT: .LBB109_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: bltu a0, a1, .LBB108_4 +; RV32-NEXT: bltu a0, a1, .LBB109_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a0, 16 -; RV32-NEXT: .LBB108_4: +; RV32-NEXT: .LBB109_4: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: ret @@ -1599,17 +1613,17 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64: # %bb.0: ; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: li a2, 0 -; RV64-NEXT: bltu a0, a1, .LBB108_2 +; RV64-NEXT: bltu a0, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: .LBB108_2: +; RV64-NEXT: .LBB109_2: ; RV64-NEXT: vsetvli zero, 
a2, e64, m8, ta, mu ; RV64-NEXT: li a1, 16 ; RV64-NEXT: vadd.vi v16, v16, -1 -; RV64-NEXT: bltu a0, a1, .LBB108_4 +; RV64-NEXT: bltu a0, a1, .LBB109_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a0, 16 -; RV64-NEXT: .LBB108_4: +; RV64-NEXT: .LBB109_4: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; RV64-NEXT: vadd.vi v8, v8, -1 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index ea41aba5e5d71..419a9d4b5b1bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -52,6 +52,20 @@ define <2 x i8> @vand_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl ret <2 x i8> %v } +define <2 x i8> @vand_vx_v2i8_commute(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vand_vx_v2i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vand.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + define <2 x i8> @vand_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vand_vx_v2i8_unmasked: ; CHECK: # %bb.0: @@ -66,6 +80,20 @@ define <2 x i8> @vand_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ret <2 x i8> %v } +define <2 x i8> @vand_vx_v2i8_unmasked_commute(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vand_vx_v2i8_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vand.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %head = 
insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + define <2 x i8> @vand_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vand_vi_v2i8: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll index c368a2f8bc40d..215daf4f92f51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll @@ -252,6 +252,20 @@ define <2 x float> @vfadd_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 z ret <2 x float> %v } +define <2 x float> @vfadd_vf_v2f32_commute(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_v2f32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> undef, float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl) + ret <2 x float> %v +} + define <2 x float> @vfadd_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroext %evl) { ; CHECK-LABEL: vfadd_vf_v2f32_unmasked: ; CHECK: # %bb.0: @@ -266,6 +280,20 @@ define <2 x float> @vfadd_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe ret <2 x float> %v } +define <2 x float> @vfadd_vf_v2f32_unmasked_commute(<2 x float> %va, float %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_v2f32_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> undef, 
float %b, i32 0 + %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> undef, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer + %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl) + ret <2 x float> %v +} + declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) define <4 x float> @vfadd_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll index 6907f6ae61688..976d12eab3d72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll @@ -352,6 +352,20 @@ define <8 x i16> @vmul_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext ret <8 x i16> %v } +define <8 x i16> @vmul_vx_v8i16_commute(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vx_v8i16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmul.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + define <8 x i16> @vmul_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { ; CHECK-LABEL: vmul_vx_v8i16_unmasked: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll index f623f11a56e20..b1807c1b29de6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll @@ -128,6 +128,20 @@ 
define <4 x i8> @vor_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) ret <4 x i8> %v } +define <4 x i8> @vor_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vor_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + define <4 x i8> @vor_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vor_vx_v4i8_unmasked: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll index 4edd829aeb1e2..207f12ff3822e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll @@ -52,6 +52,20 @@ define <2 x i8> @vxor_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl ret <2 x i8> %v } +define <2 x i8> @vxor_vx_v2i8_commute(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vxor_vx_v2i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vxor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + define <2 x i8> @vxor_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vxor_vx_v2i8_unmasked: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 13c41718676f9..4206edadadb85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -54,6 +54,20 @@ define @vadd_vx_nxv1i8( %va, i8 %b, %v } +define @vadd_vx_nxv1i8_commute( %va, i8 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vadd_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.add.nxv1i8( %vb, %va, %m, i32 %evl) + ret %v +} + define @vadd_vx_nxv1i8_unmasked( %va, i8 %b, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vx_nxv1i8_unmasked: ; CHECK: # %bb.0: @@ -636,20 +650,20 @@ define @vadd_vi_nxv128i8( %va, @vadd_vi_nxv128i8_unmasked( %va, i ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: mv a2, a0 -; CHECK-NEXT: bltu a0, a1, .LBB50_2 +; CHECK-NEXT: bltu a0, a1, .LBB51_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: .LBB51_2: ; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu ; CHECK-NEXT: sub a1, a0, a1 ; CHECK-NEXT: vadd.vi v8, v8, -1 -; CHECK-NEXT: bltu a0, a1, .LBB50_4 +; CHECK-NEXT: bltu a0, a1, .LBB51_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB50_4: +; CHECK-NEXT: .LBB51_4: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu ; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: ret @@ -1540,16 +1554,16 @@ define @vadd_vi_nxv32i32( %va, @vadd_vi_nxv32i32_unmasked( %va, i ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-NEXT: bltu a0, a3, .LBB118_2 +; CHECK-NEXT: bltu a0, a3, .LBB119_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: .LBB119_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, 
mu ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t -; CHECK-NEXT: bltu a0, a1, .LBB118_4 +; CHECK-NEXT: bltu a0, a1, .LBB119_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: .LBB118_4: +; CHECK-NEXT: .LBB119_4: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t @@ -1614,16 +1628,16 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, @vand_vx_nxv32i16( %va, i16 %b, %v } +define @vand_vx_nxv32i16_commute( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vand_vx_nxv32i16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, mu +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vand.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.and.nxv32i16( %vb, %va, %m, i32 %evl) + ret %v +} + define @vand_vx_nxv32i16_unmasked( %va, i16 %b, i32 zeroext %evl) { ; CHECK-LABEL: vand_vx_nxv32i16_unmasked: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 7e0291b6156aa..019a72b02be72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -42,6 +42,20 @@ define @vfadd_vf_nxv1f16( %va, half %b, < ret %v } +define @vfadd_vf_nxv1f16_commute( %va, half %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1f16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, half %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.fadd.nxv1f16( %vb, %va, %m, i32 %evl) + ret %v +} + define @vfadd_vf_nxv1f16_unmasked( %va, half %b, i32 zeroext %evl) { ; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked: ; CHECK: # %bb.0: @@ 
-58,6 +72,22 @@ define @vfadd_vf_nxv1f16_unmasked( %va, h ret %v } +define @vfadd_vf_nxv1f16_unmasked_commute( %va, half %b, i32 zeroext %evl) { +; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, half %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.fadd.nxv1f16( %vb, %va, %m, i32 %evl) + ret %v +} + declare @llvm.vp.fadd.nxv2f16(, , , i32) define @vfadd_vv_nxv2f16( %va, %b, %m, i32 zeroext %evl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index bcf43854f94e6..ecf33e3c2ca20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -954,6 +954,20 @@ define @vmul_vx_nxv16i32( %va, i32 %b, %v } +define @vmul_vx_nxv16i32_commute( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmul_vx_nxv16i32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmul.vv v8, v16, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.mul.nxv16i32( %vb, %va, %m, i32 %evl) + ret %v +} + define @vmul_vx_nxv16i32_unmasked( %va, i32 %b, i32 zeroext %evl) { ; CHECK-LABEL: vmul_vx_nxv16i32_unmasked: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll index 5c08d547204d8..469d0489c5d83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll @@ -1118,6 +1118,20 @@ define @vor_vx_nxv2i32( %va, i32 %b, %v } +define 
@vor_vx_nxv2i32_commute( %va, i32 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vor_vx_nxv2i32_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.vp.or.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + define @vor_vx_nxv2i32_unmasked( %va, i32 %b, i32 zeroext %evl) { ; CHECK-LABEL: vor_vx_nxv2i32_unmasked: ; CHECK: # %bb.0: @@ -1132,6 +1146,20 @@ define @vor_vx_nxv2i32_unmasked( %va, i32 % ret %v } +define @vor_vx_nxv2i32_unmasked_commute( %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vor_vx_nxv2i32_unmasked_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vor.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %head = insertelement undef, i1 true, i32 0 + %m = shufflevector %head, undef, zeroinitializer + %v = call @llvm.vp.or.nxv2i32( %vb, %va, %m, i32 %evl) + ret %v +} + define @vor_vi_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vor_vi_nxv2i32: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll index 7d7d67d673bd0..975c4e926fbd1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll @@ -870,6 +870,20 @@ define @vxor_vx_nxv1i16( %va, i16 %b, %v } +define @vxor_vx_nxv1i16_commute( %va, i16 %b, %m, i32 zeroext %evl) { +; CHECK-LABEL: vxor_vx_nxv1i16_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vxor.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, 
zeroinitializer + %v = call @llvm.vp.xor.nxv1i16( %vb, %va, %m, i32 %evl) + ret %v +} + define @vxor_vx_nxv1i16_unmasked( %va, i16 %b, i32 zeroext %evl) { ; CHECK-LABEL: vxor_vx_nxv1i16_unmasked: ; CHECK: # %bb.0: From 81cbbe3e17a4a9b2e0d227b591553d90eeab1400 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Thu, 20 Jan 2022 18:30:09 +0100 Subject: [PATCH 031/946] [flang][NFC] Remove unused/duplicated kStridePosInDim kStridePosInDim is a duplicate of kDimStridePos and is not used. Just remove it. Reviewed By: kiranchandramohan Differential Revision: https://reviews.llvm.org/D117784 --- flang/lib/Optimizer/CodeGen/TypeConverter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.h b/flang/lib/Optimizer/CodeGen/TypeConverter.h index d96b752be1b81..478c2592b10b1 100644 --- a/flang/lib/Optimizer/CodeGen/TypeConverter.h +++ b/flang/lib/Optimizer/CodeGen/TypeConverter.h @@ -31,9 +31,9 @@ static constexpr unsigned kTypePosInBox = 4; static constexpr unsigned kAttributePosInBox = 5; static constexpr unsigned kF18AddendumPosInBox = 6; static constexpr unsigned kDimsPosInBox = 7; -static constexpr unsigned kStridePosInDim = 2; static constexpr unsigned kOptTypePtrPosInBox = 8; static constexpr unsigned kOptRowTypePosInBox = 9; + // Position of the different values in [dims] static constexpr unsigned kDimLowerBoundPos = 0; static constexpr unsigned kDimExtentPos = 1; From 191a6e9dfa1a54b616e12bde2efa849ad8e03f48 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 20 Jan 2022 09:25:45 -0800 Subject: [PATCH 032/946] optimize icmp-ugt-ashr This diff optimizes the sequence icmp-ugt(ashr,C_1) C_2. InstCombine already implements this optimization for sgt, and this patch adds support for ugt. This patch adds the check for UGT.
@craig.topper came up with the idea and proof: define i1 @src(i8 %x, i8 %y, i8 %c) { %cp1 = add i8 %c, 1 %i = shl i8 %cp1, %y %i.2 = ashr i8 %i, %y %cmp = icmp eq i8 %cp1, %i.2 ;Assume: C + 1 == (((C + 1) << y) >> y) call void @llvm.assume(i1 %cmp) ; uncomment for the sgt case %j = shl i8 %cp1, %y %j.2 = sub i8 %j, 1 %cmp2 = icmp ne i8 %j.2, 127 ;Assume (((c + 1 ) << y) - 1) != 127 call void @llvm.assume(i1 %cmp2) %s = ashr i8 %x, %y %r = icmp sgt i8 %s, %c ret i1 %r } define i1 @tgt(i8 %x, i8 %y, i8 %c) { %cp1 = add i8 %c, 1 %j = shl i8 %cp1, %y %j.2 = sub i8 %j, 1 %r = icmp sgt i8 %x, %j.2 ret i1 %r } declare void @llvm.assume(i1) This change is related to the optimizations in D117252. Differential Revision: https://reviews.llvm.org/D117365 --- .../InstCombine/InstCombineCompares.cpp | 8 +++++++- .../Transforms/InstCombine/icmp-shr-lt-gt.ll | 15 ++++++++++---- llvm/test/Transforms/InstCombine/icmp-shr.ll | 20 ++++--------------- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7dc9ae1d9d064..fd58a44504b3c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2239,7 +2239,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, // those conditions rather than checking them. This is difficult because of // undef/poison (PR34838). 
if (IsAShr) { - if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT || IsExact) { + if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) { // When ShAmtC can be shifted losslessly: // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC) // icmp slt/ult (ashr X, ShAmtC), C --> icmp slt/ult X, (C << ShAmtC) @@ -2254,6 +2254,12 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); } + if (Pred == CmpInst::ICMP_UGT) { + // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 + APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; + if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } // If the compare constant has significant bits above the lowest sign-bit, // then convert an unsigned cmp to a test of the sign-bit: diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll index a9fb4b1f217d4..82f5a6dad7608 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll @@ -2344,8 +2344,7 @@ define i1 @ashr_ne_noexact(i8 %x) { define i1 @ashr_ugt_noexact(i8 %x) { ; CHECK-LABEL: @ashr_ugt_noexact( -; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 3 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[S]], 10 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 87 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i8 %x, 3 @@ -2356,8 +2355,7 @@ define i1 @ashr_ugt_noexact(i8 %x) { define i1 @ashr_uge_noexact(i8 %x) { ; CHECK-LABEL: @ashr_uge_noexact( -; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 3 -; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[S]], 9 +; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 79 ; CHECK-NEXT: ret i1 [[C]] ; %s = ashr i8 %x, 3 @@ -2451,6 +2449,15 @@ define <4 x i1> @ashr_00_00_vec(<4 x i8> %x) { ret <4 x i1> %c } +define i1 @ashr_sgt_overflow(i8 
%x) { +; CHECK-LABEL: @ashr_sgt_overflow( +; CHECK-NEXT: ret i1 false +; + %s = ashr i8 %x, 1 + %c = icmp sgt i8 %s, 63 + ret i1 %c +} + define i1 @lshrult_01_00_exact(i4 %x) { ; CHECK-LABEL: @lshrult_01_00_exact( ; CHECK-NEXT: ret i1 false diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 09732fea5e38f..168aa81874f68 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -574,12 +574,9 @@ define i1 @ashr_ugt_0(i4 %x) { ret i1 %r } -; negative test - define i1 @ashr_ugt_1(i4 %x) { ; CHECK-LABEL: @ashr_ugt_1( -; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[S]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X:%.*]], 3 ; CHECK-NEXT: ret i1 [[R]] ; %s = ashr i4 %x, 1 @@ -587,12 +584,9 @@ define i1 @ashr_ugt_1(i4 %x) { ret i1 %r } -; negative test - define i1 @ashr_ugt_2(i4 %x) { ; CHECK-LABEL: @ashr_ugt_2( -; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[S]], 2 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X:%.*]], 5 ; CHECK-NEXT: ret i1 [[R]] ; %s = ashr i4 %x, 1 @@ -694,12 +688,9 @@ define i1 @ashr_ugt_11(i4 %x) { ret i1 %r } -; negative test - define i1 @ashr_ugt_12(i4 %x) { ; CHECK-LABEL: @ashr_ugt_12( -; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[S]], -4 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X:%.*]], -7 ; CHECK-NEXT: ret i1 [[R]] ; %s = ashr i4 %x, 1 @@ -707,12 +698,9 @@ define i1 @ashr_ugt_12(i4 %x) { ret i1 %r } -; negative test - define i1 @ashr_ugt_13(i4 %x) { ; CHECK-LABEL: @ashr_ugt_13( -; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[S]], -3 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X:%.*]], -5 ; CHECK-NEXT: ret i1 [[R]] ; %s = ashr i4 %x, 1 From feddf1150227eb43500106cfd6cf01456f2b87e4 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Wed, 19 Jan 2022 18:41:39 -0800 Subject: [PATCH 
033/946] [lld][WebAssemlby] Convert test to check disassembly output. NFC Differential Revision: https://reviews.llvm.org/D117739 --- lld/test/wasm/shared.s | 51 ++++++++++++++++++++++++++++++++-------- lld/test/wasm/shared64.s | 51 ++++++++++++++++++++++++++++++++-------- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/lld/test/wasm/shared.s b/lld/test/wasm/shared.s index 7861485470ffa..01dc8d51474d9 100644 --- a/lld/test/wasm/shared.s +++ b/lld/test/wasm/shared.s @@ -1,6 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # RUN: wasm-ld --experimental-pic -shared -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS .functype func_external () -> () @@ -205,16 +206,46 @@ get_local_func_address: # CHECK-NEXT: Functions: [ 3, 2 ] # check the generated code in __wasm_call_ctors and __wasm_apply_data_relocs functions -# TODO(sbc): Disassemble and verify instructions. 
- -# CHECK: - Type: CODE -# CHECK-NEXT: Functions: -# CHECK-NEXT: - Index: 0 -# CHECK-NEXT: Locals: [] -# CHECK-NEXT: Body: 10010B -# CHECK-NEXT: - Index: 1 -# CHECK-NEXT: Locals: [] -# CHECK-NEXT: Body: 230141046A2304360200230141086A230241016A3602002301410C6A230141006A360200230141106A2305360200230141146A230641046A3602000B + +# DIS: <__wasm_call_ctors>: +# DIS-EMPTY: +# DIS-NEXT: call 1 +# DIS-NEXT: end + +# DIS: <__wasm_apply_data_relocs>: +# DIS-EMPTY: +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: global.get 4 +# DIS-NEXT: i32.store 0 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 8 +# DIS-NEXT: i32.add +# DIS-NEXT: global.get 2 +# DIS-NEXT: i32.const 1 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.store 0 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 12 +# DIS-NEXT: i32.add +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.store 0 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 16 +# DIS-NEXT: i32.add +# DIS-NEXT: global.get 5 +# DIS-NEXT: i32.store 0 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 20 +# DIS-NEXT: i32.add +# DIS-NEXT: global.get 6 +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.store 0 +# DIS-NEXT: end # check the data segment initialized with __memory_base global as offset diff --git a/lld/test/wasm/shared64.s b/lld/test/wasm/shared64.s index 86d5a521ab334..080b05213a04a 100644 --- a/lld/test/wasm/shared64.s +++ b/lld/test/wasm/shared64.s @@ -1,6 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown -o %t.o %s # RUN: wasm-ld -mwasm64 --experimental-pic -shared -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS .functype func_external () -> () @@ -212,16 +213,46 @@ get_local_func_address: # CHECK-NEXT: Functions: [ 3, 2 ] # check the generated code in __wasm_call_ctors and 
__wasm_apply_data_relocs functions -# TODO(sbc): Disassemble and verify instructions. - -# CHECK: - Type: CODE -# CHECK-NEXT: Functions: -# CHECK-NEXT: - Index: 0 -# CHECK-NEXT: Locals: [] -# CHECK-NEXT: Body: 10010B -# CHECK-NEXT: - Index: 1 -# CHECK-NEXT: Locals: [] -# CHECK-NEXT: Body: 230142047C23053702002301420C7C230242017C370200230142147C230141006A360200230142187C2306370200230142207C230741046A3602000B + +# DIS: <__wasm_call_ctors>: +# DIS-EMPTY: +# DIS-NEXT: call 1 +# DIS-NEXT: end + +# DIS: <__wasm_apply_data_relocs>: +# DIS-EMPTY: +# DIS-NEXT: global.get 1 +# DIS-NEXT: i64.const 4 +# DIS-NEXT: i64.add +# DIS-NEXT: global.get 5 +# DIS-NEXT: i64.store 0:p2align=2 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i64.const 12 +# DIS-NEXT: i64.add +# DIS-NEXT: global.get 2 +# DIS-NEXT: i64.const 1 +# DIS-NEXT: i64.add +# DIS-NEXT: i64.store 0:p2align=2 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i64.const 20 +# DIS-NEXT: i64.add +# DIS-NEXT: global.get 1 +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.store 0 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i64.const 24 +# DIS-NEXT: i64.add +# DIS-NEXT: global.get 6 +# DIS-NEXT: i64.store 0:p2align=2 +# DIS-NEXT: global.get 1 +# DIS-NEXT: i64.const 32 +# DIS-NEXT: i64.add +# DIS-NEXT: global.get 7 +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.store 0 +# DIS-NEXT: end # check the data segment initialized with __memory_base global as offset From 1455eddcf71d09b31aaea77f4b33d23741519bd8 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 20 Jan 2022 20:31:47 +0300 Subject: [PATCH 034/946] [NFC][SimplifyCFG] Add some tests for `invoke` merging --- ...patible-invokes-of-landingpad-debuginfo.ll | 103 ++ .../merge-compatible-invokes-of-landingpad.ll | 1367 +++++++++++++++++ 2 files changed, 1470 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll create mode 100644 
llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll new file mode 100644 index 0000000000000..1b39e731c94d5 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals +; RUN: opt < %s -debugify -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s +; RUN: opt < %s -passes='debugify,simplifycfg' -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; More interesting test, here we can merge the invokes. +define void @t1_mergeable_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t1_mergeable_invoke( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond(), !dbg [[DBG12:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[C0]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG13:![0-9]+]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG14:![0-9]+]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable, !dbg [[DBG15:![0-9]+]] +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup, !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: call void @destructor(), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: resume { i8*, i32 } [[EH]], !dbg [[DBG18:![0-9]+]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond(), !dbg [[DBG19:![0-9]+]] +; 
CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[C1]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]], !dbg [[DBG20:![0-9]+]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG21:![0-9]+]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable, !dbg [[DBG22:![0-9]+]] +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect(), !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: ret void, !dbg [[DBG24:![0-9]+]] +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +declare i1 @cond() +declare void @sideeffect() +declare void @simple_throw() noreturn +declare void @destructor() + +declare dso_local i32 @__gxx_personality_v0(...) +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "", directory: "/") +; CHECK: [[META2:![0-9]+]] = !{i32 13} +; CHECK: [[META3:![0-9]+]] = !{i32 2} +; CHECK: [[META4:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK: [[META5:![0-9]+]] = distinct !DISubprogram(name: "t1_mergeable_invoke", linkageName: "t1_mergeable_invoke", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) +; CHECK: [[META6:![0-9]+]] = !DISubroutineType(types: !7) +; CHECK: [[META7:![0-9]+]] = !{} +; CHECK: [[META8:![0-9]+]] = !{!9, !11} +; CHECK: [[META9]] = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10) +; CHECK: [[META10:![0-9]+]] = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned) +; CHECK: [[META11]] = !DILocalVariable(name: "2", scope: !5, file: !1, line: 8, type: !10) +; CHECK: [[DBG12]] = !DILocation(line: 1, column: 1, scope: !5) +; CHECK: [[DBG13]] = !DILocation(line: 2, column: 1, scope: !5) +; CHECK: [[DBG14]] = !DILocation(line: 3, column: 1, scope: !5) +; CHECK: [[DBG15]] = !DILocation(line: 4, column: 1, scope: !5) +; CHECK: [[DBG16]] = !DILocation(line: 5, column: 1, scope: !5) +; CHECK: [[DBG17]] = !DILocation(line: 6, column: 1, scope: !5) +; CHECK: [[DBG18]] = !DILocation(line: 7, column: 1, scope: !5) +; CHECK: [[DBG19]] = !DILocation(line: 8, column: 1, scope: !5) +; CHECK: [[DBG20]] = !DILocation(line: 9, column: 1, scope: !5) +; CHECK: [[DBG21]] = !DILocation(line: 10, column: 1, scope: !5) +; CHECK: [[DBG22]] = !DILocation(line: 11, column: 1, scope: !5) +; CHECK: [[DBG23]] = !DILocation(line: 12, column: 1, scope: !5) +; CHECK: [[DBG24]] = !DILocation(line: 13, column: 1, scope: !5) +;. 
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll new file mode 100644 index 0000000000000..a9b93193f7d87 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll @@ -0,0 +1,1367 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals +; RUN: opt < %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s +; RUN: opt < %s -passes='simplifycfg' -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Simple test, nothing interesting happens here. +define void @t0_noop() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t0_noop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c = call i1 @cond() + br i1 %c, label %if.then, label %if.end + +if.then: + invoke void @simple_throw() to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.end: + call void @sideeffect() + ret void +} + +; More interesting test, here we can merge the invokes. 
+define void @t1_mergeable_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t1_mergeable_invoke( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; 'unreachable' block is shared, but it is unreachable, so we are fine. 
+define void @t2_shared_normal_dest() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t2_shared_normal_dest( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD]] +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont unwind label %lpad + +invoke.cont: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont unwind label %lpad + +if.end: + call void @sideeffect() + ret void +} + +; shared normal destination is not unreachable. 
+define void @t3_bad_shared_normal_dest() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t3_bad_shared_normal_dest( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD]] +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont unwind label %lpad + +invoke.cont: + call void @sideeffect() + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont unwind label %lpad + +if.end: + call void @sideeffect() + ret void +} + +; normal destinations are not unreachable. 
+define void @t4_normal_dests() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t4_normal_dests( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: call void @another_sideeffect() +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + call void @sideeffect() + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + call void @another_sideeffect() + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Invokes lead to different landing pads. 
+define void @t5_different_landingpads() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t5_different_landingpads( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD0:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: common.resume: +; CHECK-NEXT: [[COMMON_RESUME_OP:%.*]] = phi { i8*, i32 } [ [[EH0:%.*]], [[LPAD0]] ], [ [[EH1:%.*]], [[LPAD1:%.*]] ] +; CHECK-NEXT: resume { i8*, i32 } [[COMMON_RESUME_OP]] +; CHECK: lpad0: +; CHECK-NEXT: [[EH0]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: br label [[COMMON_RESUME:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: lpad1: +; CHECK-NEXT: [[EH1]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @another_destructor() +; CHECK-NEXT: br label [[COMMON_RESUME]] +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad0 + +invoke.cont0: + unreachable + +lpad0: + %eh0 = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh0 + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + unreachable + +lpad1: + %eh1 = landingpad { i8*, i32 } cleanup + call void 
@another_destructor() + resume { i8*, i32 } %eh1 + +if.end: + call void @sideeffect() + ret void +} + +; The two invokes call different functions (@simple_throw and @another_simple_throw). +define void @t6_different_invokes() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t6_different_invokes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @another_simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @another_simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Merging is disallowed when an invoke is marked `nomerge`: @t7_nomerge0, @t8_nomerge1 and @t9_nomerge2 mark the second, the first, and both invokes respectively. +define void @t7_nomerge0() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t7_nomerge0( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1
[[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR1:[0-9]+]] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() nomerge to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} +define void @t8_nomerge1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t8_nomerge1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR1]] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { 
i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() nomerge to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} +define void @t9_nomerge2() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t9_nomerge2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR1]] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR1]] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; 
CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() nomerge to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() nomerge to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Just don't deal with inlineasm. +define void @t10_inlineasm() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t10_inlineasm( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void asm sideeffect "something bad", ""() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void asm sideeffect "something bad", ""() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void asm sideeffect "something bad", ""() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 
} %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void asm sideeffect "something bad", ""() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Like @t1_mergeable_invoke, but the landing pad has a PHI node with a different incoming value per invoke, so merging the invokes must account for the PHI. +define void @t11_phi_in_landingpad() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t11_phi_in_landingpad( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[IF_THEN0]] ], [ 1, [[IF_THEN1:%.*]] ] +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @consume(i32 [[PHI]]) +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %phi = phi i32 [ 0, %if.then0 ], [ 1, %if.then1 ] + %eh = landingpad { i8*, i32 } cleanup + call void @consume(i32 %phi) + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void
@simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; It is okay for the invoke to take arguments +define void @t12_arguments_are_fine() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t12_arguments_are_fine( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw_taking_argument(i32 42) +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw_taking_argument(i32 42) +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; It is okay for the invoke to take different arguments +define void @t13_different_arguments_are_fine() personality i8* bitcast 
(i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t13_different_arguments_are_fine( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw_taking_argument(i32 0) +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw_taking_argument(i32 42) +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw_taking_argument(i32 0) to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; There can be more than two invokes in a set. NOTE: despite the function name, all three invokes here are identical calls to @simple_throw(); @t15_three_invokes_only_two_compatible covers the only-two-compatible case. +define void @t14_three_invokes_only_two_compatible() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t14_three_invokes_only_two_compatible( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]] +;
CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else0: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.else1: +; CHECK-NEXT: [[C2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] +; CHECK: if.then2: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont3: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else0 + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else0: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.else1 + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.else1: + %c2 = call i1 @cond() + br i1 %c2, label %if.then2, label %if.end + +if.then2: + invoke void @simple_throw() to label %invoke.cont3 unwind label %lpad + +invoke.cont3: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; If not all invokes of landingpad are compatible then we still merge compatible ones. 
+define void @t15_three_invokes_only_two_compatible() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t15_three_invokes_only_two_compatible( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else0: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.else1: +; CHECK-NEXT: [[C2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]] +; CHECK: if.then2: +; CHECK-NEXT: invoke void @another_simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont3: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else0 + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else0: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.else1 + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.else1: + %c2 = call i1 @cond() + br i1 %c2, label %if.then2, label %if.end + +if.then2: + 
invoke void @another_simple_throw() to label %invoke.cont3 unwind label %lpad + +invoke.cont3: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; We succeed in merging invokes into two sets +define void @t16_four_invokes_forming_two_sets() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t16_four_invokes_forming_two_sets( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else0: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.else1: +; CHECK-NEXT: [[C2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_ELSE2:%.*]] +; CHECK: if.then2: +; CHECK-NEXT: invoke void @another_simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont3: +; CHECK-NEXT: unreachable +; CHECK: if.else2: +; CHECK-NEXT: [[C3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C3]], label [[IF_THEN3:%.*]], label [[IF_END:%.*]] +; CHECK: if.then3: +; CHECK-NEXT: invoke void @another_simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont4: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label 
%if.then0, label %if.else0 + +if.then0: + invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else0: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.else1 + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.else1: + %c2 = call i1 @cond() + br i1 %c2, label %if.then2, label %if.else2 + +if.then2: + invoke void @another_simple_throw() to label %invoke.cont3 unwind label %lpad + +invoke.cont3: + unreachable + +if.else2: + %c3 = call i1 @cond() + br i1 %c3, label %if.then3, label %if.end + +if.then3: + invoke void @another_simple_throw() to label %invoke.cont4 unwind label %lpad + +invoke.cont4: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Attributes must match +define void @t17_mismatched_attrs_prevent_merge() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t17_mismatched_attrs_prevent_merge( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR2:[0-9]+]] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; 
CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() readnone to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Common attributes are preserved +define void @t18_attributes_are_preserved() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t18_attributes_are_preserved( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR2]] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() #[[ATTR2]] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() readnone to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + 
+if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() readnone to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Fully identical operand bundles are good. +define void @t19_compatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t19_compatible_operand_bundle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() [ "abc"(i32 42) ] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() [ "abc"(i32 42) ] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; Operand bundles 
must be compatible, else we can't merge. +define void @t20_incompatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t20_incompatible_operand_bundle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() [ "abc"(i32 42) ] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() [ "def"(i32 0) ] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() [ "def"(i32 0) ] to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +; We need to PHI together the arguments of the operand bundles. 
+define void @t21_semicompatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: @t21_semicompatible_operand_bundle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then0: +; CHECK-NEXT: invoke void @simple_throw() [ "abc"(i32 42) ] +; CHECK-NEXT: to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]] +; CHECK: invoke.cont0: +; CHECK-NEXT: unreachable +; CHECK: lpad: +; CHECK-NEXT: [[EH:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @destructor() +; CHECK-NEXT: resume { i8*, i32 } [[EH]] +; CHECK: if.else: +; CHECK-NEXT: [[C1:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: invoke void @simple_throw() [ "abc"(i32 0) ] +; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]] +; CHECK: invoke.cont2: +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: call void @sideeffect() +; CHECK-NEXT: ret void +; +entry: + %c0 = call i1 @cond() + br i1 %c0, label %if.then0, label %if.else + +if.then0: + invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad + +invoke.cont0: + unreachable + +lpad: + %eh = landingpad { i8*, i32 } cleanup + call void @destructor() + resume { i8*, i32 } %eh + +if.else: + %c1 = call i1 @cond() + br i1 %c1, label %if.then1, label %if.end + +if.then1: + invoke void @simple_throw() [ "abc"(i32 0) ] to label %invoke.cont2 unwind label %lpad + +invoke.cont2: + unreachable + +if.end: + call void @sideeffect() + ret void +} + +declare i1 @cond() + +declare void @sideeffect() +declare void @another_sideeffect() + +declare void @simple_throw() noreturn +declare void @another_simple_throw() noreturn + +declare void @simple_throw_taking_argument(i32) noreturn + +declare void @destructor() +declare void @another_destructor() + +declare void 
@consume(i32) + +declare dso_local i32 @__gxx_personality_v0(...) +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } +; CHECK: attributes #[[ATTR1]] = { nomerge } +; CHECK: attributes #[[ATTR2]] = { readnone } +;. From 48224475222d14e6f661d649f4363dcc197cc7ef Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 20 Jan 2022 13:53:59 +0100 Subject: [PATCH 035/946] [libc++] basic_string::resize_and_overwrite: Adopt LWG3645 (Not voted in yet) Adopt LWG3645, which fixes the value categories of basic_string::resize_and_overwrite https://timsong-cpp.github.io/lwg-issues/3645 Reviewed By: ldionne, #libc Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D116815 --- libcxx/docs/Status/Cxx2bIssues.csv | 2 ++ libcxx/include/string | 3 +-- .../basic.string/string.capacity/resize_and_overwrite.pass.cpp | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv index 1a8f6a4792c18..726668f3bdb2a 100644 --- a/libcxx/docs/Status/Cxx2bIssues.csv +++ b/libcxx/docs/Status/Cxx2bIssues.csv @@ -137,3 +137,5 @@ `3593 `__,"Several iterators' ``base() const &`` and ``lazy_split_view::outer-iterator::value_type::end()`` missing ``noexcept``","October 2021","","","|ranges|" `3595 `__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|" "","","","","" +`3645 `__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0","" +"","","","","" \ No newline at end of file diff --git a/libcxx/include/string b/libcxx/include/string index c4f2b008cafe1..b2eef646f9827 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -979,8 +979,7 @@ public: _LIBCPP_HIDE_FROM_ABI constexpr void resize_and_overwrite(size_type __n, _Op __op) { __resize_default_init(__n); - pointer __data = data(); - __erase_to_end(_VSTD::move(__op)(__data, __n)); + 
__erase_to_end(_VSTD::move(__op)(data(), _LIBCPP_AUTO_CAST(__n))); } #endif diff --git a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp index 3f97537dd2df0..61312fa5ec49a 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp @@ -76,7 +76,8 @@ constexpr bool test() { void test_value_categories() { std::string s; - s.resize_and_overwrite(10, [](char*&, size_t&) { return 0; }); + s.resize_and_overwrite(10, [](char*&&, size_t&&) { return 0; }); + s.resize_and_overwrite(10, [](char* const&, const size_t&) { return 0; }); struct RefQualified { int operator()(char*, size_t) && { return 0; } }; From 2e49e0cfde43fa96e50ee89573ec6ff8a31c7c24 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 11:06:25 -0500 Subject: [PATCH 036/946] AMDGPU/GlobalISel: Directly diagnose return value use for FP atomics Emit an error if the return value is used on subtargets that do not support them. Previously we were falling back to the DAG on selection failure, where it would emit this error and then fail again. 
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 18 ++++++++++++++++-- ...m.amdgcn.raw.buffer.atomic.fadd-with-ret.ll | 3 ++- ...mdgcn.struct.buffer.atomic.fadd-with-ret.ll | 6 ++++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 93bddd9681ed7..20b2b0f1be0ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5181,8 +5181,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_struct_buffer_atomic_inc: case Intrinsic::amdgcn_raw_buffer_atomic_dec: case Intrinsic::amdgcn_struct_buffer_atomic_dec: - case Intrinsic::amdgcn_raw_buffer_atomic_fadd: - case Intrinsic::amdgcn_struct_buffer_atomic_fadd: case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: case Intrinsic::amdgcn_raw_buffer_atomic_fmin: @@ -5190,6 +5188,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_raw_buffer_atomic_fmax: case Intrinsic::amdgcn_struct_buffer_atomic_fmax: return legalizeBufferAtomic(MI, B, IntrID); + case Intrinsic::amdgcn_raw_buffer_atomic_fadd: + case Intrinsic::amdgcn_struct_buffer_atomic_fadd: { + Register DstReg = MI.getOperand(0).getReg(); + if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) { + Function &F = B.getMF().getFunction(); + DiagnosticInfoUnsupported NoFpRet( + F, "return versions of fp atomics not supported", B.getDebugLoc(), + DS_Error); + F.getContext().diagnose(NoFpRet); + B.buildUndef(DstReg); + MI.eraseFromParent(); + return true; + } + + return legalizeBufferAtomic(MI, B, IntrID); + } case Intrinsic::amdgcn_atomic_inc: return legalizeAtomicIncDec(MI, B, true); case Intrinsic::amdgcn_atomic_dec: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll index 257785fc6bffa..8a0ea3baa68ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll @@ -1,9 +1,10 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s -; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 +; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) +; GFX908: error: {{.*}} return versions of fp atomics not supported ; GFX908: error: {{.*}} return versions of fp atomics not supported ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll index 3a31fad29010a..74f9704452020 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll @@ -1,10 +1,12 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s -; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 +; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 + +; GFX908: error: {{.*}} return versions of fp 
atomics not supported +; GFX908: error: {{.*}} return versions of fp atomics not supported declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) -; GFX908: error: {{.*}} return versions of fp atomics not supported ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn: ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{[0-9]+}} idxen offen glc From c0f9592daae3be17e5fd1528a8f6067cb0c3bd91 Mon Sep 17 00:00:00 2001 From: zijunzhao Date: Thu, 20 Jan 2022 09:30:51 +0000 Subject: [PATCH 037/946] add tsan shared library Add tsan shared library on Android. Only build tsan when minSdkVersion is above 23. Reviewed By: danalbert, vitalybuka Differential Revision: https://reviews.llvm.org/D108394 --- compiler-rt/cmake/config-ix.cmake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index d196deff5dc19..33693ce60321d 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -749,9 +749,14 @@ else() set(COMPILER_RT_HAS_PROFILE FALSE) endif() -if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH AND - OS_NAME MATCHES "Darwin|Linux|FreeBSD|Android|NetBSD") - set(COMPILER_RT_HAS_TSAN TRUE) +if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH) + if (OS_NAME MATCHES "Linux|Darwin|FreeBSD|NetBSD") + set(COMPILER_RT_HAS_TSAN TRUE) + elseif (OS_NAME MATCHES "Android" AND ANDROID_PLATFORM_LEVEL GREATER 23) + set(COMPILER_RT_HAS_TSAN TRUE) + else() + set(COMPILER_RT_HAS_TSAN FALSE) + endif() else() set(COMPILER_RT_HAS_TSAN FALSE) endif() From 08549ba51e11600ff89606d3de59d91d95c21645 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 16:46:49 -0500 Subject: [PATCH 038/946] AMDGPU/GlobalISel: Explicitly set -global-isel-abort in failure tests If 
the default mode is the fallback, this would fail since it would end up seeing the DAG failure message instead. --- llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll | 2 +- llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll index 7e75fb037323a..a216a8b509e87 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll @@ -1,4 +1,4 @@ -; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s +; RUN: not --crash llc -global-isel -global-isel-abort=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s ; FIXME: Should produce context error for each one ; ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p5) = G_GLOBAL_VALUE @external_private (in function: fn_external_private) diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll index d1ab4cb03e3f7..42b81236e55ef 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll @@ -1,8 +1,8 @@ ; RUN: not --crash llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=SDAG-ERR %s -; RUN: not --crash llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s +; RUN: not --crash llc -global-isel=1 -global-isel-abort=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s ; Make sure this doesn't assert on targets without the r128-16 -; feature, and instead generates a slection error. +; feature, and instead generates a selection error. 
; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d ; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: load_1d) From 2d1f9aa27dc443287ea308e340b0ec3c284e14ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 16:14:56 -0500 Subject: [PATCH 039/946] AMDGPU/GlobalISel: Regenerate test checks with -NEXT --- .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 446 +++++++++--------- 1 file changed, 223 insertions(+), 223 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir index a7f875fcdd428..d0ee43b20b2d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -12,9 +12,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_sgpr ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(p3) = G_PTRMASK %0, %1 @@ -33,9 +33,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -252645136 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -252645136 + ; 
CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -252645136 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -54,9 +54,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xffffffff ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -1 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -75,9 +75,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0x00000000 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -96,9 +96,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -2147483648 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2147483648 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 
%const:sgpr(s32) = G_CONSTANT i32 -2147483648 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -117,9 +117,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -1073741824 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1073741824 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -1073741824 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -138,9 +138,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -2 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -2 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -159,9 +159,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -4 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -4 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -4 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -180,9 +180,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3 ; CHECK: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -8 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -8 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -8 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -201,9 +201,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -16 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -16 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -16 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -222,9 +222,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29 ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; CHECK: %const:sreg_32 = S_MOV_B32 -536870912 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc - ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]] + ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -536870912 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]] %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -536870912 %1:sgpr(p3) = G_PTRMASK %0, %const @@ -243,15 +243,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -270,10 +270,10 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xffffffffffffffff ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: 
S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -292,15 +292,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 0 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -319,17 +319,17 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 - ; CHECK: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -348,17 +348,17 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 
0 - ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1 - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1 + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -377,15 +377,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi32 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; 
CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4294967296 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -404,17 +404,17 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clear_32 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY 
%const.sub1 - ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1 + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 4294967296 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -433,13 +433,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo1 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: %const:sreg_64 = S_MOV_B64 -2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 
= COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -2 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -458,13 +458,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: %const:sreg_64 = S_MOV_B64 -4 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -4 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -483,13 +483,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo3 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: %const:sreg_64 = S_MOV_B64 -8 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -8 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -508,13 +508,13 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo4 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: %const:sreg_64 = S_MOV_B64 -16 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, 
implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -16 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -533,15 +533,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo29 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384 - ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -536870912 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -560,9 +560,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 
[[COPY]], %const, implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -252645136 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -581,9 +581,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -2 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -602,9 +602,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -4 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -623,9 +623,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, 
implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -8 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -644,9 +644,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -16 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -665,9 +665,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec - ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] + ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]] %0:vgpr(p3) = COPY $vgpr0 %const:vgpr(s32) = G_CONSTANT i32 -536870912 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -686,15 +686,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec - ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 - ; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(p0) = G_PTRMASK %0, %1 @@ -713,17 +713,17 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK: 
[[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec - ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096 %2:vgpr(p0) = G_PTRMASK %0, %1 @@ -742,15 +742,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo1 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -2 %1:vgpr(p0) = G_PTRMASK %0, %const @@ -769,15 +769,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 %1:vgpr(p0) = G_PTRMASK %0, %const @@ -796,15 +796,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo3 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 -1, implicit $exec + ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -4 %1:vgpr(p0) = G_PTRMASK %0, %const @@ -823,15 +823,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo4 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; 
CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -16 %1:vgpr(p0) = G_PTRMASK %0, %const @@ -850,15 +850,15 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo29 ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec - ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 - ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec - ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec + ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0 + ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], 
%subreg.sub1 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %const:vgpr(s64) = G_CONSTANT i64 -536870912 %1:vgpr(p0) = G_PTRMASK %0, %const @@ -877,9 +877,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p3_vgpr_sgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: %const:sgpr(s32) = G_CONSTANT i32 -4 - ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32) - ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p3) + ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p3) %0:sgpr(p3) = COPY $sgpr0 %const:sgpr(s32) = G_CONSTANT i32 -4 %1:vgpr(p3) = G_PTRMASK %0, %const @@ -898,9 +898,9 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_sgpr_clearlo2 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 - ; CHECK: %const:sgpr(s32) = G_CONSTANT i32 -4 - ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(s32) - ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0) + ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4 + ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p0) %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s32) = G_CONSTANT i32 -4 %1:vgpr(p0) = G_PTRMASK %0, %const From 064cea9c9a02e8fee77f9e28b3e06cb4908f7a0e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 15:20:39 -0500 Subject: [PATCH 040/946] AMDGPU/GlobalISel: Try to use s_and_b64 in ptrmask selection Avoids a test diff with SDAG. 
--- .../AMDGPU/AMDGPUInstructionSelector.cpp | 32 ++++++++---- .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 50 ++++--------------- 2 files changed, 32 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index d40d5873eacf8..6ab2cb6df3bda 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2552,6 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { Register MaskReg = I.getOperand(2).getReg(); LLT Ty = MRI->getType(DstReg); LLT MaskTy = MRI->getType(MaskReg); + MachineBasicBlock *BB = I.getParent(); + const DebugLoc &DL = I.getDebugLoc(); const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); @@ -2560,6 +2562,24 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { if (DstRB != SrcRB) // Should only happen for hand written MIR. return false; + // Try to avoid emitting a bit operation when we only need to touch half of + // the 64-bit pointer. + APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64); + const APInt MaskHi32 = APInt::getHighBitsSet(64, 32); + const APInt MaskLo32 = APInt::getLowBitsSet(64, 32); + + const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32; + const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32; + + if (!IsVGPR && Ty.getSizeInBits() == 64 && + !CanCopyLow32 && !CanCopyHi32) { + auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg) + .addReg(SrcReg) + .addReg(MaskReg); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + } + unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; const TargetRegisterClass &RegRC = IsVGPR ? 
AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; @@ -2576,8 +2596,6 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI)) return false; - MachineBasicBlock *BB = I.getParent(); - const DebugLoc &DL = I.getDebugLoc(); if (Ty.getSizeInBits() == 32) { assert(MaskTy.getSizeInBits() == 32 && "ptrmask should have been narrowed during legalize"); @@ -2600,13 +2618,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { Register MaskedLo, MaskedHi; - // Try to avoid emitting a bit operation when we only need to touch half of - // the 64-bit pointer. - APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64); - - const APInt MaskHi32 = APInt::getHighBitsSet(64, 32); - const APInt MaskLo32 = APInt::getLowBitsSet(64, 32); - if ((MaskOnes & MaskLo32) == MaskLo32) { + if (CanCopyLow32) { // If all the bits in the low half are 1, we only need a copy for it. MaskedLo = LoReg; } else { @@ -2621,7 +2633,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { .addReg(MaskLo); } - if ((MaskOnes & MaskHi32) == MaskHi32) { + if (CanCopyHi32) { // If all the bits in the high half are 1, we only need a copy for it. 
MaskedHi = HiReg; } else { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir index d0ee43b20b2d9..4861a891e059f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir @@ -244,14 +244,8 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1 - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = COPY $sgpr2_sgpr3 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -293,14 +287,8 @@ body: | ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000 ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1 - ; CHECK-NEXT: 
[[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 0 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -322,14 +310,8 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096 %2:sgpr(p0) = G_PTRMASK %0, %1 @@ -351,14 +333,8 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1 - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808 %1:sgpr(p0) = G_PTRMASK %0, %const @@ -407,14 +383,8 @@ body: | ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0 - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1 - ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]] %0:sgpr(p0) = COPY $sgpr0_sgpr1 %const:sgpr(s64) = G_CONSTANT i64 4294967296 %1:sgpr(p0) = 
G_PTRMASK %0, %const From 237502c1a478a68ee4a0e173efc2d4684e58d0bb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Jan 2022 17:39:27 -0500 Subject: [PATCH 041/946] AMDGPU: Fix asm in test using wrong IR type for physical register --- llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll index c4c887b1906a0..91d2ec82c81e7 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll @@ -55,14 +55,14 @@ define amdgpu_kernel void @test() #1 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b64 exec, s[6:7] ; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use s[0:3] +; GFX10-NEXT: ; use s[0:4] ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0 - %wide.sgpr2 = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0 + %wide.sgpr2 = call <5 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0 call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0 - call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr2) #0 + call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr2) #0 ret void } From 5af2433e1794ebf7e58e848aa612c7912d71dc78 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 20 Jan 2022 11:04:46 -0500 Subject: [PATCH 042/946] [clang-cl] Support the /HOTPATCH flag This patch adds support for the MSVC /HOTPATCH flag: https://docs.microsoft.com/sv-se/cpp/build/reference/hotpatch-create-hotpatchable-image?view=msvc-170&viewFallbackFrom=vs-2019 The flag is translated to a new -fms-hotpatch flag, which in turn adds a 'patchable-function' attribute for each function in the TU. 
This is then picked up by the PatchableFunction pass which would generate a TargetOpcode::PATCHABLE_OP of minsize = 2 (which means the target instruction must resolve to at least two bytes). TargetOpcode::PATCHABLE_OP is only implemented for x86/x64. When targeting ARM/ARM64, /HOTPATCH isn't required (instructions are always 2/4 bytes and suitable for hotpatching). Additionally, when using /Z7, we generate a 'hot patchable' flag in the CodeView debug stream, in the S_COMPILE3 record. This flag is then picked up by LLD (or link.exe) and is used in conjunction with the linker /FUNCTIONPADMIN flag to generate extra space before each function, to accommodate live patching long jumps. Please see: https://github.com/llvm/llvm-project/blob/d703b922961e0d02a5effdd4bfbb23ad50a3cc9f/lld/COFF/Writer.cpp#L1298 The outcome is that we can finally use Live++ or Recode along with clang-cl. NOTE: It seems that MSVC cl.exe always enables /HOTPATCH on x64 by default, although if we did the same I thought we might generate sub-optimal code (if this flag was active by default). Additionally, MSVC always generates a .debug$S section and a S_COMPILE3 record, which Clang doesn't do without /Z7. Therefore, the following MSVC command-line "cl /c file.cpp" would have to be written with Clang such as "clang-cl /c file.cpp /HOTPATCH /Z7" in order to obtain the same result. Depends on D43002, D80833 and D81301 for the full feature.
Differential Revision: https://reviews.llvm.org/D116511 --- clang/docs/ReleaseNotes.rst | 10 +++++++ clang/include/clang/Basic/CodeGenOptions.def | 3 +++ clang/include/clang/Driver/Options.td | 6 ++++- clang/lib/CodeGen/BackendUtil.cpp | 1 + clang/lib/CodeGen/CodeGenFunction.cpp | 7 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 2 ++ clang/lib/Driver/ToolChains/MSVC.cpp | 5 ++++ clang/test/CodeGen/patchable-function-entry.c | 5 ++++ .../CodeGenCXX/debug-info-hotpatch-arm.cpp | 26 +++++++++++++++++++ clang/test/CodeGenCXX/debug-info-hotpatch.cpp | 20 ++++++++++++++ clang/test/Driver/cl-options.c | 4 ++- llvm/include/llvm/Target/TargetOptions.h | 9 ++++--- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 6 +++++ .../COFF/ARMNT/arm-register-variables.ll | 3 ++- llvm/test/MC/AArch64/coff-debug.ll | 3 ++- 15 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index de395b5d035ec..08e4d75299d2b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -136,6 +136,16 @@ Windows Support or pass ``/permissive`` to disable C++ operator names altogether. See `PR42427 ` for more info. +- Add support for MSVC-compatible ``/hotpatch`` flag in clang-cl, and equivalent + -cc1 flag ``-fms-hotpatch``. Along with the linker flag ``/functionpadmin`` + this creates executable images suitable for runtime code patching. This flag + is only required for x86/x64 targets; ARM/ARM64 simply needs the linker + ``/functionpadmin``. + + With this addition, clang-cl can be used in live code patching scenarios, + along with tools such as Live++ or Recode. Microsoft Edit and Continue isn't + currently supported. 
+ C Language Changes in Clang --------------------------- diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index d4742cddd00c9..3526b8a4a9044 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -139,6 +139,9 @@ VALUE_CODEGENOPT(XRaySelectedFunctionGroup, 32, 0) VALUE_CODEGENOPT(PatchableFunctionEntryCount , 32, 0) ///< Number of NOPs at function entry VALUE_CODEGENOPT(PatchableFunctionEntryOffset , 32, 0) +CODEGENOPT(HotPatch, 1, 0) ///< Supports the Microsoft /HOTPATCH flag and + ///< generates a 'patchable-function' attribute. + CODEGENOPT(InstrumentForProfiling , 1, 0) ///< Set when -pg is enabled. CODEGENOPT(CallFEntry , 1, 0) ///< Set when -mfentry is enabled. CODEGENOPT(MNopMCount , 1, 0) ///< Set when -mnop-mcount is enabled. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1a9fd61328f83..b66363b1d3e92 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2498,6 +2498,9 @@ defm pascal_strings : BoolFOption<"pascal-strings", def fpatchable_function_entry_EQ : Joined<["-"], "fpatchable-function-entry=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Generate M NOPs before function entry and N-M NOPs after function entry">, MarshallingInfoInt>; +def fms_hotpatch : Flag<["-"], "fms-hotpatch">, Group, Flags<[CC1Option, CoreOption]>, + HelpText<"Ensure that all functions can be hotpatched at runtime">, + MarshallingInfoFlag>; def fpcc_struct_return : Flag<["-"], "fpcc-struct-return">, Group, Flags<[CC1Option]>, HelpText<"Override the default ABI to return all structs on the stack">; def fpch_preprocess : Flag<["-"], "fpch-preprocess">, Group; @@ -6124,6 +6127,8 @@ def _SLASH_Gw_ : CLFlag<"Gw-">, def _SLASH_help : CLFlag<"help">, Alias, HelpText<"Display available options">; def _SLASH_HELP : CLFlag<"HELP">, Alias; +def _SLASH_hotpatch : 
CLFlag<"hotpatch">, Alias, + HelpText<"Create hotpatchable image">; def _SLASH_I : CLJoinedOrSeparate<"I">, HelpText<"Add directory to include search path">, MetaVarName<"">, Alias; @@ -6480,7 +6485,6 @@ def _SLASH_headerUnit : CLJoinedOrSeparate<"headerUnit">; def _SLASH_headerUnitAngle : CLJoinedOrSeparate<"headerUnit:angle">; def _SLASH_headerUnitQuote : CLJoinedOrSeparate<"headerUnit:quote">; def _SLASH_homeparams : CLFlag<"homeparams">; -def _SLASH_hotpatch : CLFlag<"hotpatch">; def _SLASH_kernel : CLFlag<"kernel">; def _SLASH_LN : CLFlag<"LN">; def _SLASH_MP : CLJoined<"MP">; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 67fee7f35ca17..6b8e052305b49 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -645,6 +645,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; Options.DebugStrictDwarf = CodeGenOpts.DebugStrictDwarf; Options.ObjectFilenameForDebug = CodeGenOpts.ObjectFilenameForDebug; + Options.Hotpatch = CodeGenOpts.HotPatch; return true; } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 54ddbff3fb038..50e1638924d1a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -882,6 +882,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (Offset) Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset)); } + // Instruct that functions for COFF/CodeView targets should start with a + // patchable instruction, but only on x86/x64. Don't forward this to ARM/ARM64 + // backends as they don't need it -- instructions on these architectures are + // always atomically patchable at runtime. + if (CGM.getCodeGenOpts().HotPatch && + getContext().getTargetInfo().getTriple().isX86()) + Fn->addFnAttr("patchable-function", "prologue-short-redirect"); // Add no-jump-tables value. 
if (CGM.getCodeGenOpts().NoUseJumpTables) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c48c6fd59bec3..a22f03a488486 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6001,6 +6001,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } + Args.AddLastArg(CmdArgs, options::OPT_fms_hotpatch); + if (TC.SupportsProfiling()) { Args.AddLastArg(CmdArgs, options::OPT_pg); diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 4e15c3ab51cef..7e8636adc2728 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -511,6 +511,11 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7)) CmdArgs.push_back("-debug"); + // If we specify /hotpatch, let the linker add padding in front of each + // function, like MSVC does. + if (Args.hasArg(options::OPT_fms_hotpatch, options::OPT__SLASH_hotpatch)) + CmdArgs.push_back("-functionpadmin"); + // Pass on /Brepro if it was passed to the compiler. // Note that /Brepro maps to -mno-incremental-linker-compatible. 
bool DefaultIncrementalLinkerCompatible = diff --git a/clang/test/CodeGen/patchable-function-entry.c b/clang/test/CodeGen/patchable-function-entry.c index 3065eb2efa551..6e8d0d743cf45 100644 --- a/clang/test/CodeGen/patchable-function-entry.c +++ b/clang/test/CodeGen/patchable-function-entry.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple aarch64 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -triple x86_64 -emit-llvm %s -fpatchable-function-entry=1 -o - | FileCheck --check-prefixes=CHECK,OPT %s +// RUN: %clang_cc1 -triple x86_64 -emit-llvm %s -fms-hotpatch -o - | FileCheck --check-prefixes=HOTPATCH %s // CHECK: define{{.*}} void @f0() #0 __attribute__((patchable_function_entry(0))) void f0() {} @@ -34,3 +35,7 @@ void f() {} // CHECK: attributes #2 = { {{.*}} "patchable-function-entry"="0" "patchable-function-prefix"="4" // CHECK: attributes #3 = { {{.*}} "patchable-function-entry"="3" "patchable-function-prefix"="2" // OPT: attributes #4 = { {{.*}} "patchable-function-entry"="1" +// HOTPATCH: attributes #0 = { {{.*}} "patchable-function"="prologue-short-redirect" +// HOTPATCH: attributes #1 = { {{.*}} "patchable-function"="prologue-short-redirect" +// HOTPATCH: attributes #2 = { {{.*}} "patchable-function"="prologue-short-redirect" +// HOTPATCH: attributes #3 = { {{.*}} "patchable-function"="prologue-short-redirect" diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp new file mode 100644 index 0000000000000..6176f1788760a --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target || arm-registered-target +/// +/// Check that using /hotpatch doesn't generate an error. +/// Binaries are always hotpatchable on ARM/ARM64. 
+/// +// RUN: %clang_cl --target=arm-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1 +// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1 +/// +/// Ensure that we set the hotpatchable flag in the debug information. +/// +// RUN: %clang_cl --target=arm-pc-windows-msvc /c /Z7 -o %t.obj -- %s +// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH +// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s +// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH +// HOTPATCH: S_COMPILE3 [size = [[#]]] +// HOTPATCH: flags = hot patchable +/// +/// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3. +/// +// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s +// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH +// NO-HOTPATCH-NOT: flags = hot patchable + +int main() { + return 0; +} diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch.cpp new file mode 100644 index 0000000000000..fde1a6ad085ea --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-hotpatch.cpp @@ -0,0 +1,20 @@ +// REQUIRES: x86-registered-target +/// +// RUN: %clang_cl --target=x86_64-windows-msvc /c /hotpatch /Z7 -o %t.obj -- %s +// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH +// HOTPATCH: S_COMPILE3 [size = [[#]]] +// HOTPATCH: flags = hot patchable +/// +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -o %t.obj -- %s +// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH +// NO-HOTPATCH-NOT: flags = hot patchable +/// +// RUN: %clang_cl --target=x86_64-windows-msvc /hotpatch -### -- %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=FUNCTIONPADMIN +// FUNCTIONPADMIN: clang{{.*}} +// FUNCTIONPADMIN: {{link.*"}} +// FUNCTIONPADMIN: -functionpadmin + +int main() { + return 0; +} diff --git a/clang/test/Driver/cl-options.c 
b/clang/test/Driver/cl-options.c index 618be2d230f94..f39db87660125 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -118,6 +118,9 @@ // RUN: %clang_cl /Gw /Gw- -### -- %s 2>&1 | FileCheck -check-prefix=Gw_ %s // Gw_-NOT: -fdata-sections +// RUN: %clang_cl /hotpatch -### -- %s 2>&1 | FileCheck -check-prefix=hotpatch %s +// hotpatch: -fms-hotpatch + // RUN: %clang_cl /Imyincludedir -### -- %s 2>&1 | FileCheck -check-prefix=SLASH_I %s // RUN: %clang_cl /I myincludedir -### -- %s 2>&1 | FileCheck -check-prefix=SLASH_I %s // SLASH_I: "-I" "myincludedir" @@ -483,7 +486,6 @@ // RUN: /GZ \ // RUN: /H \ // RUN: /homeparams \ -// RUN: /hotpatch \ // RUN: /JMC \ // RUN: /kernel \ // RUN: /LN \ diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index c639f326abc9d..a636c48228325 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -140,9 +140,9 @@ namespace llvm { EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), - ValueTrackingVariableLocations(false), - ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), - DebugStrictDwarf(false), + ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), + XRayOmitFunctionIndex(false), DebugStrictDwarf(false), + Hotpatch(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -342,6 +342,9 @@ namespace llvm { /// By default, it is set to false. unsigned DebugStrictDwarf : 1; + /// Emit the hotpatch flag in CodeView debug. + unsigned Hotpatch : 1; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 9a8188e5cb468..52c74713551cb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -846,6 +846,12 @@ void CodeViewDebug::emitCompilerInformation() { if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) { Flags |= static_cast(CompileSym3Flags::PGO); } + using ArchType = llvm::Triple::ArchType; + ArchType Arch = Triple(MMI->getModule()->getTargetTriple()).getArch(); + if (Asm->TM.Options.Hotpatch || Arch == ArchType::thumb || + Arch == ArchType::aarch64) { + Flags |= static_cast(CompileSym3Flags::HotPatch); + } OS.AddComment("Flags and language"); OS.emitInt32(Flags); diff --git a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll index d19cca330b2d3..781a4c65abc90 100644 --- a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll +++ b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll @@ -24,7 +24,8 @@ ; OBJ: Compile3Sym { ; OBJ-NEXT: Kind: S_COMPILE3 (0x113C) ; OBJ-NEXT: Language: C (0x0) -; OBJ-NEXT: Flags [ (0x0) +; OBJ-NEXT: Flags [ (0x4000) +; OBJ-NEXT: HotPatch (0x4000) ; OBJ-NEXT: ] ; OBJ-NEXT: Machine: ARMNT (0xF4) diff --git a/llvm/test/MC/AArch64/coff-debug.ll b/llvm/test/MC/AArch64/coff-debug.ll index 6099b3d570b46..af3459ace3531 100644 --- a/llvm/test/MC/AArch64/coff-debug.ll +++ b/llvm/test/MC/AArch64/coff-debug.ll @@ -77,7 +77,8 @@ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma ; CHECK: Compile3Sym { ; CHECK: Kind: S_COMPILE3 (0x113C) ; CHECK: Language: C (0x0) -; CHECK: Flags [ (0x0) +; CHECK: Flags [ (0x4000 +; CHECK: HotPatch (0x4000) ; CHECK: ] ; CHECK: } ; CHECK: ] From 5abf1163224549902bf34c9d07b822e5283beb7a Mon Sep 17 00:00:00 2001 From: Sergei Grechanik Date: Thu, 20 Jan 2022 08:54:38 -0800 Subject: [PATCH 043/946] [mlir][vector] Allow values outside 
of [0; dim-size] in create_mask This commit explicitly states that negative values and values exceeding vector dimensions are allowed in vector.create_mask (but not in vector.constant_mask). These values are now truncated when canonicalizing vector.create_mask to vector.constant_mask. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D116069 --- mlir/include/mlir/Dialect/Vector/VectorOps.td | 8 +++-- mlir/lib/Dialect/Vector/VectorOps.cpp | 15 +++++++-- mlir/test/Dialect/Vector/canonicalize.mlir | 33 +++++++++++++++++++ .../Dialect/Vector/CPU/test-create-mask.mlir | 6 +++- 4 files changed, 56 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td index 20b431a7b7b25..826c7d0338f0b 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td @@ -2131,7 +2131,9 @@ def Vector_ConstantMaskOp : specifies an exclusive upper bound [0, mask-dim-size-element-value) for a unique dimension in the vector result. The conjunction of the ranges define a hyper-rectangular region within which elements values are set to 1 - (otherwise element values are set to 0). + (otherwise element values are set to 0). Each value of 'mask_dim_sizes' must + be non-negative and not greater than the size of the corresponding vector + dimension (as opposed to vector.create_mask which allows this). Example: @@ -2169,7 +2171,9 @@ def Vector_CreateMaskOp : each operand specifies a range [0, operand-value) for a unique dimension in the vector result. The conjunction of the operand ranges define a hyper-rectangular region within which elements values are set to 1 - (otherwise element values are set to 0). + (otherwise element values are set to 0). If operand-value is negative, it is + treated as if it were zero, and if it is greater than the corresponding + dimension size, it is treated as if it were equal to the dimension size.
Example: diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index eaa4f4e97e1dd..2e22fc0495bf2 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -4235,9 +4235,18 @@ class CreateMaskFolder final : public OpRewritePattern { return failure(); // Gather constant mask dimension sizes. SmallVector maskDimSizes; - for (auto operand : createMaskOp.operands()) { - auto *defOp = operand.getDefiningOp(); - maskDimSizes.push_back(cast(defOp).value()); + for (auto it : llvm::zip(createMaskOp.operands(), + createMaskOp.getType().getShape())) { + auto *defOp = std::get<0>(it).getDefiningOp(); + int64_t maxDimSize = std::get<1>(it); + int64_t dimSize = cast(defOp).value(); + dimSize = std::min(dimSize, maxDimSize); + // If one of dim sizes is zero, set all dims to zero. + if (dimSize <= 0) { + maskDimSizes.assign(createMaskOp.getType().getRank(), 0); + break; + } + maskDimSizes.push_back(dimSize); } // Replace 'createMaskOp' with ConstantMaskOp. 
rewriter.replaceOpWithNewOp( diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 5da33821eeaf0..3d1923ac09ace 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -13,6 +13,39 @@ func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) { // ----- +// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation +func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) { + %c2 = arith.constant 2 : index + %c5 = arith.constant 5 : index + // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1> + %0 = vector.create_mask %c5, %c2 : vector<4x3xi1> + return %0 : vector<4x3xi1> +} + +// ----- + +// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg +func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) { + %cneg2 = arith.constant -2 : index + %c5 = arith.constant 5 : index + // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1> + %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1> + return %0 : vector<4x3xi1> +} + +// ----- + +// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero +func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) { + %c2 = arith.constant 2 : index + %c0 = arith.constant 0 : index + // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1> + %0 = vector.create_mask %c0, %c2 : vector<4x3xi1> + return %0 : vector<4x3xi1> +} + +// ----- + func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) { %0 = vector.constant_mask [2, 2] : vector<4x3xi1> %1 = vector.extract_strided_slice %0 diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir index 4a3113bdbe5a1..5834f14c6d22a 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir @@ -4,11 +4,13 @@ // RUN: FileCheck %s func 
@entry() { + %cneg1 = arith.constant -1 : index %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c6 = arith.constant 6 : index + %c7 = arith.constant 7 : index // // 1-D. @@ -18,16 +20,18 @@ func @entry() { vector.print %1 : vector<5xi1> // CHECK: ( 1, 1, 0, 0, 0 ) - scf.for %i = %c0 to %c6 step %c1 { + scf.for %i = %cneg1 to %c7 step %c1 { %2 = vector.create_mask %i : vector<5xi1> vector.print %2 : vector<5xi1> } // CHECK: ( 0, 0, 0, 0, 0 ) + // CHECK: ( 0, 0, 0, 0, 0 ) // CHECK: ( 1, 0, 0, 0, 0 ) // CHECK: ( 1, 1, 0, 0, 0 ) // CHECK: ( 1, 1, 1, 0, 0 ) // CHECK: ( 1, 1, 1, 1, 0 ) // CHECK: ( 1, 1, 1, 1, 1 ) + // CHECK: ( 1, 1, 1, 1, 1 ) // // 2-D. From af5600420b93769a5c7981d247d37ac4d61cce54 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 20 Jan 2022 08:29:16 -0500 Subject: [PATCH 044/946] [OpenMP] Don't pass empty files to nvlink This patch adds an exception to the nvlink wrapper tool to not pass empty cubin files to the nvlink job. If an empty file is passed to nvlink it will cause an error indicating that the file could not be opened. This would occur if the user tried to link object files that contained offloading code with a file that didn't. This will act as a workaround until the new OpenMP offloading driver becomes the default.
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D117777 --- .../Inputs/openmp_static_device_link/empty.o | 0 .../Inputs/openmp_static_device_link/lib.bc | Bin 0 -> 1092 bytes clang/test/Driver/fat_archive_nvptx.cpp | 3 ++- .../clang-nvlink-wrapper/ClangNvlinkWrapper.cpp | 12 +++++++++++- 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/openmp_static_device_link/empty.o create mode 100644 clang/test/Driver/Inputs/openmp_static_device_link/lib.bc diff --git a/clang/test/Driver/Inputs/openmp_static_device_link/empty.o b/clang/test/Driver/Inputs/openmp_static_device_link/empty.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/openmp_static_device_link/lib.bc b/clang/test/Driver/Inputs/openmp_static_device_link/lib.bc new file mode 100644 index 0000000000000000000000000000000000000000..1a87fd836dba2c8b03f53733e4782e15996b96b9 GIT binary patch literal 1092 zcmXX_VQ3p=82*xLb7vdBi#2Qsy*uyLOv7~bme!i=5-`0vBAb|@KRT$G%SvpulxvMi z+6_&59bbkKvQjWVDzwl){)^Bb{ZXXJbu*SQ>JTXyN@LTmN>+mFWa57KrSrJ&{dn(t zdGC3i_xbwewe2^o0JH)C!e-I&?$sY6U;n%OUT4X1!Qg5If*F8+@L9W207kr@z>7?E z-S(GS-Z5ERo>{|;`E)B~=UToBCtKb&_RqHWJ1j1%o3dSMb9JNx>blT%p#xop?>Bh$P)TOJ%0#RNUs`t0=$ZZ`Ihi0o%Z;1&0sYVdkCL#*+|LyY?7c-MBDNlv zX`N-lSCV}h!dFgYqAGcvA}0e#Rmmxph$WE9BYXJ;SkJ<$NRNs1ZUZ)gu*b5_ZVSwo zz!nuQBHof#HEC1!EEC>U%{xO#<%TsDBCijVZw{jg9Fs`RA~i(DoMh}tD{dQ4qK_yv zZbK6}G?`1icG!T2JMi%utcygrw8+#JneuV0D9nbytf$xw#q?`d_LO8#?l`XK?m5|= zCZsw2b^Oz()6y@5l-JEajv#daO{iLQm`sFF)P|x9$bRpA`Vi261z{}hdZVQPw$EJ9#8)rgku)CD?*$;+A;mH18YI}MC|&#t}w+EyH!0|7ubD) zT^Cq9!2{u?2>-3LD*ILl&z$Bd53J(pW3p2iX8^@jY_jic00`J)?QIiIJUen~K+WNL z9joNo)Tj5EyNk~s_?yD5NOUZwMiYI#XUAeALnF%B SYo8A(qa)$&(a~lV0RI6M5`u96 literal 0 HcmV?d00001 diff --git a/clang/test/Driver/fat_archive_nvptx.cpp b/clang/test/Driver/fat_archive_nvptx.cpp index a46c44ff998cc..5413445925dd3 100644 --- a/clang/test/Driver/fat_archive_nvptx.cpp +++ b/clang/test/Driver/fat_archive_nvptx.cpp 
@@ -10,7 +10,8 @@ // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles" // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]" -// expected-no-diagnostics +// RUN: not %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda %s %S/Inputs/openmp_static_device_link/empty.o --libomptarget-nvptx-bc-path=%S/Inputs/openmp_static_device_link/lib.bc 2>&1 | FileCheck %s --check-prefix=EMPTY +// EMPTY-NOT: Could not open input file #ifndef HEADER #define HEADER diff --git a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp index 46a4f30ba8817..7ccc284a48314 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp @@ -55,12 +55,22 @@ static cl::opt NvlinkUserPath("nvlink-path", static cl::list NVArgs(cl::Sink, cl::desc("...")); +static bool isEmptyFile(StringRef Filename) { + ErrorOr> BufOrErr = + MemoryBuffer::getFileOrSTDIN(Filename, false, false); + if (std::error_code EC = BufOrErr.getError()) + return false; + return (*BufOrErr)->getBuffer().empty(); +} + static Error runNVLink(std::string NVLinkPath, SmallVectorImpl &Args) { std::vector NVLArgs; NVLArgs.push_back(NVLinkPath); + StringRef Output = *(llvm::find(Args, "-o") + 1); for (auto &Arg : Args) { - NVLArgs.push_back(Arg); + if (!(sys::fs::exists(Arg) && Arg != Output && isEmptyFile(Arg))) + NVLArgs.push_back(Arg); } if (sys::ExecuteAndWait(NVLinkPath, NVLArgs)) From aad04534c41979fe04d7b4bad70de95c7ba646a1 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Tue, 18 Jan 2022 13:46:18 -0500 Subject: [PATCH 045/946] 
[libc] Implement correct rounding with all rounding modes for hypot functions. Update the rounding logic for generic hypot function so that it will round correctly with all rounding modes. Reviewed By: sivachandra, zimmermann6 Differential Revision: https://reviews.llvm.org/D117590 --- libc/src/__support/FPUtil/Hypot.h | 26 +- libc/src/math/generic/CMakeLists.txt | 8 +- libc/test/src/math/CMakeLists.txt | 4 + libc/test/src/math/HypotTest.h | 16 +- .../math/differential_testing/CMakeLists.txt | 2 + libc/test/src/math/hypotf_hard_to_round.h | 1238 +++++++++++++++++ libc/test/src/math/hypotf_test.cpp | 5 + 7 files changed, 1289 insertions(+), 10 deletions(-) create mode 100644 libc/test/src/math/hypotf_hard_to_round.h diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h index 78a3571021a4a..4b2987138d1bd 100644 --- a/libc/src/__support/FPUtil/Hypot.h +++ b/libc/src/__support/FPUtil/Hypot.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_HYPOT_H #include "BasicOperations.h" +#include "FEnvImpl.h" #include "FPBits.h" #include "src/__support/CPP/TypeTraits.h" @@ -143,11 +144,22 @@ static inline T hypot(T x, T y) { if ((x_bits.get_unbiased_exponent() >= y_bits.get_unbiased_exponent() + MantissaWidth::VALUE + 2) || (y == 0)) { + // Check if the rounding mode is FE_UPWARD, will need -frounding-math so + // that the compiler does not optimize it away. + if ((y != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) { + UIntType out_bits = FPBits_t(abs(x)).uintval(); + return T(FPBits_t(++out_bits)); + } return abs(x); } else if ((y_bits.get_unbiased_exponent() >= x_bits.get_unbiased_exponent() + MantissaWidth::VALUE + 2) || (x == 0)) { - y_bits.set_sign(0); + // Check if the rounding mode is FE_UPWARD, will need -frounding-math so + // that the compiler does not optimize it away. 
+ if ((x != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) { + UIntType out_bits = FPBits_t(abs(y)).uintval(); + return T(FPBits_t(++out_bits)); + } return abs(y); } @@ -250,8 +262,16 @@ static inline T hypot(T x, T y) { y_new >>= 1; // Round to the nearest, tie to even. - if (round_bit && (lsb || sticky_bits || (r != 0))) { - ++y_new; + switch (get_round()) { + case FE_TONEAREST: + // Round to nearest, ties to even + if (round_bit && (lsb || sticky_bits || (r != 0))) + ++y_new; + break; + case FE_UPWARD: + if (round_bit || sticky_bits || (r != 0)) + ++y_new; + break; } if (y_new >= (ONE >> 1)) { diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 6f650d2403d12..59bca76fc5f84 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -954,7 +954,9 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.fputil COMPILE_OPTIONS - -O2 + -O3 + -frounding-math + -Wno-c++17-extensions ) add_entrypoint_object( @@ -1002,7 +1004,9 @@ add_entrypoint_object( DEPENDS libc.src.__support.FPUtil.fputil COMPILE_OPTIONS - -O2 + -O3 + -frounding-math + -Wno-c++17-extensions ) add_entrypoint_object( diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 88592b9725bf3..7c51dc00d49db 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1060,6 +1060,8 @@ add_fp_unittest( libc.include.math libc.src.math.hypotf libc.src.__support.FPUtil.fputil + COMPILE_OPTIONS + -Wno-c++17-extensions ) add_fp_unittest( @@ -1073,6 +1075,8 @@ add_fp_unittest( libc.include.math libc.src.math.hypot libc.src.__support.FPUtil.fputil + COMPILE_OPTIONS + -Wno-c++17-extensions ) add_fp_unittest( diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h index 584b5aef3359c..b7a7ffdf5312a 100644 --- a/libc/test/src/math/HypotTest.h +++ b/libc/test/src/math/HypotTest.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_H #include 
"src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/Hypot.h" #include "utils/MPFRWrapper/MPFRUtils.h" #include "utils/UnitTest/FPMatcher.h" #include "utils/UnitTest/Test.h" @@ -62,9 +61,9 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { y = -y; } - T result = func(x, y); mpfr::BinaryInput input{x, y}; - ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, input, + func(x, y), 0.5); } } } @@ -85,12 +84,19 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { y = -y; } - T result = func(x, y); mpfr::BinaryInput input{x, y}; - ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, input, + func(x, y), 0.5); } } } + + void test_input_list(Func func, int n, const mpfr::BinaryInput *inputs) { + for (int i = 0; i < n; ++i) { + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, inputs[i], + func(inputs[i].x, inputs[i].y), 0.5); + } + } }; #endif // LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_H diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt index 0bf7855e2c91f..f2ac818fbd3b3 100644 --- a/libc/test/src/math/differential_testing/CMakeLists.txt +++ b/libc/test/src/math/differential_testing/CMakeLists.txt @@ -406,6 +406,7 @@ add_diff_binary( libc.src.math.hypotf COMPILE_OPTIONS -fno-builtin + -Wno-c++17-extensions ) add_diff_binary( @@ -417,4 +418,5 @@ add_diff_binary( libc.src.math.hypot COMPILE_OPTIONS -fno-builtin + -Wno-c++17-extensions ) diff --git a/libc/test/src/math/hypotf_hard_to_round.h b/libc/test/src/math/hypotf_hard_to_round.h new file mode 100644 index 0000000000000..06817c7a3a104 --- /dev/null +++ b/libc/test/src/math/hypotf_hard_to_round.h @@ -0,0 +1,1238 @@ +//===-- Hard-to-round inputs for hypotf ------------------------------C++--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H +#define LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H + +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = __llvm_libc::testing::mpfr; + +constexpr int N_HARD_TO_ROUND = 1216; +constexpr mpfr::BinaryInput HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = { + {0x1.ffffecp-1f, 0x1.000002p+27}, + {0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */ + {0x1.05555p+34, 0x1.bffffep+23}, /* 44 identical bits */ + {0x1.e5fffap+34, 0x1.affffep+23}, /* 45 identical bits */ + {0x1.260002p+34, 0x1.500002p+23}, /* 45 identical bits */ + {0x1.fffffap+34, 0x1.fffffep+23}, /* 45 identical bits */ + {0x1.8ffffap+34, 0x1.3ffffep+23}, /* 45 identical bits */ + {0x1.87fffcp+35, 0x1.bffffep+23}, /* 47 identical bits */ + {0x1.b8e50ap-52, -0x1.db1e78p-64}, + {0x1.03b54cp-33, 0x1.6ca6bep-45}, + {0x1.e2eff6p+97, -0x1.044cb2p+108}, + {-0x1.6b05c4p-127, 0x1.6b3146p-126}, + {-0x1.6b05c4p-127, 0x1.6b3146p-126}, + {0x1.26b188p-127, -0x1.a4f2fp-128}, + {0x1.e2eff6p+97, -0x1.044cb2p+108}, + // only inexact hard-to-round case with ulp(x) = 2^12*ulp(y): + {0x1.87fffcp+35, 0x1.bffffep+23}, + // 1200 exact cases with ulp(x) = 2^12*ulp(y): + {0x1.f0008p+35, 0x1.f7fp+23}, + {0x1.6401dp+35, 0x1.aafp+23}, + {0x1.c0029p+35, 0x1.defp+23}, + {0x1.100274p+35, 0x1.753p+23}, + {0x1.38030cp+35, 0x1.8fbp+23}, + {0x1.940334p+35, 0x1.c6dp+23}, + {0x1.cc041cp+35, 0x1.e55p+23}, + {0x1.0c03f8p+35, 0x1.727p+23}, + {0x1.e404a4p+35, 0x1.f1dp+23}, + {0x1.b004ecp+35, 0x1.d65p+23}, + {0x1.a804ep+35, 0x1.d1fp+23}, + {0x1.5804ccp+35, 0x1.a3bp+23}, + {0x1.98051p+35, 0x1.c91p+23}, + {0x1.480524p+35, 0x1.99dp+23}, + {0x1.900668p+35, 0x1.c49p+23}, + {0x1.28061cp+35, 0x1.855p+23}, + {0x1.7406d8p+35, 0x1.b47p+23}, + {0x1.dc07acp+35, 0x1.edbp+23}, + 
{0x1.2c078p+35, 0x1.87fp+23}, + {0x1.4407bcp+35, 0x1.975p+23}, + {0x1.4c08ccp+35, 0x1.9c5p+23}, + {0x1.14088p+35, 0x1.77fp+23}, + {0x1.0008ap+35, 0x1.6a1p+23}, + {0x1.bc0b14p+35, 0x1.dcdp+23}, + {0x1.9c0bfcp+35, 0x1.cb5p+23}, + {0x1.240b9cp+35, 0x1.82bp+23}, + {0x1.d00da8p+35, 0x1.e77p+23}, + {0x1.800e68p+35, 0x1.bb7p+23}, + {0x1.8c0eacp+35, 0x1.c25p+23}, + {0x1.080d0cp+35, 0x1.6fbp+23}, + {0x1.600ea8p+35, 0x1.a89p+23}, + {0x1.300fc8p+35, 0x1.8a9p+23}, + {0x1.ec12ap+35, 0x1.f5fp+23}, + {0x1.401094p+35, 0x1.94dp+23}, + {0x1.5012b4p+35, 0x1.9edp+23}, + {0x1.781634p+35, 0x1.b6dp+23}, + {0x1.b8181cp+35, 0x1.dabp+23}, + {0x1.a017f8p+35, 0x1.cd9p+23}, + {0x1.18161cp+35, 0x1.7abp+23}, + {0x1.d41bb8p+35, 0x1.e99p+23}, + {0x1.2018p+35, 0x1.801p+23}, + {0x1.881cp+35, 0x1.c01p+23}, + {0x1.ac1fc4p+35, 0x1.d43p+23}, + {0x1.fc21ep+35, 0x1.fe1p+23}, + {0x1.e02304p+35, 0x1.efdp+23}, + {0x1.341ef4p+35, 0x1.8d3p+23}, + {0x1.3c1facp+35, 0x1.925p+23}, + {0x1.041dbp+35, 0x1.6cfp+23}, + {0x1.5c2124p+35, 0x1.a63p+23}, + {0x1.f827cp+35, 0x1.fc1p+23}, + {0x1.5422dcp+35, 0x1.a15p+23}, + {0x1.e828cp+35, 0x1.f3fp+23}, + {0x1.b429a8p+35, 0x1.d89p+23}, + {0x1.a42904p+35, 0x1.cfdp+23}, + {0x1.d82e4cp+35, 0x1.ebbp+23}, + {0x1.7c2b34p+35, 0x1.b93p+23}, + {0x1.f431ap+35, 0x1.fa1p+23}, + {0x1.842e64p+35, 0x1.bddp+23}, + {0x1.6c2f04p+35, 0x1.afdp+23}, + {0x1.1c2b48p+35, 0x1.7d7p+23}, + {0x1.683048p+35, 0x1.ad7p+23}, + {0x1.703364p+35, 0x1.b23p+23}, + {0x1.10311cp+35, 0x1.755p+23}, + {0x1.c43ab4p+35, 0x1.e13p+23}, + {0x1.c83b7cp+35, 0x1.e35p+23}, + {0x1.383504p+35, 0x1.8fdp+23}, + {0x1.0c3248p+35, 0x1.729p+23}, + {0x1.64373p+35, 0x1.ab1p+23}, + {0x1.f03f8p+35, 0x1.f81p+23}, + {0x1.943c1p+35, 0x1.c6fp+23}, + {0x1.c03e7p+35, 0x1.df1p+23}, + {0x1.583944p+35, 0x1.a3dp+23}, + {0x1.48386p+35, 0x1.99fp+23}, + {0x1.2836c8p+35, 0x1.857p+23}, + {0x1.cc40c8p+35, 0x1.e57p+23}, + {0x1.a83f2p+35, 0x1.d21p+23}, + {0x1.b03fb8p+35, 0x1.d67p+23}, + {0x1.983e34p+35, 0x1.c93p+23}, + {0x1.2c388p+35, 0x1.881p+23}, + 
{0x1.e442ep+35, 0x1.f1fp+23}, + {0x1.0035e4p+35, 0x1.6a3p+23}, + {0x1.903efcp+35, 0x1.c4bp+23}, + {0x1.743d68p+35, 0x1.b49p+23}, + {0x1.443aa8p+35, 0x1.977p+23}, + {0x1.14378p+35, 0x1.781p+23}, + {0x1.4c3c58p+35, 0x1.9c7p+23}, + {0x1.dc4564p+35, 0x1.eddp+23}, + {0x1.243bf4p+35, 0x1.82dp+23}, + {0x1.bc46bp+35, 0x1.dcfp+23}, + {0x1.083b04p+35, 0x1.6fdp+23}, + {0x1.9c4568p+35, 0x1.cb7p+23}, + {0x1.d04a98p+35, 0x1.e79p+23}, + {0x1.6043bcp+35, 0x1.a8bp+23}, + {0x1.8045d8p+35, 0x1.bb9p+23}, + {0x1.8c46f8p+35, 0x1.c27p+23}, + {0x1.30411cp+35, 0x1.8abp+23}, + {0x1.40433p+35, 0x1.94fp+23}, + {0x1.50469p+35, 0x1.9efp+23}, + {0x1.ec516p+35, 0x1.f61p+23}, + {0x1.184574p+35, 0x1.7adp+23}, + {0x1.784d1p+35, 0x1.b6fp+23}, + {0x1.b85374p+35, 0x1.dadp+23}, + {0x1.204804p+35, 0x1.803p+23}, + {0x1.a051acp+35, 0x1.cdbp+23}, + {0x1.d458ecp+35, 0x1.e9bp+23}, + {0x1.885404p+35, 0x1.c03p+23}, + {0x1.044b5p+35, 0x1.6d1p+23}, + {0x1.34509cp+35, 0x1.8d5p+23}, + {0x1.ac5a4cp+35, 0x1.d45p+23}, + {0x1.3c51f8p+35, 0x1.927p+23}, + {0x1.fc61a4p+35, 0x1.fe3p+23}, + {0x1.5c55ecp+35, 0x1.a65p+23}, + {0x1.e061p+35, 0x1.effp+23}, + {0x1.545708p+35, 0x1.a17p+23}, + {0x1.f86744p+35, 0x1.fc3p+23}, + {0x1.e8674p+35, 0x1.f41p+23}, + {0x1.b464bcp+35, 0x1.d8bp+23}, + {0x1.a463p+35, 0x1.cffp+23}, + {0x1.7c625cp+35, 0x1.b95p+23}, + {0x1.d86bc4p+35, 0x1.ebdp+23}, + {0x1.1c5af8p+35, 0x1.7d9p+23}, + {0x1.84662p+35, 0x1.bdfp+23}, + {0x1.f470e4p+35, 0x1.fa3p+23}, + {0x1.6c65p+35, 0x1.affp+23}, + {0x1.6865f8p+35, 0x1.ad9p+23}, + {0x1.7069acp+35, 0x1.b25p+23}, + {0x1.105fc8p+35, 0x1.757p+23}, + {0x1.0c609cp+35, 0x1.72bp+23}, + {0x1.3867p+35, 0x1.8ffp+23}, + {0x1.646c94p+35, 0x1.ab3p+23}, + {0x1.c476dcp+35, 0x1.e15p+23}, + {0x1.c877e8p+35, 0x1.e37p+23}, + {0x1.286778p+35, 0x1.859p+23}, + {0x1.486bap+35, 0x1.9a1p+23}, + {0x1.586dcp+35, 0x1.a3fp+23}, + {0x1.00632cp+35, 0x1.6a5p+23}, + {0x1.2c6984p+35, 0x1.883p+23}, + {0x1.146684p+35, 0x1.783p+23}, + {0x1.9474fp+35, 0x1.c71p+23}, + {0x1.f07e84p+35, 0x1.f83p+23}, + 
{0x1.c07a54p+35, 0x1.df3p+23}, + {0x1.446d98p+35, 0x1.979p+23}, + {0x1.98775cp+35, 0x1.c95p+23}, + {0x1.a87964p+35, 0x1.d23p+23}, + {0x1.4c6fe8p+35, 0x1.9c9p+23}, + {0x1.b07a88p+35, 0x1.d69p+23}, + {0x1.7473fcp+35, 0x1.b4bp+23}, + {0x1.cc7d78p+35, 0x1.e59p+23}, + {0x1.907794p+35, 0x1.c4dp+23}, + {0x1.246c5p+35, 0x1.82fp+23}, + {0x1.e4812p+35, 0x1.f21p+23}, + {0x1.0869p+35, 0x1.6ffp+23}, + {0x1.dc832p+35, 0x1.edfp+23}, + {0x1.307274p+35, 0x1.8adp+23}, + {0x1.bc825p+35, 0x1.dd1p+23}, + {0x1.9c7ed8p+35, 0x1.cb9p+23}, + {0x1.6078d4p+35, 0x1.a8dp+23}, + {0x1.4075dp+35, 0x1.951p+23}, + {0x1.807d4cp+35, 0x1.bbbp+23}, + {0x1.8c7f48p+35, 0x1.c29p+23}, + {0x1.d0878cp+35, 0x1.e7bp+23}, + {0x1.507a7p+35, 0x1.9f1p+23}, + {0x1.1874dp+35, 0x1.7afp+23}, + {0x1.ec9024p+35, 0x1.f63p+23}, + {0x1.20780cp+35, 0x1.805p+23}, + {0x1.7883fp+35, 0x1.b71p+23}, + {0x1.b88edp+35, 0x1.dafp+23}, + {0x1.a08b64p+35, 0x1.cddp+23}, + {0x1.0478f4p+35, 0x1.6d3p+23}, + {0x1.888c0cp+35, 0x1.c05p+23}, + {0x1.d49624p+35, 0x1.e9dp+23}, + {0x1.348248p+35, 0x1.8d7p+23}, + {0x1.3c8448p+35, 0x1.929p+23}, + {0x1.ac94d8p+35, 0x1.d47p+23}, + {0x1.5c8ab8p+35, 0x1.a67p+23}, + {0x1.548b38p+35, 0x1.a19p+23}, + {0x1.fca16cp+35, 0x1.fe5p+23}, + {0x1.e09fp+35, 0x1.f01p+23}, + {0x1.f8a6ccp+35, 0x1.fc5p+23}, + {0x1.b49fd4p+35, 0x1.d8dp+23}, + {0x1.a49dp+35, 0x1.d01p+23}, + {0x1.e8a5c4p+35, 0x1.f43p+23}, + {0x1.1c8aacp+35, 0x1.7dbp+23}, + {0x1.7c9988p+35, 0x1.b97p+23}, + {0x1.849dep+35, 0x1.be1p+23}, + {0x1.d8a94p+35, 0x1.ebfp+23}, + {0x1.6c9bp+35, 0x1.b01p+23}, + {0x1.689bacp+35, 0x1.adbp+23}, + {0x1.108e78p+35, 0x1.759p+23}, + {0x1.f4b02cp+35, 0x1.fa5p+23}, + {0x1.0c8ef4p+35, 0x1.72dp+23}, + {0x1.709ff8p+35, 0x1.b27p+23}, + {0x1.3899p+35, 0x1.901p+23}, + {0x1.28982cp+35, 0x1.85bp+23}, + {0x1.009078p+35, 0x1.6a7p+23}, + {0x1.64a1fcp+35, 0x1.ab5p+23}, + {0x1.489ee4p+35, 0x1.9a3p+23}, + {0x1.2c9a8cp+35, 0x1.885p+23}, + {0x1.14958cp+35, 0x1.785p+23}, + {0x1.58a24p+35, 0x1.a41p+23}, + {0x1.c4b308p+35, 0x1.e17p+23}, + 
{0x1.c8b458p+35, 0x1.e39p+23}, + {0x1.44a08cp+35, 0x1.97bp+23}, + {0x1.94add4p+35, 0x1.c73p+23}, + {0x1.4ca37cp+35, 0x1.9cbp+23}, + {0x1.249cbp+35, 0x1.831p+23}, + {0x1.0897p+35, 0x1.701p+23}, + {0x1.c0b63cp+35, 0x1.df5p+23}, + {0x1.74aa94p+35, 0x1.b4dp+23}, + {0x1.98b088p+35, 0x1.c97p+23}, + {0x1.f0bd8cp+35, 0x1.f85p+23}, + {0x1.a8b3acp+35, 0x1.d25p+23}, + {0x1.b0b55cp+35, 0x1.d6bp+23}, + {0x1.90b03p+35, 0x1.c4fp+23}, + {0x1.ccba2cp+35, 0x1.e5bp+23}, + {0x1.e4bf64p+35, 0x1.f23p+23}, + {0x1.30a3dp+35, 0x1.8afp+23}, + {0x1.dcc0ep+35, 0x1.ee1p+23}, + {0x1.40a874p+35, 0x1.953p+23}, + {0x1.60adfp+35, 0x1.a8fp+23}, + {0x1.9cb84cp+35, 0x1.cbbp+23}, + {0x1.bcbdf4p+35, 0x1.dd3p+23}, + {0x1.80b4c4p+35, 0x1.bbdp+23}, + {0x1.8cb79cp+35, 0x1.c2bp+23}, + {0x1.50ae54p+35, 0x1.9f3p+23}, + {0x1.18a43p+35, 0x1.7b1p+23}, + {0x1.d0c484p+35, 0x1.e7dp+23}, + {0x1.20a818p+35, 0x1.807p+23}, + {0x1.78bad4p+35, 0x1.b73p+23}, + {0x1.ecceecp+35, 0x1.f65p+23}, + {0x1.04a69cp+35, 0x1.6d5p+23}, + {0x1.a0c52p+35, 0x1.cdfp+23}, + {0x1.b8ca3p+35, 0x1.db1p+23}, + {0x1.34b3f8p+35, 0x1.8d9p+23}, + {0x1.88c418p+35, 0x1.c07p+23}, + {0x1.3cb69cp+35, 0x1.92bp+23}, + {0x1.d4d36p+35, 0x1.e9fp+23}, + {0x1.5cbf88p+35, 0x1.a69p+23}, + {0x1.accf68p+35, 0x1.d49p+23}, + {0x1.54bf6cp+35, 0x1.a1bp+23}, + {0x1.e0dd04p+35, 0x1.f03p+23}, + {0x1.fce138p+35, 0x1.fe7p+23}, + {0x1.1cba64p+35, 0x1.7ddp+23}, + {0x1.b4dafp+35, 0x1.d8fp+23}, + {0x1.a4d704p+35, 0x1.d03p+23}, + {0x1.7cd0b8p+35, 0x1.b99p+23}, + {0x1.f8e658p+35, 0x1.fc7p+23}, + {0x1.e8e44cp+35, 0x1.f45p+23}, + {0x1.10bd2cp+35, 0x1.75bp+23}, + {0x1.84d5a4p+35, 0x1.be3p+23}, + {0x1.6cd104p+35, 0x1.b03p+23}, + {0x1.0cbd5p+35, 0x1.72fp+23}, + {0x1.68d164p+35, 0x1.addp+23}, + {0x1.d8e6cp+35, 0x1.ec1p+23}, + {0x1.38cb04p+35, 0x1.903p+23}, + {0x1.70d648p+35, 0x1.b29p+23}, + {0x1.00bdc8p+35, 0x1.6a9p+23}, + {0x1.f4ef78p+35, 0x1.fa7p+23}, + {0x1.28c8e4p+35, 0x1.85dp+23}, + {0x1.14c498p+35, 0x1.787p+23}, + {0x1.2ccb98p+35, 0x1.887p+23}, + {0x1.48d22cp+35, 0x1.9a5p+23}, + 
{0x1.64d768p+35, 0x1.ab7p+23}, + {0x1.58d6c4p+35, 0x1.a43p+23}, + {0x1.44d384p+35, 0x1.97dp+23}, + {0x1.08c504p+35, 0x1.703p+23}, + {0x1.24cd14p+35, 0x1.833p+23}, + {0x1.4cd714p+35, 0x1.9cdp+23}, + {0x1.c4ef38p+35, 0x1.e19p+23}, + {0x1.c8f0ccp+35, 0x1.e3bp+23}, + {0x1.94e6bcp+35, 0x1.c75p+23}, + {0x1.74e13p+35, 0x1.b4fp+23}, + {0x1.c0f228p+35, 0x1.df7p+23}, + {0x1.98e9b8p+35, 0x1.c99p+23}, + {0x1.90e8dp+35, 0x1.c51p+23}, + {0x1.a8edf8p+35, 0x1.d27p+23}, + {0x1.30d53p+35, 0x1.8b1p+23}, + {0x1.b0f034p+35, 0x1.d6dp+23}, + {0x1.f0fc98p+35, 0x1.f87p+23}, + {0x1.ccf6e4p+35, 0x1.e5dp+23}, + {0x1.40db1cp+35, 0x1.955p+23}, + {0x1.60e31p+35, 0x1.a91p+23}, + {0x1.e4fdacp+35, 0x1.f25p+23}, + {0x1.18d394p+35, 0x1.7b3p+23}, + {0x1.dcfea4p+35, 0x1.ee3p+23}, + {0x1.9cf1c4p+35, 0x1.cbdp+23}, + {0x1.80ec4p+35, 0x1.bbfp+23}, + {0x1.50e23cp+35, 0x1.9f5p+23}, + {0x1.bcf99cp+35, 0x1.dd5p+23}, + {0x1.8ceff4p+35, 0x1.c2dp+23}, + {0x1.20d828p+35, 0x1.809p+23}, + {0x1.d1018p+35, 0x1.e7fp+23}, + {0x1.04d448p+35, 0x1.6d7p+23}, + {0x1.78f1bcp+35, 0x1.b75p+23}, + {0x1.34e5acp+35, 0x1.8dbp+23}, + {0x1.ed0db8p+35, 0x1.f67p+23}, + {0x1.3ce8f4p+35, 0x1.92dp+23}, + {0x1.a0feep+35, 0x1.ce1p+23}, + {0x1.b90594p+35, 0x1.db3p+23}, + {0x1.88fc28p+35, 0x1.c09p+23}, + {0x1.5cf45cp+35, 0x1.a6bp+23}, + {0x1.54f3a4p+35, 0x1.a1dp+23}, + {0x1.d510ap+35, 0x1.ea1p+23}, + {0x1.ad09fcp+35, 0x1.d4bp+23}, + {0x1.1cea2p+35, 0x1.7dfp+23}, + {0x1.e11b0cp+35, 0x1.f05p+23}, + {0x1.fd2108p+35, 0x1.fe9p+23}, + {0x1.7d07ecp+35, 0x1.b9bp+23}, + {0x1.b5161p+35, 0x1.d91p+23}, + {0x1.a5110cp+35, 0x1.d05p+23}, + {0x1.10ebe4p+35, 0x1.75dp+23}, + {0x1.0cebbp+35, 0x1.731p+23}, + {0x1.e922d8p+35, 0x1.f47p+23}, + {0x1.f925e8p+35, 0x1.fc9p+23}, + {0x1.850d6cp+35, 0x1.be5p+23}, + {0x1.6d070cp+35, 0x1.b05p+23}, + {0x1.69072p+35, 0x1.adfp+23}, + {0x1.00eb1cp+35, 0x1.6abp+23}, + {0x1.38fd0cp+35, 0x1.905p+23}, + {0x1.d92444p+35, 0x1.ec3p+23}, + {0x1.28f9ap+35, 0x1.85fp+23}, + {0x1.14f3a8p+35, 0x1.789p+23}, + {0x1.710c9cp+35, 0x1.b2bp+23}, + 
{0x1.2cfca8p+35, 0x1.889p+23}, + {0x1.490578p+35, 0x1.9a7p+23}, + {0x1.f52ec8p+35, 0x1.fa9p+23}, + {0x1.650cd8p+35, 0x1.ab9p+23}, + {0x1.590b4cp+35, 0x1.a45p+23}, + {0x1.08f30cp+35, 0x1.705p+23}, + {0x1.45068p+35, 0x1.97fp+23}, + {0x1.24fd7cp+35, 0x1.835p+23}, + {0x1.4d0abp+35, 0x1.9cfp+23}, + {0x1.c52b6cp+35, 0x1.e1bp+23}, + {0x1.951fa8p+35, 0x1.c77p+23}, + {0x1.7517dp+35, 0x1.b51p+23}, + {0x1.310694p+35, 0x1.8b3p+23}, + {0x1.c92d44p+35, 0x1.e3dp+23}, + {0x1.9922ecp+35, 0x1.c9bp+23}, + {0x1.912174p+35, 0x1.c53p+23}, + {0x1.c12e18p+35, 0x1.df9p+23}, + {0x1.a92848p+35, 0x1.d29p+23}, + {0x1.b12b1p+35, 0x1.d6fp+23}, + {0x1.410dc8p+35, 0x1.957p+23}, + {0x1.1902fcp+35, 0x1.7b5p+23}, + {0x1.cd33ap+35, 0x1.e5fp+23}, + {0x1.f13ba8p+35, 0x1.f89p+23}, + {0x1.611834p+35, 0x1.a93p+23}, + {0x1.511628p+35, 0x1.9f7p+23}, + {0x1.21083cp+35, 0x1.80bp+23}, + {0x1.e53bf8p+35, 0x1.f27p+23}, + {0x1.8123cp+35, 0x1.bc1p+23}, + {0x1.9d2b4p+35, 0x1.cbfp+23}, + {0x1.dd3c6cp+35, 0x1.ee5p+23}, + {0x1.8d285p+35, 0x1.c2fp+23}, + {0x1.bd3548p+35, 0x1.dd7p+23}, + {0x1.0501f8p+35, 0x1.6d9p+23}, + {0x1.d13e8p+35, 0x1.e81p+23}, + {0x1.7928a8p+35, 0x1.b77p+23}, + {0x1.351764p+35, 0x1.8ddp+23}, + {0x1.3d1b5p+35, 0x1.92fp+23}, + {0x1.ed4c88p+35, 0x1.f69p+23}, + {0x1.a138a4p+35, 0x1.ce3p+23}, + {0x1.b940fcp+35, 0x1.db5p+23}, + {0x1.89343cp+35, 0x1.c0bp+23}, + {0x1.5d2934p+35, 0x1.a6dp+23}, + {0x1.5527ep+35, 0x1.a1fp+23}, + {0x1.d54de4p+35, 0x1.ea3p+23}, + {0x1.ad4494p+35, 0x1.d4dp+23}, + {0x1.1d19ep+35, 0x1.7e1p+23}, + {0x1.111aap+35, 0x1.75fp+23}, + {0x1.e15918p+35, 0x1.f07p+23}, + {0x1.0d1a14p+35, 0x1.733p+23}, + {0x1.7d3f24p+35, 0x1.b9dp+23}, + {0x1.fd60dcp+35, 0x1.febp+23}, + {0x1.a54b18p+35, 0x1.d07p+23}, + {0x1.b55134p+35, 0x1.d93p+23}, + {0x1.011874p+35, 0x1.6adp+23}, + {0x1.6d3d18p+35, 0x1.b07p+23}, + {0x1.854538p+35, 0x1.be7p+23}, + {0x1.693cep+35, 0x1.ae1p+23}, + {0x1.392f18p+35, 0x1.907p+23}, + {0x1.292a6p+35, 0x1.861p+23}, + {0x1.e96168p+35, 0x1.f49p+23}, + {0x1.f9657cp+35, 0x1.fcbp+23}, + 
{0x1.1522bcp+35, 0x1.78bp+23}, + {0x1.2d2dbcp+35, 0x1.88bp+23}, + {0x1.7142f4p+35, 0x1.b2dp+23}, + {0x1.4938c8p+35, 0x1.9a9p+23}, + {0x1.d961ccp+35, 0x1.ec5p+23}, + {0x1.092118p+35, 0x1.707p+23}, + {0x1.252de8p+35, 0x1.837p+23}, + {0x1.65424cp+35, 0x1.abbp+23}, + {0x1.593fd8p+35, 0x1.a47p+23}, + {0x1.45398p+35, 0x1.981p+23}, + {0x1.4d3e5p+35, 0x1.9d1p+23}, + {0x1.f56e1cp+35, 0x1.fabp+23}, + {0x1.3137fcp+35, 0x1.8b5p+23}, + {0x1.754e74p+35, 0x1.b53p+23}, + {0x1.955898p+35, 0x1.c79p+23}, + {0x1.c567a4p+35, 0x1.e1dp+23}, + {0x1.c969cp+35, 0x1.e3fp+23}, + {0x1.193268p+35, 0x1.7b7p+23}, + {0x1.414078p+35, 0x1.959p+23}, + {0x1.915a1cp+35, 0x1.c55p+23}, + {0x1.995c24p+35, 0x1.c9dp+23}, + {0x1.a9629cp+35, 0x1.d2bp+23}, + {0x1.c16a0cp+35, 0x1.dfbp+23}, + {0x1.b165fp+35, 0x1.d71p+23}, + {0x1.614d5cp+35, 0x1.a95p+23}, + {0x1.213854p+35, 0x1.80dp+23}, + {0x1.cd706p+35, 0x1.e61p+23}, + {0x1.514a18p+35, 0x1.9f9p+23}, + {0x1.f17abcp+35, 0x1.f8bp+23}, + {0x1.815b44p+35, 0x1.bc3p+23}, + {0x1.052facp+35, 0x1.6dbp+23}, + {0x1.9d64cp+35, 0x1.cc1p+23}, + {0x1.8d60bp+35, 0x1.c31p+23}, + {0x1.e57a48p+35, 0x1.f29p+23}, + {0x1.bd70f8p+35, 0x1.dd9p+23}, + {0x1.dd7a38p+35, 0x1.ee7p+23}, + {0x1.35492p+35, 0x1.8dfp+23}, + {0x1.d17b84p+35, 0x1.e83p+23}, + {0x1.795f98p+35, 0x1.b79p+23}, + {0x1.3d4dbp+35, 0x1.931p+23}, + {0x1.a1726cp+35, 0x1.ce5p+23}, + {0x1.b97c68p+35, 0x1.db7p+23}, + {0x1.896c54p+35, 0x1.c0dp+23}, + {0x1.5d5e1p+35, 0x1.a6fp+23}, + {0x1.ed8b5cp+35, 0x1.f6bp+23}, + {0x1.555c2p+35, 0x1.a21p+23}, + {0x1.1d49a4p+35, 0x1.7e3p+23}, + {0x1.ad7f3p+35, 0x1.d4fp+23}, + {0x1.d58b2cp+35, 0x1.ea5p+23}, + {0x1.11496p+35, 0x1.761p+23}, + {0x1.0d487cp+35, 0x1.735p+23}, + {0x1.0145dp+35, 0x1.6afp+23}, + {0x1.7d766p+35, 0x1.b9fp+23}, + {0x1.e19728p+35, 0x1.f09p+23}, + {0x1.a58528p+35, 0x1.d09p+23}, + {0x1.b58c5cp+35, 0x1.d95p+23}, + {0x1.396128p+35, 0x1.909p+23}, + {0x1.295b24p+35, 0x1.863p+23}, + {0x1.1551d4p+35, 0x1.78dp+23}, + {0x1.6d7328p+35, 0x1.b09p+23}, + {0x1.fda0b4p+35, 0x1.fedp+23}, + 
{0x1.6972a4p+35, 0x1.ae3p+23}, + {0x1.857d08p+35, 0x1.be9p+23}, + {0x1.2d5ed4p+35, 0x1.88dp+23}, + {0x1.094f28p+35, 0x1.709p+23}, + {0x1.496c1cp+35, 0x1.9abp+23}, + {0x1.e99ffcp+35, 0x1.f4bp+23}, + {0x1.f9a514p+35, 0x1.fcdp+23}, + {0x1.71795p+35, 0x1.b2fp+23}, + {0x1.255e58p+35, 0x1.839p+23}, + {0x1.597468p+35, 0x1.a49p+23}, + {0x1.456c84p+35, 0x1.983p+23}, + {0x1.6577c4p+35, 0x1.abdp+23}, + {0x1.d99f58p+35, 0x1.ec7p+23}, + {0x1.4d71f4p+35, 0x1.9d3p+23}, + {0x1.316968p+35, 0x1.8b7p+23}, + {0x1.f5ad74p+35, 0x1.fadp+23}, + {0x1.1961d8p+35, 0x1.7b9p+23}, + {0x1.75851cp+35, 0x1.b55p+23}, + {0x1.95918cp+35, 0x1.c7bp+23}, + {0x1.41732cp+35, 0x1.95bp+23}, + {0x1.c5a3ep+35, 0x1.e1fp+23}, + {0x1.9192c8p+35, 0x1.c57p+23}, + {0x1.c9a64p+35, 0x1.e41p+23}, + {0x1.21687p+35, 0x1.80fp+23}, + {0x1.99956p+35, 0x1.c9fp+23}, + {0x1.618288p+35, 0x1.a97p+23}, + {0x1.a99cf4p+35, 0x1.d2dp+23}, + {0x1.c1a604p+35, 0x1.dfdp+23}, + {0x1.b1a0d4p+35, 0x1.d73p+23}, + {0x1.517e0cp+35, 0x1.9fbp+23}, + {0x1.055d64p+35, 0x1.6ddp+23}, + {0x1.cdad24p+35, 0x1.e63p+23}, + {0x1.8192ccp+35, 0x1.bc5p+23}, + {0x1.f1b9d4p+35, 0x1.f8dp+23}, + {0x1.8d9914p+35, 0x1.c33p+23}, + {0x1.9d9e44p+35, 0x1.cc3p+23}, + {0x1.bdacacp+35, 0x1.ddbp+23}, + {0x1.e5b89cp+35, 0x1.f2bp+23}, + {0x1.357aep+35, 0x1.8e1p+23}, + {0x1.ddb808p+35, 0x1.ee9p+23}, + {0x1.3d8014p+35, 0x1.933p+23}, + {0x1.79968cp+35, 0x1.b7bp+23}, + {0x1.d1b88cp+35, 0x1.e85p+23}, + {0x1.5d92fp+35, 0x1.a71p+23}, + {0x1.a1ac38p+35, 0x1.ce7p+23}, + {0x1.559064p+35, 0x1.a23p+23}, + {0x1.89a47p+35, 0x1.c0fp+23}, + {0x1.b9b7d8p+35, 0x1.db9p+23}, + {0x1.1d796cp+35, 0x1.7e5p+23}, + {0x1.edca34p+35, 0x1.f6dp+23}, + {0x1.117824p+35, 0x1.763p+23}, + {0x1.adb9dp+35, 0x1.d51p+23}, + {0x1.0d76e8p+35, 0x1.737p+23}, + {0x1.d5c878p+35, 0x1.ea7p+23}, + {0x1.01733p+35, 0x1.6b1p+23}, + {0x1.1580fp+35, 0x1.78fp+23}, + {0x1.7dadap+35, 0x1.ba1p+23}, + {0x1.298becp+35, 0x1.865p+23}, + {0x1.39933cp+35, 0x1.90bp+23}, + {0x1.2d8ffp+35, 0x1.88fp+23}, + {0x1.e1d53cp+35, 0x1.f0bp+23}, + 
{0x1.a5bf3cp+35, 0x1.d0bp+23}, + {0x1.b5c788p+35, 0x1.d97p+23}, + {0x1.6da93cp+35, 0x1.b0bp+23}, + {0x1.69a86cp+35, 0x1.ae5p+23}, + {0x1.097d3cp+35, 0x1.70bp+23}, + {0x1.85b4dcp+35, 0x1.bebp+23}, + {0x1.499f74p+35, 0x1.9adp+23}, + {0x1.fde09p+35, 0x1.fefp+23}, + {0x1.258eccp+35, 0x1.83bp+23}, + {0x1.71afbp+35, 0x1.b31p+23}, + {0x1.459f8cp+35, 0x1.985p+23}, + {0x1.59a8fcp+35, 0x1.a4bp+23}, + {0x1.e9de94p+35, 0x1.f4dp+23}, + {0x1.f9e4bp+35, 0x1.fcfp+23}, + {0x1.65ad4p+35, 0x1.abfp+23}, + {0x1.4da59cp+35, 0x1.9d5p+23}, + {0x1.319ad8p+35, 0x1.8b9p+23}, + {0x1.d9dce8p+35, 0x1.ec9p+23}, + {0x1.19914cp+35, 0x1.7bbp+23}, + {0x1.41a5e4p+35, 0x1.95dp+23}, + {0x1.75bbc8p+35, 0x1.b57p+23}, + {0x1.f5ecdp+35, 0x1.fafp+23}, + {0x1.21989p+35, 0x1.811p+23}, + {0x1.95ca84p+35, 0x1.c7dp+23}, + {0x1.61b7b8p+35, 0x1.a99p+23}, + {0x1.c5e02p+35, 0x1.e21p+23}, + {0x1.91cb78p+35, 0x1.c59p+23}, + {0x1.058b2p+35, 0x1.6dfp+23}, + {0x1.99ceap+35, 0x1.ca1p+23}, + {0x1.51b204p+35, 0x1.9fdp+23}, + {0x1.c9e2c4p+35, 0x1.e43p+23}, + {0x1.a9d75p+35, 0x1.d2fp+23}, + {0x1.b1dbbcp+35, 0x1.d75p+23}, + {0x1.c1e2p+35, 0x1.dffp+23}, + {0x1.cde9ecp+35, 0x1.e65p+23}, + {0x1.81ca58p+35, 0x1.bc7p+23}, + {0x1.8dd17cp+35, 0x1.c35p+23}, + {0x1.35aca4p+35, 0x1.8e3p+23}, + {0x1.9dd7ccp+35, 0x1.cc5p+23}, + {0x1.f1f8fp+35, 0x1.f8fp+23}, + {0x1.bde864p+35, 0x1.dddp+23}, + {0x1.3db27cp+35, 0x1.935p+23}, + {0x1.e5f6f4p+35, 0x1.f2dp+23}, + {0x1.ddf5dcp+35, 0x1.eebp+23}, + {0x1.79cd84p+35, 0x1.b7dp+23}, + {0x1.d1f598p+35, 0x1.e87p+23}, + {0x1.5dc7d4p+35, 0x1.a73p+23}, + {0x1.55c4acp+35, 0x1.a25p+23}, + {0x1.1da938p+35, 0x1.7e7p+23}, + {0x1.a1e608p+35, 0x1.ce9p+23}, + {0x1.89dc9p+35, 0x1.c11p+23}, + {0x1.b9f34cp+35, 0x1.dbbp+23}, + {0x1.11a6ecp+35, 0x1.765p+23}, + {0x1.ee091p+35, 0x1.f6fp+23}, + {0x1.0da558p+35, 0x1.739p+23}, + {0x1.01a094p+35, 0x1.6b3p+23}, + {0x1.adf474p+35, 0x1.d53p+23}, + {0x1.d605c8p+35, 0x1.ea9p+23}, + {0x1.15b01p+35, 0x1.791p+23}, + {0x1.29bcb8p+35, 0x1.867p+23}, + {0x1.39c554p+35, 0x1.90dp+23}, + 
{0x1.2dc11p+35, 0x1.891p+23}, + {0x1.7de4e4p+35, 0x1.ba3p+23}, + {0x1.09ab54p+35, 0x1.70dp+23}, + {0x1.69de38p+35, 0x1.ae7p+23}, + {0x1.6ddf54p+35, 0x1.b0dp+23}, + {0x1.a5f954p+35, 0x1.d0dp+23}, + {0x1.b602b8p+35, 0x1.d99p+23}, + {0x1.49d2dp+35, 0x1.9afp+23}, + {0x1.85ecb4p+35, 0x1.bedp+23}, + {0x1.25bf44p+35, 0x1.83dp+23}, + {0x1.e21354p+35, 0x1.f0dp+23}, + {0x1.45d298p+35, 0x1.987p+23}, + {0x1.71e614p+35, 0x1.b33p+23}, + {0x1.59dd94p+35, 0x1.a4dp+23}, + {0x1.fe207p+35, 0x1.ff1p+23}, + {0x1.4dd948p+35, 0x1.9d7p+23}, + {0x1.65e2cp+35, 0x1.ac1p+23}, + {0x1.31cc4cp+35, 0x1.8bbp+23}, + {0x1.ea1d3p+35, 0x1.f4fp+23}, + {0x1.19c0c4p+35, 0x1.7bdp+23}, + {0x1.fa245p+35, 0x1.fd1p+23}, + {0x1.da1a7cp+35, 0x1.ecbp+23}, + {0x1.41d8ap+35, 0x1.95fp+23}, + {0x1.21c8b4p+35, 0x1.813p+23}, + {0x1.75f278p+35, 0x1.b59p+23}, + {0x1.05b8ep+35, 0x1.6e1p+23}, + {0x1.96038p+35, 0x1.c7fp+23}, + {0x1.61ececp+35, 0x1.a9bp+23}, + {0x1.f62c3p+35, 0x1.fb1p+23}, + {0x1.51e6p+35, 0x1.9ffp+23}, + {0x1.92042cp+35, 0x1.c5bp+23}, + {0x1.c61c64p+35, 0x1.e23p+23}, + {0x1.9a07e4p+35, 0x1.ca3p+23}, + {0x1.aa11bp+35, 0x1.d31p+23}, + {0x1.ca1f4cp+35, 0x1.e45p+23}, + {0x1.b216a8p+35, 0x1.d77p+23}, + {0x1.c21ep+35, 0x1.e01p+23}, + {0x1.35de6cp+35, 0x1.8e5p+23}, + {0x1.8201e8p+35, 0x1.bc9p+23}, + {0x1.ce26b8p+35, 0x1.e67p+23}, + {0x1.8e09e8p+35, 0x1.c37p+23}, + {0x1.3de4e8p+35, 0x1.937p+23}, + {0x1.9e1158p+35, 0x1.cc7p+23}, + {0x1.f2381p+35, 0x1.f91p+23}, + {0x1.be242p+35, 0x1.ddfp+23}, + {0x1.7a048p+35, 0x1.b7fp+23}, + {0x1.e6355p+35, 0x1.f2fp+23}, + {0x1.de33b4p+35, 0x1.eedp+23}, + {0x1.1dd908p+35, 0x1.7e9p+23}, + {0x1.d232a8p+35, 0x1.e89p+23}, + {0x1.55f8f8p+35, 0x1.a27p+23}, + {0x1.5dfcbcp+35, 0x1.a75p+23}, + {0x1.11d5b8p+35, 0x1.767p+23}, + {0x1.8a14b4p+35, 0x1.c13p+23}, + {0x1.a21fdcp+35, 0x1.cebp+23}, + {0x1.0dd3ccp+35, 0x1.73bp+23}, + {0x1.ba2ec4p+35, 0x1.dbdp+23}, + {0x1.01cdfcp+35, 0x1.6b5p+23}, + {0x1.ee47fp+35, 0x1.f71p+23}, + {0x1.15df34p+35, 0x1.793p+23}, + {0x1.ae2f1cp+35, 0x1.d55p+23}, + 
{0x1.29ed88p+35, 0x1.869p+23}, + {0x1.39f77p+35, 0x1.90fp+23}, + {0x1.d6431cp+35, 0x1.eabp+23}, + {0x1.09d97p+35, 0x1.70fp+23}, + {0x1.2df234p+35, 0x1.893p+23}, + {0x1.7e1c2cp+35, 0x1.ba5p+23}, + {0x1.25efcp+35, 0x1.83fp+23}, + {0x1.6a1408p+35, 0x1.ae9p+23}, + {0x1.4a063p+35, 0x1.9b1p+23}, + {0x1.6e157p+35, 0x1.b0fp+23}, + {0x1.a6337p+35, 0x1.d0fp+23}, + {0x1.86249p+35, 0x1.befp+23}, + {0x1.b63decp+35, 0x1.d9bp+23}, + {0x1.4605a8p+35, 0x1.989p+23}, + {0x1.e2517p+35, 0x1.f0fp+23}, + {0x1.5a123p+35, 0x1.a4fp+23}, + {0x1.721c7cp+35, 0x1.b35p+23}, + {0x1.31fdc4p+35, 0x1.8bdp+23}, + {0x1.4e0cf8p+35, 0x1.9d9p+23}, + {0x1.661844p+35, 0x1.ac3p+23}, + {0x1.19f04p+35, 0x1.7bfp+23}, + {0x1.fe6054p+35, 0x1.ff3p+23}, + {0x1.ea5bdp+35, 0x1.f51p+23}, + {0x1.21f8dcp+35, 0x1.815p+23}, + {0x1.fa63f4p+35, 0x1.fd3p+23}, + {0x1.420b6p+35, 0x1.961p+23}, + {0x1.05e6a4p+35, 0x1.6e3p+23}, + {0x1.da5814p+35, 0x1.ecdp+23}, + {0x1.76292cp+35, 0x1.b5bp+23}, + {0x1.622224p+35, 0x1.a9dp+23}, + {0x1.963c8p+35, 0x1.c81p+23}, + {0x1.521ap+35, 0x1.a01p+23}, + {0x1.923ce4p+35, 0x1.c5dp+23}, + {0x1.f66b94p+35, 0x1.fb3p+23}, + {0x1.9a412cp+35, 0x1.ca5p+23}, + {0x1.c658acp+35, 0x1.e25p+23}, + {0x1.aa4c14p+35, 0x1.d33p+23}, + {0x1.361038p+35, 0x1.8e7p+23}, + {0x1.ca5bd8p+35, 0x1.e47p+23}, + {0x1.b25198p+35, 0x1.d79p+23}, + {0x1.c25a04p+35, 0x1.e03p+23}, + {0x1.82397cp+35, 0x1.bcbp+23}, + {0x1.3e1758p+35, 0x1.939p+23}, + {0x1.8e4258p+35, 0x1.c39p+23}, + {0x1.ce6388p+35, 0x1.e69p+23}, + {0x1.9e4ae8p+35, 0x1.cc9p+23}, + {0x1.7a3b8p+35, 0x1.b81p+23}, + {0x1.be5fep+35, 0x1.de1p+23}, + {0x1.f27734p+35, 0x1.f93p+23}, + {0x1.1e08dcp+35, 0x1.7ebp+23}, + {0x1.de719p+35, 0x1.eefp+23}, + {0x1.e673bp+35, 0x1.f31p+23}, + {0x1.120488p+35, 0x1.769p+23}, + {0x1.562d48p+35, 0x1.a29p+23}, + {0x1.5e31a8p+35, 0x1.a77p+23}, + {0x1.d26fbcp+35, 0x1.e8bp+23}, + {0x1.0e0244p+35, 0x1.73dp+23}, + {0x1.01fb68p+35, 0x1.6b7p+23}, + {0x1.8a4cdcp+35, 0x1.c15p+23}, + {0x1.a259b4p+35, 0x1.cedp+23}, + {0x1.ba6a4p+35, 0x1.dbfp+23}, + 
{0x1.160e5cp+35, 0x1.795p+23}, + {0x1.2a1e5cp+35, 0x1.86bp+23}, + {0x1.ee86d4p+35, 0x1.f73p+23}, + {0x1.0a079p+35, 0x1.711p+23}, + {0x1.3a299p+35, 0x1.911p+23}, + {0x1.2e235cp+35, 0x1.895p+23}, + {0x1.ae69c8p+35, 0x1.d57p+23}, + {0x1.d68074p+35, 0x1.eadp+23}, + {0x1.26204p+35, 0x1.841p+23}, + {0x1.7e5378p+35, 0x1.ba7p+23}, + {0x1.4a3994p+35, 0x1.9b3p+23}, + {0x1.6a49dcp+35, 0x1.aebp+23}, + {0x1.6e4b9p+35, 0x1.b11p+23}, + {0x1.865c7p+35, 0x1.bf1p+23}, + {0x1.4638bcp+35, 0x1.98bp+23}, + {0x1.a66d9p+35, 0x1.d11p+23}, + {0x1.b67924p+35, 0x1.d9dp+23}, + {0x1.322f4p+35, 0x1.8bfp+23}, + {0x1.5a46dp+35, 0x1.a51p+23}, + {0x1.1a1fcp+35, 0x1.7c1p+23}, + {0x1.4e40acp+35, 0x1.9dbp+23}, + {0x1.7252e8p+35, 0x1.b37p+23}, + {0x1.e28f9p+35, 0x1.f11p+23}, + {0x1.664dccp+35, 0x1.ac5p+23}, + {0x1.222908p+35, 0x1.817p+23}, + {0x1.06146cp+35, 0x1.6e5p+23}, + {0x1.423e24p+35, 0x1.963p+23}, + {0x1.fea03cp+35, 0x1.ff5p+23}, + {0x1.ea9a74p+35, 0x1.f53p+23}, + {0x1.faa39cp+35, 0x1.fd5p+23}, + {0x1.765fe4p+35, 0x1.b5dp+23}, + {0x1.da95bp+35, 0x1.ecfp+23}, + {0x1.524e04p+35, 0x1.a03p+23}, + {0x1.62576p+35, 0x1.a9fp+23}, + {0x1.967584p+35, 0x1.c83p+23}, + {0x1.9275ap+35, 0x1.c5fp+23}, + {0x1.364208p+35, 0x1.8e9p+23}, + {0x1.9a7a78p+35, 0x1.ca7p+23}, + {0x1.c694f8p+35, 0x1.e27p+23}, + {0x1.f6aafcp+35, 0x1.fb5p+23}, + {0x1.aa867cp+35, 0x1.d35p+23}, + {0x1.3e49ccp+35, 0x1.93bp+23}, + {0x1.b28c8cp+35, 0x1.d7bp+23}, + {0x1.ca9868p+35, 0x1.e49p+23}, + {0x1.c2960cp+35, 0x1.e05p+23}, + {0x1.827114p+35, 0x1.bcdp+23}, + {0x1.8e7accp+35, 0x1.c3bp+23}, + {0x1.9e847cp+35, 0x1.ccbp+23}, + {0x1.cea05cp+35, 0x1.e6bp+23}, + {0x1.1e38b4p+35, 0x1.7edp+23}, + {0x1.7a7284p+35, 0x1.b83p+23}, + {0x1.be9ba4p+35, 0x1.de3p+23}, + {0x1.12335cp+35, 0x1.76bp+23}, + {0x1.f2b65cp+35, 0x1.f95p+23}, + {0x1.0e30cp+35, 0x1.73fp+23}, + {0x1.56619cp+35, 0x1.a2bp+23}, + {0x1.5e6698p+35, 0x1.a79p+23}, + {0x1.deaf7p+35, 0x1.ef1p+23}, + {0x1.0228d8p+35, 0x1.6b9p+23}, + {0x1.e6b214p+35, 0x1.f33p+23}, + {0x1.d2acd4p+35, 0x1.e8dp+23}, + 
{0x1.8a8508p+35, 0x1.c17p+23}, + {0x1.163d88p+35, 0x1.797p+23}, + {0x1.a2939p+35, 0x1.cefp+23}, + {0x1.2a4f34p+35, 0x1.86dp+23}, + {0x1.baa5cp+35, 0x1.dc1p+23}, + {0x1.0a35b4p+35, 0x1.713p+23}, + {0x1.3a5bb4p+35, 0x1.913p+23}, + {0x1.2e5488p+35, 0x1.897p+23}, + {0x1.aea478p+35, 0x1.d59p+23}, + {0x1.eec5bcp+35, 0x1.f75p+23}, + {0x1.2650c4p+35, 0x1.843p+23}, + {0x1.4a6cfcp+35, 0x1.9b5p+23}, + {0x1.7e8ac8p+35, 0x1.ba9p+23}, + {0x1.d6bddp+35, 0x1.eafp+23}, + {0x1.6a7fb4p+35, 0x1.aedp+23}, + {0x1.6e81b4p+35, 0x1.b13p+23}, + {0x1.466bd4p+35, 0x1.98dp+23}, + {0x1.3260cp+35, 0x1.8c1p+23}, + {0x1.1a4f44p+35, 0x1.7c3p+23}, + {0x1.869454p+35, 0x1.bf3p+23}, + {0x1.5a7b74p+35, 0x1.a53p+23}, + {0x1.a6a7b4p+35, 0x1.d13p+23}, + {0x1.4e7464p+35, 0x1.9ddp+23}, + {0x1.b6b46p+35, 0x1.d9fp+23}, + {0x1.728958p+35, 0x1.b39p+23}, + {0x1.668358p+35, 0x1.ac7p+23}, + {0x1.225938p+35, 0x1.819p+23}, + {0x1.064238p+35, 0x1.6e7p+23}, + {0x1.e2cdb4p+35, 0x1.f13p+23}, + {0x1.4270ecp+35, 0x1.965p+23}, + {0x1.fee028p+35, 0x1.ff7p+23}, + {0x1.7696ap+35, 0x1.b5fp+23}, + {0x1.ead91cp+35, 0x1.f55p+23}, + {0x1.52820cp+35, 0x1.a05p+23}, + {0x1.628cap+35, 0x1.aa1p+23}, + {0x1.fae348p+35, 0x1.fd7p+23}, + {0x1.dad35p+35, 0x1.ed1p+23}, + {0x1.96ae8cp+35, 0x1.c85p+23}, + {0x1.3673dcp+35, 0x1.8ebp+23}, + {0x1.92ae6p+35, 0x1.c61p+23}, + {0x1.3e7c44p+35, 0x1.93dp+23}, + {0x1.9ab3c8p+35, 0x1.ca9p+23}, + {0x1.aac0e8p+35, 0x1.d37p+23}, + {0x1.c6d148p+35, 0x1.e29p+23}, + {0x1.b2c784p+35, 0x1.d7dp+23}, + {0x1.f6ea68p+35, 0x1.fb7p+23}, + {0x1.82a8bp+35, 0x1.bcfp+23}, + {0x1.cad4fcp+35, 0x1.e4bp+23}, + {0x1.c2d218p+35, 0x1.e07p+23}, + {0x1.1e689p+35, 0x1.7efp+23}, + {0x1.8eb344p+35, 0x1.c3dp+23}, + {0x1.126234p+35, 0x1.76dp+23}, + {0x1.9ebe14p+35, 0x1.ccdp+23}, + {0x1.cedd34p+35, 0x1.e6dp+23}, + {0x1.7aa98cp+35, 0x1.b85p+23}, + {0x1.0e5f4p+35, 0x1.741p+23}, + {0x1.02564cp+35, 0x1.6bbp+23}, + {0x1.5695f4p+35, 0x1.a2dp+23}, + {0x1.bed76cp+35, 0x1.de5p+23}, + {0x1.5e9b8cp+35, 0x1.a7bp+23}, + {0x1.f2f588p+35, 0x1.f97p+23}, 
+ {0x1.deed54p+35, 0x1.ef3p+23}, + {0x1.e6f07cp+35, 0x1.f35p+23}, + {0x1.166cb8p+35, 0x1.799p+23}, + {0x1.d2e9fp+35, 0x1.e8fp+23}, + {0x1.8abd38p+35, 0x1.c19p+23}, + {0x1.2a801p+35, 0x1.86fp+23}, + {0x1.0a63dcp+35, 0x1.715p+23}, + {0x1.a2cd7p+35, 0x1.cf1p+23}, + {0x1.2e85b8p+35, 0x1.899p+23}, + {0x1.3a8ddcp+35, 0x1.915p+23}, + {0x1.bae144p+35, 0x1.dc3p+23}, + {0x1.26814cp+35, 0x1.845p+23}, + {0x1.4aa068p+35, 0x1.9b7p+23}, + {0x1.aedf2cp+35, 0x1.d5bp+23}, + {0x1.ef04a8p+35, 0x1.f77p+23}, + {0x1.7ec21cp+35, 0x1.babp+23}, + {0x1.6ab59p+35, 0x1.aefp+23}, + {0x1.329244p+35, 0x1.8c3p+23}, + {0x1.6eb7dcp+35, 0x1.b15p+23}, + {0x1.469efp+35, 0x1.98fp+23}, + {0x1.1a7eccp+35, 0x1.7c5p+23}, + {0x1.d6fb3p+35, 0x1.eb1p+23}, + {0x1.5ab01cp+35, 0x1.a55p+23}, + {0x1.4ea82p+35, 0x1.9dfp+23}, + {0x1.86cc3cp+35, 0x1.bf5p+23}, + {0x1.a6e1dcp+35, 0x1.d15p+23}, + {0x1.22896cp+35, 0x1.81bp+23}, + {0x1.067008p+35, 0x1.6e9p+23}, + {0x1.b6efap+35, 0x1.da1p+23}, + {0x1.72bfccp+35, 0x1.b3bp+23}, + {0x1.66b8e8p+35, 0x1.ac9p+23}, + {0x1.42a3b8p+35, 0x1.967p+23}, + {0x1.e30bdcp+35, 0x1.f15p+23}, + {0x1.52b618p+35, 0x1.a07p+23}, + {0x1.76cd6p+35, 0x1.b61p+23}, + {0x1.62c1e4p+35, 0x1.aa3p+23}, + {0x1.ff2018p+35, 0x1.ff9p+23}, + {0x1.eb17c8p+35, 0x1.f57p+23}, + {0x1.36a5b4p+35, 0x1.8edp+23}, + {0x1.96e798p+35, 0x1.c87p+23}, + {0x1.fb22f8p+35, 0x1.fd9p+23}, + {0x1.db10f4p+35, 0x1.ed3p+23}, + {0x1.3eaecp+35, 0x1.93fp+23}, + {0x1.92e724p+35, 0x1.c63p+23}, + {0x1.9aed1cp+35, 0x1.cabp+23}, + {0x1.aafb58p+35, 0x1.d39p+23}, + {0x1.1e987p+35, 0x1.7f1p+23}, + {0x1.c70d9cp+35, 0x1.e2bp+23}, + {0x1.82e05p+35, 0x1.bd1p+23}, + {0x1.b3028p+35, 0x1.d7fp+23}, + {0x1.12911p+35, 0x1.76fp+23}, + {0x1.cb1194p+35, 0x1.e4dp+23}, + {0x1.f729d8p+35, 0x1.fb9p+23}, + {0x1.c30e28p+35, 0x1.e09p+23}, + {0x1.8eebcp+35, 0x1.c3fp+23}, + {0x1.0e8dc4p+35, 0x1.743p+23}, + {0x1.0283c4p+35, 0x1.6bdp+23}, + {0x1.7ae098p+35, 0x1.b87p+23}, + {0x1.9ef7bp+35, 0x1.ccfp+23}, + {0x1.cf1a1p+35, 0x1.e6fp+23}, + {0x1.56ca5p+35, 0x1.a2fp+23}, + 
{0x1.5ed084p+35, 0x1.a7dp+23}, + {0x1.bf1338p+35, 0x1.de7p+23}, + {0x1.169becp+35, 0x1.79bp+23}, + {0x1.0a9208p+35, 0x1.717p+23}, + {0x1.2ab0fp+35, 0x1.871p+23}, + {0x1.f334b8p+35, 0x1.f99p+23}, + {0x1.df2b3cp+35, 0x1.ef5p+23}, + {0x1.e72ee8p+35, 0x1.f37p+23}, + {0x1.8af56cp+35, 0x1.c1bp+23}, + {0x1.2eb6ecp+35, 0x1.89bp+23}, + {0x1.d3271p+35, 0x1.e91p+23}, + {0x1.3ac008p+35, 0x1.917p+23}, + {0x1.a30754p+35, 0x1.cf3p+23}, + {0x1.26b1d8p+35, 0x1.847p+23}, + {0x1.bb1cccp+35, 0x1.dc5p+23}, + {0x1.4ad3d8p+35, 0x1.9b9p+23}, + {0x1.af19e4p+35, 0x1.d5dp+23}, + {0x1.1aae58p+35, 0x1.7c7p+23}, + {0x1.32c3ccp+35, 0x1.8c5p+23}, + {0x1.6aeb7p+35, 0x1.af1p+23}, + {0x1.46d21p+35, 0x1.991p+23}, + {0x1.7ef974p+35, 0x1.badp+23}, + {0x1.ef4398p+35, 0x1.f79p+23}, + {0x1.6eee08p+35, 0x1.b17p+23}, + {0x1.5ae4c8p+35, 0x1.a57p+23}, + {0x1.4edbep+35, 0x1.9e1p+23}, + {0x1.d73894p+35, 0x1.eb3p+23}, + {0x1.069ddcp+35, 0x1.6ebp+23}, + {0x1.22b9a4p+35, 0x1.81dp+23}, + {0x1.870428p+35, 0x1.bf7p+23}, + {0x1.72f644p+35, 0x1.b3dp+23}, + {0x1.a71c08p+35, 0x1.d17p+23}, + {0x1.66ee7cp+35, 0x1.acbp+23}, + {0x1.b72ae4p+35, 0x1.da3p+23}, + {0x1.42d688p+35, 0x1.969p+23}, + {0x1.e34a08p+35, 0x1.f17p+23}, + {0x1.52ea28p+35, 0x1.a09p+23}, + {0x1.62f72cp+35, 0x1.aa5p+23}, + {0x1.770424p+35, 0x1.b63p+23}, + {0x1.36d79p+35, 0x1.8efp+23}, + {0x1.ff600cp+35, 0x1.ffbp+23}, + {0x1.3ee14p+35, 0x1.941p+23}, + {0x1.eb5678p+35, 0x1.f59p+23}, + {0x1.9720a8p+35, 0x1.c89p+23}, + {0x1.931fecp+35, 0x1.c65p+23}, + {0x1.db4e9cp+35, 0x1.ed5p+23}, + {0x1.fb62acp+35, 0x1.fdbp+23}, + {0x1.1ec854p+35, 0x1.7f3p+23}, + {0x1.12bffp+35, 0x1.771p+23}, + {0x1.9b2674p+35, 0x1.cadp+23}, + {0x1.0ebc4cp+35, 0x1.745p+23}, + {0x1.8317f4p+35, 0x1.bd3p+23}, + {0x1.ab35ccp+35, 0x1.d3bp+23}, + {0x1.b33d8p+35, 0x1.d81p+23}, + {0x1.02b14p+35, 0x1.6bfp+23}, + {0x1.c749f4p+35, 0x1.e2dp+23}, + {0x1.8f244p+35, 0x1.c41p+23}, + {0x1.c34a3cp+35, 0x1.e0bp+23}, + {0x1.cb4e3p+35, 0x1.e4fp+23}, + {0x1.f7694cp+35, 0x1.fbbp+23}, + {0x1.7b17a8p+35, 0x1.b89p+23}, 
+ {0x1.56febp+35, 0x1.a31p+23}, + {0x1.9f315p+35, 0x1.cd1p+23}, + {0x1.5f058p+35, 0x1.a7fp+23}, + {0x1.16cb24p+35, 0x1.79dp+23}, + {0x1.cf56fp+35, 0x1.e71p+23}, + {0x1.0ac038p+35, 0x1.719p+23}, + {0x1.bf4f08p+35, 0x1.de9p+23}, + {0x1.2ae1d4p+35, 0x1.873p+23}, + {0x1.2ee824p+35, 0x1.89dp+23}, + {0x1.3af238p+35, 0x1.919p+23}, + {0x1.f373ecp+35, 0x1.f9bp+23}, + {0x1.8b2da4p+35, 0x1.c1dp+23}, + {0x1.df6928p+35, 0x1.ef7p+23}, + {0x1.26e268p+35, 0x1.849p+23}, + {0x1.e76d58p+35, 0x1.f39p+23}, + {0x1.d36434p+35, 0x1.e93p+23}, + {0x1.a3413cp+35, 0x1.cf5p+23}, + {0x1.4b074cp+35, 0x1.9bbp+23}, + {0x1.bb5858p+35, 0x1.dc7p+23}, + {0x1.1adde8p+35, 0x1.7c9p+23}, + {0x1.32f558p+35, 0x1.8c7p+23}, + {0x1.470534p+35, 0x1.993p+23}, + {0x1.af54ap+35, 0x1.d5fp+23}, + {0x1.6b2154p+35, 0x1.af3p+23}, + {0x1.7f30dp+35, 0x1.bafp+23}, + {0x1.06cbb4p+35, 0x1.6edp+23}, + {0x1.6f2438p+35, 0x1.b19p+23}, + {0x1.22e9ep+35, 0x1.81fp+23}, + {0x1.5b1978p+35, 0x1.a59p+23}, + {0x1.4f0fa4p+35, 0x1.9e3p+23}, + {0x1.ef828cp+35, 0x1.f7bp+23}, + {0x1.873c18p+35, 0x1.bf9p+23}, + {0x1.d775fcp+35, 0x1.eb5p+23}, + {0x1.732ccp+35, 0x1.b3fp+23}, + {0x1.672414p+35, 0x1.acdp+23}, + {0x1.43095cp+35, 0x1.96bp+23}, + {0x1.a75638p+35, 0x1.d19p+23}, + {0x1.b7662cp+35, 0x1.da5p+23}, + {0x1.531e3cp+35, 0x1.a0bp+23}, + {0x1.37097p+35, 0x1.8f1p+23}, + {0x1.e38838p+35, 0x1.f19p+23}, + {0x1.632c78p+35, 0x1.aa7p+23}, + {0x1.773aecp+35, 0x1.b65p+23}, + {0x1.3f13c4p+35, 0x1.943p+23}, + {0x1.9759bcp+35, 0x1.c8bp+23}, + {0x1.1ef83cp+35, 0x1.7f5p+23}, + {0x1.12eed4p+35, 0x1.773p+23}, + {0x1.eb952cp+35, 0x1.f5bp+23}, + {0x1.ffa004p+35, 0x1.ffdp+23}, + {0x1.9358b8p+35, 0x1.c67p+23}, + {0x1.0eead8p+35, 0x1.747p+23}, + {0x1.db8c48p+35, 0x1.ed7p+23}, + {0x1.02decp+35, 0x1.6c1p+23}, + {0x1.fba264p+35, 0x1.fddp+23}, + {0x1.9b5fdp+35, 0x1.cafp+23}, + {0x1.834f9cp+35, 0x1.bd5p+23}, + {0x1.ab7044p+35, 0x1.d3dp+23}, + {0x1.b37884p+35, 0x1.d83p+23}, + {0x1.c7865p+35, 0x1.e2fp+23}, + {0x1.8f5cc4p+35, 0x1.c43p+23}, + {0x1.c38654p+35, 
0x1.e0dp+23}, + {0x1.7b4ebcp+35, 0x1.b8bp+23}, + {0x1.16fa6p+35, 0x1.79fp+23}, + {0x1.cb8adp+35, 0x1.e51p+23}, + {0x1.573314p+35, 0x1.a33p+23}, + {0x1.5f3a8p+35, 0x1.a81p+23}, + {0x1.0aee6cp+35, 0x1.71bp+23}, + {0x1.f7a8c4p+35, 0x1.fbdp+23}, + {0x1.9f6af4p+35, 0x1.cd3p+23}, + {0x1.2b12bcp+35, 0x1.875p+23}, + {0x1.cf93d4p+35, 0x1.e73p+23}, + {0x1.2f196p+35, 0x1.89fp+23}, + {0x1.bf8adcp+35, 0x1.debp+23}, + {0x1.3b246cp+35, 0x1.91bp+23}, + {0x1.2712fcp+35, 0x1.84bp+23}, + {0x1.8b65ep+35, 0x1.c1fp+23}, + {0x1.dfa718p+35, 0x1.ef9p+23}, + {0x1.f3b324p+35, 0x1.f9dp+23}, + {0x1.a37b28p+35, 0x1.cf7p+23}, + {0x1.d3a15cp+35, 0x1.e95p+23}, + {0x1.e7abccp+35, 0x1.f3bp+23}, + {0x1.4b3ac4p+35, 0x1.9bdp+23}, + {0x1.1b0d7cp+35, 0x1.7cbp+23}, + {0x1.3326e8p+35, 0x1.8c9p+23}, + {0x1.bb93e8p+35, 0x1.dc9p+23}, + {0x1.47385cp+35, 0x1.995p+23}, + {0x1.06f99p+35, 0x1.6efp+23}, + {0x1.6b573cp+35, 0x1.af5p+23}, + {0x1.231a2p+35, 0x1.821p+23}, + {0x1.af8f6p+35, 0x1.d61p+23}, + {0x1.7f683p+35, 0x1.bb1p+23}, + {0x1.6f5a6cp+35, 0x1.b1bp+23}, + {0x1.4f436cp+35, 0x1.9e5p+23}, + {0x1.5b4e2cp+35, 0x1.a5bp+23}, + {0x1.efc184p+35, 0x1.f7dp+23}, + {0x1.87740cp+35, 0x1.bfbp+23}, + {0x1.73634p+35, 0x1.b41p+23}, + {0x1.433c34p+35, 0x1.96dp+23}, + {0x1.6759bp+35, 0x1.acfp+23}, + {0x1.d7b368p+35, 0x1.eb7p+23}, + {0x1.a7906cp+35, 0x1.d1bp+23}, + {0x1.b7a178p+35, 0x1.da7p+23}, + {0x1.535254p+35, 0x1.a0dp+23}, + {0x1.373b54p+35, 0x1.8f3p+23}, + {0x1.6361c8p+35, 0x1.aa9p+23}, + {0x1.7771b8p+35, 0x1.b67p+23}, + {0x1.e3c66cp+35, 0x1.f1bp+23}, + {0x1.3f464cp+35, 0x1.945p+23}, + {0x1.1f2828p+35, 0x1.7f7p+23}, + {0x1.131dbcp+35, 0x1.775p+23}, + {0x1.0f1968p+35, 0x1.749p+23}, + {0x1.9792d4p+35, 0x1.c8dp+23}, + {0x1.030c44p+35, 0x1.6c3p+23}, + {0x1.939188p+35, 0x1.c69p+23}, + {0x1.ebd3e4p+35, 0x1.f5dp+23}, + {0x1.ffep+35, 0x1.fffp+23}, + {0x1.9b993p+35, 0x1.cb1p+23}, + {0x1.dbc9f8p+35, 0x1.ed9p+23}, + {0x1.838748p+35, 0x1.bd7p+23}, + {0x1.fbe22p+35, 0x1.fdfp+23}, + {0x1.abaacp+35, 0x1.d3fp+23}, + {0x1.1729ap+35, 
0x1.7a1p+23}, + {0x1.b3b38cp+35, 0x1.d85p+23}, + {0x1.8f954cp+35, 0x1.c45p+23}, + {0x1.0b1ca4p+35, 0x1.71dp+23}, + {0x1.c7c2bp+35, 0x1.e31p+23}, + {0x1.57677cp+35, 0x1.a35p+23}, + {0x1.7b85d4p+35, 0x1.b8dp+23}, + {0x1.5f6f84p+35, 0x1.a83p+23}, + {0x1.c3c27p+35, 0x1.e0fp+23}, + {0x1.2b43a8p+35, 0x1.877p+23}, + {0x1.cbc774p+35, 0x1.e53p+23}, + {0x1.9fa49cp+35, 0x1.cd5p+23}, + {0x1.2f4aap+35, 0x1.8a1p+23}, + {0x1.f7e84p+35, 0x1.fbfp+23}, + {0x1.3b56a4p+35, 0x1.91dp+23}, + {0x1.cfd0bcp+35, 0x1.e75p+23}, + {0x1.274394p+35, 0x1.84dp+23}, + {0x1.bfc6b4p+35, 0x1.dedp+23}, + {0x1.8b9e2p+35, 0x1.c21p+23}, + {0x1.1b3d14p+35, 0x1.7cdp+23}, + {0x1.4b6e4p+35, 0x1.9bfp+23}, + {0x1.dfe50cp+35, 0x1.efbp+23}, + {0x1.a3b518p+35, 0x1.cf9p+23}, + {0x1.33587cp+35, 0x1.8cbp+23}, + {0x1.f3f26p+35, 0x1.f9fp+23}, + {0x1.d3de88p+35, 0x1.e97p+23}, + {0x1.e7ea44p+35, 0x1.f3dp+23}, + {0x1.07277p+35, 0x1.6f1p+23}, + {0x1.476b88p+35, 0x1.997p+23}, + {0x1.bbcf7cp+35, 0x1.dcbp+23}, + {0x1.234a64p+35, 0x1.823p+23}, + {0x1.6b8d28p+35, 0x1.af7p+23}, + {0x1.4f7738p+35, 0x1.9e7p+23}, + {0x1.5b82e4p+35, 0x1.a5dp+23}, + {0x1.6f90a4p+35, 0x1.b1dp+23}, + {0x1.7f9f94p+35, 0x1.bb3p+23}, + {0x1.afca24p+35, 0x1.d63p+23}, + {0x1.436f1p+35, 0x1.96fp+23}, + {0x1.87ac04p+35, 0x1.bfdp+23}, + {0x1.a7caa4p+35, 0x1.d1dp+23}, + {0x1.7399c4p+35, 0x1.b43p+23}, + {0x1.678f5p+35, 0x1.ad1p+23}, + {0x1.d7f0d8p+35, 0x1.eb9p+23}, + {0x1.b7dcc8p+35, 0x1.da9p+23}, + {0x1.376d3cp+35, 0x1.8f5p+23}, + {0x1.53867p+35, 0x1.a0fp+23}, + {0x1.63971cp+35, 0x1.aabp+23}, + {0x1.3f78d8p+35, 0x1.947p+23}, + {0x1.77a888p+35, 0x1.b69p+23}, + {0x1.134ca8p+35, 0x1.777p+23}, + {0x1.1f5818p+35, 0x1.7f9p+23}, + {0x1.b3ee98p+35, 0x1.d87p+23}, + {0x1.0f47fcp+35, 0x1.74bp+23}, + {0x1.0339ccp+35, 0x1.6c5p+23}, + {0x1.9bd294p+35, 0x1.cb3p+23}, + {0x1.97cbfp+35, 0x1.c8fp+23}, + {0x1.abe54p+35, 0x1.d41p+23}, + {0x1.93ca5cp+35, 0x1.c6bp+23}, + {0x1.83bef8p+35, 0x1.bd9p+23}, + {0x1.8fcdd8p+35, 0x1.c47p+23}, + {0x1.1758e4p+35, 0x1.7a3p+23}, + {0x1.0b4aep+35, 
0x1.71fp+23}, + {0x1.c7ff14p+35, 0x1.e33p+23}, + {0x1.1b6cbp+35, 0x1.7cfp+23}, + {0x1.579be8p+35, 0x1.a37p+23}, + {0x1.2b7498p+35, 0x1.879p+23}, + {0x1.7bbcfp+35, 0x1.b8fp+23}, + {0x1.5fa48cp+35, 0x1.a85p+23}, + {0x1.a3ef0cp+35, 0x1.cfbp+23}, + {0x1.2f7be4p+35, 0x1.8a3p+23}, + {0x1.9fde48p+35, 0x1.cd7p+23}, + {0x1.c3fe9p+35, 0x1.e11p+23}, + {0x1.5bb7ap+35, 0x1.a5fp+23}, + {0x1.8bd664p+35, 0x1.c23p+23}, + {0x1.3b88ep+35, 0x1.91fp+23}, + {0x1.4ba1cp+35, 0x1.9c1p+23}, + {0x1.27743p+35, 0x1.84fp+23}, + {0x1.075554p+35, 0x1.6f3p+23}, + {0x1.87e4p+35, 0x1.bffp+23}, + {0x1.237aacp+35, 0x1.825p+23}, + {0x1.4fab08p+35, 0x1.9e9p+23}, + {0x1.338a14p+35, 0x1.8cdp+23}, + {0x1.6bc318p+35, 0x1.af9p+23}, + {0x1.73d04cp+35, 0x1.b45p+23}, + {0x1.0f7694p+35, 0x1.74dp+23}, + {0x1.479eb8p+35, 0x1.999p+23}, + {0x1.67c4f4p+35, 0x1.ad3p+23}, + {0x1.6fc6ep+35, 0x1.b1fp+23}, + {0x1.7fd6fcp+35, 0x1.bb5p+23}, + {0x1.137b98p+35, 0x1.779p+23}, + {0x1.53ba9p+35, 0x1.a11p+23}, + {0x1.83f6acp+35, 0x1.bdbp+23}, + {0x1.379f28p+35, 0x1.8f7p+23}, + {0x1.43a1fp+35, 0x1.971p+23}, + {0x1.1b9c5p+35, 0x1.7d1p+23}, + {0x1.0b792p+35, 0x1.721p+23}, + {0x1.2ba58cp+35, 0x1.87bp+23}, + {0x1.77df5cp+35, 0x1.b6bp+23}, + {0x1.63cc74p+35, 0x1.aadp+23}, + {0x1.5bec6p+35, 0x1.a61p+23}, + {0x1.5fd998p+35, 0x1.a87p+23}, + {0x1.4bd544p+35, 0x1.9c3p+23}, + {0x1.07833cp+35, 0x1.6f5p+23}, + {0x1.3fab68p+35, 0x1.949p+23}, + {0x1.2fad2cp+35, 0x1.8a5p+23}, + {0x1.1f880cp+35, 0x1.7fbp+23}, + {0x1.23aaf8p+35, 0x1.827p+23}, + {0x1.036758p+35, 0x1.6c7p+23}, + {0x1.7bf41p+35, 0x1.b91p+23}, + {0x1.4fdedcp+35, 0x1.9ebp+23}, + {0x1.3bbb2p+35, 0x1.921p+23}, + {0x1.6bf90cp+35, 0x1.afbp+23}, + {0x1.0fa53p+35, 0x1.74fp+23}, + {0x1.57d058p+35, 0x1.a39p+23}, + {0x1.17882cp+35, 0x1.7a5p+23}, + {0x1.27a4dp+35, 0x1.851p+23}, + {0x1.33bbbp+35, 0x1.8cfp+23}, + {0x1.67fa9cp+35, 0x1.ad5p+23}, + {0x1.53eeb4p+35, 0x1.a13p+23}, + {0x1.37d118p+35, 0x1.8f9p+23}, + {0x1.13aa8cp+35, 0x1.77bp+23}, + {0x1.1bcbf4p+35, 0x1.7d3p+23}, + {0x1.0ba764p+35, 
0x1.723p+23}, + {0x1.2bd684p+35, 0x1.87dp+23}, + {0x1.47d1ecp+35, 0x1.99bp+23}, + {0x1.43d4d4p+35, 0x1.973p+23}, + {0x1.6ffd2p+35, 0x1.b21p+23}, + {0x1.3fddfcp+35, 0x1.94bp+23}, + {0x1.07b128p+35, 0x1.6f7p+23}, + {0x1.2fde78p+35, 0x1.8a7p+23}, + {0x1.23db48p+35, 0x1.829p+23}, + {0x1.0394e8p+35, 0x1.6c9p+23}, + {0x1.1fb804p+35, 0x1.7fdp+23}, + {0x1.3bed64p+35, 0x1.923p+23}, + {0x1.0fd3dp+35, 0x1.751p+23}, + {0x1.17b778p+35, 0x1.7a7p+23}, + {0x1.27d574p+35, 0x1.853p+23}, + {0x1.33ed5p+35, 0x1.8d1p+23}, + {0x1.13d984p+35, 0x1.77dp+23}, + {0x1.1bfb9cp+35, 0x1.7d5p+23}, + {0x1.0bd5acp+35, 0x1.725p+23}, + {0x1.07df18p+35, 0x1.6f9p+23}, + {0x1.03c27cp+35, 0x1.6cbp+23}, + {0x1.1fe8p+35, 0x1.7ffp+23}, + {0x1.17e6c8p+35, 0x1.7a9p+23}, + {0x1.03f014p+35, 0x1.6cdp+23}, +}; + +#endif // LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H diff --git a/libc/test/src/math/hypotf_test.cpp b/libc/test/src/math/hypotf_test.cpp index 2e958d63d84ad..437e0fc29b76b 100644 --- a/libc/test/src/math/hypotf_test.cpp +++ b/libc/test/src/math/hypotf_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "HypotTest.h" +#include "hypotf_hard_to_round.h" #include "src/math/hypotf.h" @@ -23,3 +24,7 @@ TEST_F(LlvmLibcHypotfTest, SubnormalRange) { TEST_F(LlvmLibcHypotfTest, NormalRange) { test_normal_range(&__llvm_libc::hypotf); } + +TEST_F(LlvmLibcHypotfTest, TrickyInputs) { + test_input_list(&__llvm_libc::hypotf, N_HARD_TO_ROUND, HYPOTF_HARD_TO_ROUND); +} From b80db150cdba17b3d4970389025f95b1c93482b8 Mon Sep 17 00:00:00 2001 From: Evgeny Shulgin Date: Thu, 20 Jan 2022 13:34:28 -0500 Subject: [PATCH 046/946] Add `isConsteval` matcher Support C++20 consteval functions and C++2b if consteval for AST Matchers. 
--- clang/docs/LibASTMatchersReference.html | 32 +++++++++++++++++++ clang/docs/ReleaseNotes.rst | 2 ++ clang/include/clang/ASTMatchers/ASTMatchers.h | 19 +++++++++++ clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1 + .../ASTMatchers/ASTMatchersNarrowingTest.cpp | 29 +++++++++++++++++ 5 files changed, 83 insertions(+) diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index 9e71608a86250..fb856de0936dc 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -4201,6 +4201,22 @@

Narrowing Matchers

+Matcher<FunctionDecl>isConsteval +
Matches consteval function declarations and if consteval/if ! consteval
+statements.
+
+Given:
+  consteval int a();
+  void b() { if consteval {} }
+  void c() { if ! consteval {} }
+  void d() { if ! consteval {} else {} }
+functionDecl(isConsteval())
+  matches the declaration of "int a()".
+ifStmt(isConsteval())
+  matches the if statement in "void b()", "void c()", "void d()".
+
+ + Matcher<FunctionDecl>isConstexpr
Matches constexpr variable and function declarations,
        and if constexpr.
@@ -4473,6 +4489,22 @@ 

Narrowing Matchers

+Matcher<IfStmt>isConsteval +
Matches consteval function declarations and if consteval/if ! consteval
+statements.
+
+Given:
+  consteval int a();
+  void b() { if consteval {} }
+  void c() { if ! consteval {} }
+  void d() { if ! consteval {} else {} }
+functionDecl(isConsteval())
+  matches the declaration of "int a()".
+ifStmt(isConsteval())
+  matches the if statement in "void b()", "void c()", "void d()".
+
+ + Matcher<IfStmt>isConstexpr
Matches constexpr variable and function declarations,
        and if constexpr.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 08e4d75299d2b..c787d355a3148 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -316,6 +316,8 @@ AST Matchers
   and the underlying ``Type`` with ``hasUnderlyingType``.
   ``hasDeclaration`` continues to see through the alias and apply to the
   underlying type.
+- Added the ``isConsteval`` matcher to match ``consteval`` function
+  declarations as well as ``if consteval`` and ``if ! consteval`` statements.
 
 clang-format
 ------------
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 599ab407c442b..c934b708cb96c 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -5170,6 +5170,25 @@ AST_POLYMORPHIC_MATCHER(isNoThrow,
   return FnTy->isNothrow();
 }
 
+/// Matches consteval function declarations and if consteval/if ! consteval
+/// statements.
+///
+/// Given:
+/// \code
+///   consteval int a();
+///   void b() { if consteval {} }
+///   void c() { if ! consteval {} }
+///   void d() { if ! consteval {} else {} }
+/// \endcode
+/// functionDecl(isConsteval())
+///   matches the declaration of "int a()".
+/// ifStmt(isConsteval())
+///   matches the if statement in "void b()", "void c()", "void d()".
+AST_POLYMORPHIC_MATCHER(isConsteval,
+                        AST_POLYMORPHIC_SUPPORTED_TYPES(FunctionDecl, IfStmt)) {
+  return Node.isConsteval();
+}
+
 /// Matches constexpr variable and function declarations,
 ///        and if constexpr.
 ///
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 4f3efdb0a6630..2210c5413cc5a 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -404,6 +404,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isComparisonOperator);
   REGISTER_MATCHER(isConst);
   REGISTER_MATCHER(isConstQualified);
+  REGISTER_MATCHER(isConsteval);
   REGISTER_MATCHER(isConstexpr);
   REGISTER_MATCHER(isCopyAssignmentOperator);
   REGISTER_MATCHER(isCopyConstructor);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index f604d0a19e18f..51946e1430cf6 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -1790,6 +1790,35 @@ TEST_P(ASTMatchersTest, IsNoThrow_CXX11) {
   EXPECT_TRUE(matches("void f() noexcept;", functionProtoType(isNoThrow())));
 }
 
+TEST_P(ASTMatchersTest, IsConsteval) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("consteval int bar();",
+                      functionDecl(hasName("bar"), isConsteval())));
+  EXPECT_TRUE(notMatches("constexpr int bar();",
+                         functionDecl(hasName("bar"), isConsteval())));
+  EXPECT_TRUE(
+      notMatches("int bar();", functionDecl(hasName("bar"), isConsteval())));
+}
+
+TEST_P(ASTMatchersTest, IsConsteval_MatchesIfConsteval) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("void baz() { if consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      matches("void baz() { if ! consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(matches("void baz() { if ! consteval {} else {} }",
+                      ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      matches("void baz() { if not consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(notMatches("void baz() { if constexpr(1 > 0) {} }",
+                         ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConsteval())));
+}
+
 TEST_P(ASTMatchersTest, IsConstexpr) {
   if (!GetParam().isCXX11OrLater()) {
     return;

From eb6c6e60585df1a05c11a7b2625cbecffb88e085 Mon Sep 17 00:00:00 2001
From: Roman Lebedev 
Date: Thu, 20 Jan 2022 21:37:26 +0300
Subject: [PATCH 047/946] [NFC][InstCombine] Add test showing failure to sink
 into `resume` block

---
 .../InstCombine/sink-into-resume-block.ll     | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/sink-into-resume-block.ll

diff --git a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
new file mode 100644
index 0000000000000..a687977d4b975
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Check that InstCombine can sink instructions to the landingpad of the invoke.
+
+define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t0_noop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
+; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @consume(i32 [[V0]])
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG]], 24
+; CHECK-NEXT:    call void @consume(i32 [[V1]])
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = call i1 @cond()
+  %v0 = add i32 %arg, 42
+  %v1 = add i32 %arg, 24
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @consume(i32 %v0)
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.end:
+  call void @consume(i32 %v1)
+  call void @sideeffect()
+  ret void
+}
+
+declare i1 @cond()
+
+declare void @sideeffect()
+
+declare void @simple_throw() noreturn
+
+declare void @destructor()
+
+declare void @consume(i32)
+
+declare dso_local i32 @__gxx_personality_v0(...)

From 9abc593e98891b4cd8ffd2ca308cabe6ea5d142f Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 10:36:21 -0800
Subject: [PATCH 048/946] [TargetLowering][InstCombine] Simplify BSwap demanded
 bits code a little. NFC

Use alignDown instead of &= ~7.

Replace InputBit with NLZ and ResultBit with NTZ. (BitWidth - NLZ - NTZ == 8)
so (BitWidth - NTZ - 8 == NLZ), which is exactly the old InputBit.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D117804
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 12 ++++-------
 .../InstCombineSimplifyDemanded.cpp           | 21 +++++++++----------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6af5609993795..3b53a5b8b7532 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1815,20 +1815,16 @@ bool TargetLowering::SimplifyDemandedBits(
     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
     // have 14 leading zeros, round to 8.
-    NLZ &= ~7;
-    NTZ &= ~7;
+    NLZ = alignDown(NLZ, 8);
+    NTZ = alignDown(NTZ, 8);
     // If we need exactly one byte, we can do this transformation.
     if (BitWidth - NLZ - NTZ == 8) {
-      unsigned ResultBit = NTZ;
-      unsigned InputBit = BitWidth - NTZ - 8;
-
       // Replace this with either a left or right shift to get the byte into
       // the right place.
-      unsigned ShiftOpcode = InputBit > ResultBit ? ISD::SRL : ISD::SHL;
+      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
         EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
-        unsigned ShiftAmount =
-            InputBit > ResultBit ? InputBit - ResultBit : ResultBit - InputBit;
+        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
         SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
         return TLO.CombineTo(Op, NewOp);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 4dc712f325362..71a5ae24eead9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -800,22 +800,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
         // have 14 leading zeros, round to 8.
-        NLZ &= ~7;
-        NTZ &= ~7;
+        NLZ = alignDown(NLZ, 8);
+        NTZ = alignDown(NTZ, 8);
         // If we need exactly one byte, we can do this transformation.
-        if (BitWidth-NLZ-NTZ == 8) {
-          unsigned ResultBit = NTZ;
-          unsigned InputBit = BitWidth-NTZ-8;
-
+        if (BitWidth - NLZ - NTZ == 8) {
           // Replace this with either a left or right shift to get the byte into
           // the right place.
           Instruction *NewVal;
-          if (InputBit > ResultBit)
-            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
-                    ConstantInt::get(I->getType(), InputBit-ResultBit));
+          if (NLZ > NTZ)
+            NewVal = BinaryOperator::CreateLShr(
+                II->getArgOperand(0),
+                ConstantInt::get(I->getType(), NLZ - NTZ));
           else
-            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
-                    ConstantInt::get(I->getType(), ResultBit-InputBit));
+            NewVal = BinaryOperator::CreateShl(
+                II->getArgOperand(0),
+                ConstantInt::get(I->getType(), NTZ - NLZ));
           NewVal->takeName(I);
           return InsertNewInstWith(NewVal, *I);
         }

From b58cc9fb23486e73ad7502a451bf338df3b2d444 Mon Sep 17 00:00:00 2001
From: eopXD 
Date: Thu, 20 Jan 2022 10:36:23 -0800
Subject: [PATCH 049/946] [NFC][RISCV] Add end-of-line symbol in target-feature
 testcases

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117808
---
 .../test/Preprocessor/riscv-target-features.c | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 7c685b50e873e..ba310229f14e5 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -7,14 +7,14 @@
 // CHECK-NOT: __riscv_m
 // CHECK-NOT: __riscv_mul
 // CHECK-NOT: __riscv_muldiv
-// CHECK-NOT: __riscv_a 2000000
+// CHECK-NOT: __riscv_a 2000000{{$}}
 // CHECK-NOT: __riscv_atomic
-// CHECK-NOT: __riscv_f 2000000
+// CHECK-NOT: __riscv_f 2000000{{$}}
 // CHECK-NOT: __riscv_d
 // CHECK-NOT: __riscv_flen
 // CHECK-NOT: __riscv_fdiv
 // CHECK-NOT: __riscv_fsqrt
-// CHECK-NOT: __riscv_c 2000000
+// CHECK-NOT: __riscv_c 2000000{{$}}
 // CHECK-NOT: __riscv_compressed
 // CHECK-NOT: __riscv_b
 // CHECK-NOT: __riscv_bitmanip
@@ -38,7 +38,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64im -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s
 // CHECK-M-EXT: __riscv_div 1
-// CHECK-M-EXT: __riscv_m 2000000
+// CHECK-M-EXT: __riscv_m 2000000{{$}}
 // CHECK-M-EXT: __riscv_mul 1
 // CHECK-M-EXT: __riscv_muldiv 1
 
@@ -46,14 +46,14 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-A-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ia -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-A-EXT %s
-// CHECK-A-EXT: __riscv_a 2000000
+// CHECK-A-EXT: __riscv_a 2000000{{$}}
 // CHECK-A-EXT: __riscv_atomic 1
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32if -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64if -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s
-// CHECK-F-EXT: __riscv_f 2000000
+// CHECK-F-EXT: __riscv_f 2000000{{$}}
 // CHECK-F-EXT: __riscv_fdiv 1
 // CHECK-F-EXT: __riscv_flen 32
 // CHECK-F-EXT: __riscv_fsqrt 1
@@ -62,7 +62,7 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-D-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ifd -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-D-EXT %s
-// CHECK-D-EXT: __riscv_d 2000000
+// CHECK-D-EXT: __riscv_d 2000000{{$}}
 // CHECK-D-EXT: __riscv_fdiv 1
 // CHECK-D-EXT: __riscv_flen 64
 // CHECK-D-EXT: __riscv_fsqrt 1
@@ -95,7 +95,7 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ic -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s
-// CHECK-C-EXT: __riscv_c 2000000
+// CHECK-C-EXT: __riscv_c 2000000{{$}}
 // CHECK-C-EXT: __riscv_compressed 1
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
@@ -150,7 +150,7 @@
 // RUN: -march=rv64izbe0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBE-EXT %s
 // CHECK-ZBE-NOT: __riscv_b
-// CHECK-ZBE-EXT: __riscv_zbe 93000
+// CHECK-ZBE-EXT: __riscv_zbe 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbf0p93 -x c -E -dM %s \
@@ -159,7 +159,7 @@
 // RUN: -march=rv64izbf0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBF-EXT %s
 // CHECK-ZBF-NOT: __riscv_b
-// CHECK-ZBF-EXT: __riscv_zbf 93000
+// CHECK-ZBF-EXT: __riscv_zbf 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbm0p93 -x c -E -dM %s \
@@ -168,7 +168,7 @@
 // RUN: -march=rv64izbm0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBM-EXT %s
 // CHECK-ZBM-NOT: __riscv_b
-// CHECK-ZBM-EXT: __riscv_zbm 93000
+// CHECK-ZBM-EXT: __riscv_zbm 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbp0p93 -x c -E -dM %s \
@@ -177,7 +177,7 @@
 // RUN: -march=rv64izbp0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBP-EXT %s
 // CHECK-ZBP-NOT: __riscv_b
-// CHECK-ZBP-EXT: __riscv_zbp 93000
+// CHECK-ZBP-EXT: __riscv_zbp 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbr0p93 -x c -E -dM %s \
@@ -186,7 +186,7 @@
 // RUN: -march=rv64izbr0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBR-EXT %s
 // CHECK-ZBR-NOT: __riscv_b
-// CHECK-ZBR-EXT: __riscv_zbr 93000
+// CHECK-ZBR-EXT: __riscv_zbr 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izbs1p0 -x c -E -dM %s \
@@ -210,7 +210,7 @@
 // RUN: -march=rv64izbt0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBT-EXT %s
 // CHECK-ZBT-NOT: __riscv_b
-// CHECK-ZBT-EXT: __riscv_zbt 93000
+// CHECK-ZBT-EXT: __riscv_zbt 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32iv0p10 -x c -E -dM %s \
@@ -218,9 +218,9 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64iv0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// CHECK-V-EXT: __riscv_v 10000
+// CHECK-V-EXT: __riscv_v 10000{{$}}
 // CHECK-V-EXT: __riscv_vector 1
-// CHECK-V-EXT: __riscv_zvlsseg 10000
+// CHECK-V-EXT: __riscv_zvlsseg 10000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfhmin1p0 -x c -E -dM %s \
@@ -228,7 +228,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izfhmin1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZFHMIN-EXT %s
-// CHECK-ZFHMIN-EXT: __riscv_zfhmin 10000
+// CHECK-ZFHMIN-EXT: __riscv_zfhmin 1000000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfh1p0 -x c -E -dM %s \
@@ -236,7 +236,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izfh1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZFH-EXT %s
-// CHECK-ZFH-EXT: __riscv_zfh 10000
+// CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64iv0p10 -x c -E -dM %s -o - \
@@ -297,11 +297,11 @@
 // CHECK-ZVE64D-EXT: __riscv_v_elen_fp 64
 // CHECK-ZVE64D-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64D-EXT: __riscv_vector 1
-// CHECK-ZVE64D-EXT: __riscv_zve32f 10000
-// CHECK-ZVE64D-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64d 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64f 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64D-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64d 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64f 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64ifzve64f0p10 -x c -E -dM %s -o - \
@@ -310,10 +310,10 @@
 // CHECK-ZVE64F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE64F-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64F-EXT: __riscv_vector 1
-// CHECK-ZVE64F-EXT: __riscv_zve32f 10000
-// CHECK-ZVE64F-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64F-EXT: __riscv_zve64f 10000
-// CHECK-ZVE64F-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64F-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64f 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64izve64x0p10 -x c -E -dM %s -o - \
@@ -322,8 +322,8 @@
 // CHECK-ZVE64X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE64X-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64X-EXT: __riscv_vector 1
-// CHECK-ZVE64X-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64X-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64X-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64X-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64ifzve32f0p10 -x c -E -dM %s -o - \
@@ -332,8 +332,8 @@
 // CHECK-ZVE32F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE32F-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32F-EXT: __riscv_vector 1
-// CHECK-ZVE32F-EXT: __riscv_zve32f 10000
-// CHECK-ZVE32F-EXT: __riscv_zve32x 10000
+// CHECK-ZVE32F-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE32F-EXT: __riscv_zve32x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64izve32x0p10 -x c -E -dM %s -o - \
@@ -342,4 +342,4 @@
 // CHECK-ZVE32X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE32X-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32X-EXT: __riscv_vector 1
-// CHECK-ZVE32X-EXT: __riscv_zve32x 10000
+// CHECK-ZVE32X-EXT: __riscv_zve32x 10000{{$}}

From 587dccfb1238724c3365b12f24f7fc343d60974b Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Thu, 20 Jan 2022 12:29:12 -0500
Subject: [PATCH 050/946] [InstCombine] avoid 'tmp' usage in test files; NFC

The update script (utils/update_test_checks.py) warns against 'tmp' value
names because they can conflict with the default FileCheck names given to
anonymous values in the IR.
---
 .../canonicalize-lshr-shl-to-masking.ll       | 216 ++++++++---------
 .../canonicalize-shl-lshr-to-masking.ll       | 218 +++++++++---------
 2 files changed, 217 insertions(+), 217 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
index 40bc4aaab21c2..c8ccd8f6549a5 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
@@ -19,8 +19,8 @@ define i8 @positive_samevar(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
 
@@ -29,8 +29,8 @@ define i8 @positive_sameconst(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -40,8 +40,8 @@ define i8 @positive_biggerlshr(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -51,8 +51,8 @@ define i8 @positive_biggershl(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -66,8 +66,8 @@ define i8 @positive_samevar_shlnuw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nuw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nuw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -76,8 +76,8 @@ define i8 @positive_sameconst_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -87,8 +87,8 @@ define i8 @positive_biggerlshr_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -98,8 +98,8 @@ define i8 @positive_biggershl_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -113,8 +113,8 @@ define i8 @positive_samevar_shlnsw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nsw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -123,8 +123,8 @@ define i8 @positive_sameconst_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -134,8 +134,8 @@ define i8 @positive_biggerlshr_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -145,8 +145,8 @@ define i8 @positive_biggershl_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -160,8 +160,8 @@ define i8 @positive_samevar_shlnuwnsw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nuw nsw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nuw nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -170,8 +170,8 @@ define i8 @positive_sameconst_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -181,8 +181,8 @@ define i8 @positive_biggerlshr_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -192,8 +192,8 @@ define i8 @positive_biggershl_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -205,8 +205,8 @@ define i8 @positive_samevar_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
 
@@ -214,8 +214,8 @@ define i8 @positive_sameconst_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -224,8 +224,8 @@ define i8 @positive_biggerlshr_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -234,8 +234,8 @@ define i8 @positive_biggershl_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -247,8 +247,8 @@ define i8 @positive_samevar_shlnsw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nsw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -256,8 +256,8 @@ define i8 @positive_sameconst_shlnsw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -266,8 +266,8 @@ define i8 @positive_biggerlshr_shlnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -276,8 +276,8 @@ define i8 @positive_biggershl_shlnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nsw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -289,8 +289,8 @@ define i8 @positive_samevar_shlnuw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nuw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nuw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -298,8 +298,8 @@ define i8 @positive_sameconst_shlnuw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -308,8 +308,8 @@ define i8 @positive_biggerlshr_shlnuw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -318,8 +318,8 @@ define i8 @positive_biggershl_shlnuw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -331,8 +331,8 @@ define i8 @positive_samevar_shlnuwnsw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuwnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nuw nsw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nuw nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -340,8 +340,8 @@ define i8 @positive_sameconst_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuwnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -350,8 +350,8 @@ define i8 @positive_biggerlshr_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -360,8 +360,8 @@ define i8 @positive_biggershl_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -375,8 +375,8 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, %y
-  %ret = shl <2 x i8> %tmp0, %y
+  %t0 = lshr <2 x i8> %x, %y
+  %ret = shl <2 x i8> %t0, %y
   ret <2 x i8> %ret
 }
 
@@ -389,8 +389,8 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], 
 ; CHECK-NEXT:    ret <2 x i8> [[TMP0]]
 ;
-  %tmp0 = lshr <2 x i8> %x, 
-  %ret = shl <2 x i8> %tmp0, 
+  %t0 = lshr <2 x i8> %x, 
+  %ret = shl <2 x i8> %t0, 
   ret <2 x i8> %ret
 }
 
@@ -400,8 +400,8 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -411,8 +411,8 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -421,8 +421,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -432,8 +432,8 @@ define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], 
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, 
-  %ret = shl <2 x i8> %tmp0, 
+  %t0 = lshr <2 x i8> %x, 
+  %ret = shl <2 x i8> %t0, 
   ret <2 x i8> %ret
 }
 
@@ -443,8 +443,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -454,8 +454,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -465,8 +465,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -476,8 +476,8 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], 
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, 
-  %ret = shl <2 x i8> %tmp0, 
+  %t0 = lshr <2 x i8> %x, 
+  %ret = shl <2 x i8> %t0, 
   ret <2 x i8> %ret
 }
 
@@ -487,8 +487,8 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -498,8 +498,8 @@ define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -509,8 +509,8 @@ define <3 x i8> @positive_biggershl_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, 
-  %ret = shl <3 x i8> %tmp0, 
+  %t0 = lshr <3 x i8> %x, 
+  %ret = shl <3 x i8> %t0, 
   ret <3 x i8> %ret
 }
 
@@ -525,9 +525,9 @@ define i8 @positive_sameconst_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[X]], -8
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -538,9 +538,9 @@ define i8 @positive_biggerlshr_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw i8 [[TMP0]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -551,9 +551,9 @@ define i8 @positive_biggershl_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], 6
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -567,8 +567,8 @@ define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, 
-  %ret = shl <2 x i8> %tmp0, 
+  %t0 = lshr <2 x i8> %x, 
+  %ret = shl <2 x i8> %t0, 
   ret <2 x i8> %ret
 }
 
@@ -578,8 +578,8 @@ define <2 x i8> @positive_biggerLlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], 
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, 
-  %ret = shl <2 x i8> %tmp0, 
+  %t0 = lshr <2 x i8> %x, 
+  %ret = shl <2 x i8> %t0, 
   ret <2 x i8> %ret
 }
 
@@ -593,8 +593,8 @@ define i8 @negative_twovars(i8 %x, i8 %y, i8 %z) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl i8 %tmp0, %z ; $z, not %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl i8 %t0, %z ; %z, not %y
   ret i8 %ret
 }
 
@@ -608,8 +608,8 @@ define i8 @negative_oneuse(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], [[Y]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index 45aa22aa808f3..22952145d54a3 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -18,18 +18,18 @@ define i32 @positive_samevar(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  %ret = lshr i32 %tmp0, %y
+  %t0 = shl i32 %x, %y
+  %ret = lshr i32 %t0, %y
   ret i32 %ret
 }
 
 define i32 @positive_sameconst(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst(
-; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 134217727
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[X:%.*]], 134217727
+; CHECK-NEXT:    ret i32 [[T0]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 5
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
@@ -39,8 +39,8 @@ define i32 @positive_biggerShl(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 10
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 10
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
@@ -50,8 +50,8 @@ define i32 @positive_biggerLshr(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -61,8 +61,8 @@ define i32 @positive_biggerLshr_lshrexact(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr exact i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  %ret = lshr exact i32 %t0, 10
   ret i32 %ret
 }
 
@@ -74,8 +74,8 @@ define i32 @positive_samevar_shlnuw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuw(
 ; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
-  %tmp0 = shl nuw i32 %x, %y
-  %ret = lshr i32 %tmp0, %y ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, %y
+  %ret = lshr i32 %t0, %y ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -83,8 +83,8 @@ define i32 @positive_sameconst_shlnuw(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuw(
 ; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr i32 %t0, 5 ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -93,8 +93,8 @@ define i32 @positive_biggerShl_shlnuw(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 10
-  %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, 10
+  %ret = lshr i32 %t0, 5 ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -103,8 +103,8 @@ define i32 @positive_biggerLshr_shlnuw(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -113,8 +113,8 @@ define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr exact i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr exact i32 %t0, 10
   ret i32 %ret
 }
 
@@ -128,8 +128,8 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, %y
-  %ret = lshr <2 x i32> %tmp0, %y
+  %t0 = shl <2 x i32> %x, %y
+  %ret = lshr <2 x i32> %t0, %y
   ret <2 x i32> %ret
 }
 
@@ -139,33 +139,33 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec(
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[X:%.*]], 
-; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+; CHECK-NEXT:    [[T0:%.*]] = and <2 x i32> [[X:%.*]], 
+; CHECK-NEXT:    ret <2 x i32> [[T0]]
 ;
-  %tmp0 = shl <2 x i32> %x, 
-  %ret = lshr <2 x i32> %tmp0, 
+  %t0 = shl <2 x i32> %x, 
+  %ret = lshr <2 x i32> %t0, 
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
@@ -174,8 +174,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
@@ -185,41 +185,41 @@ define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], 
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, 
-  %ret = lshr <2 x i32> %tmp0, 
+  %t0 = shl <2 x i32> %x, 
+  %ret = lshr <2 x i32> %t0, 
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef2(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
@@ -229,41 +229,41 @@ define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], 
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, 
-  %ret = lshr <2 x i32> %tmp0, 
+  %t0 = shl <2 x i32> %x, 
+  %ret = lshr <2 x i32> %t0, 
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef2(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, 
-  %ret = lshr <3 x i32> %tmp0, 
+  %t0 = shl <3 x i32> %x, 
+  %ret = lshr <3 x i32> %t0, 
   ret <3 x i32> %ret
 }
 
@@ -273,70 +273,70 @@ define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) {
 
 define i32 @positive_sameconst_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[X]], 134217727
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 define i32 @positive_biggerShl_shlnuw_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_shlnuw_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 10
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 10
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl nuw i32 %x, 10
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 define i32 @positive_biggerLshr_shlnuw_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_shlnuw_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[X]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
 ; NOTE: creates one extra instruction, but this seems intentional.
 define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_multiuse_extrainstr(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 10
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X]], 5
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 10
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 10
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 ; NOTE: creates one extra instruction, but this seems intentional.
 define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_multiuse_extrainstr(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 5
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -346,23 +346,23 @@ define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 
 define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, 
-  %ret = lshr <2 x i32> %tmp0, 
+  %t0 = shl <2 x i32> %x, 
+  %ret = lshr <2 x i32> %t0, 
   ret <2 x i32> %ret
 }
 
 define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], 
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[TMP0]], 
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], 
+; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], 
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, 
-  %ret = lshr <2 x i32> %tmp0, 
+  %t0 = shl <2 x i32> %x, 
+  %ret = lshr <2 x i32> %t0, 
   ret <2 x i32> %ret
 }
 
@@ -372,12 +372,12 @@ define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
 
 define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @negative_twovars(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  %ret = lshr i32 %tmp0, %z ; $z, not %y
+  %t0 = shl i32 %x, %y
+  %ret = lshr i32 %t0, %z ; $z, not %y
   ret i32 %ret
 }
 
@@ -386,13 +386,13 @@ declare void @use32(i32)
 ; One use only.
 define i32 @negative_oneuse(i32 %x, i32 %y) {
 ; CHECK-LABEL: @negative_oneuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[TMP0]], [[Y]]
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], [[Y]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, %y
+  %t0 = shl i32 %x, %y
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, %y
   ret i32 %ret
 }

From 2d031ec5e53f4e28ea5cc02b4cfdead98a9c0007 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Thu, 20 Jan 2022 13:35:58 -0500
Subject: [PATCH 051/946] [InstCombine] add one-use check to opposite shift
 folds

Test comments say this might be intentional, but I don't
see any hard evidence to support it. The extra instruction
shows up as a potential regression in D117680.

One test does show a missed fold that might be recovered
with better demanded bits analysis.
---
 .../InstCombine/InstCombineShifts.cpp         | 30 +++++++++++--------
 .../canonicalize-shl-lshr-to-masking.ll       | 12 ++++----
 llvm/test/Transforms/InstCombine/shift.ll     |  2 +-
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0ade25f768253..9acad19df9df5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1032,12 +1032,13 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           NewLShr->setIsExact(I.isExact());
           return NewLShr;
         }
-        // (X << C1) >>u C  --> (X >>u (C - C1)) & (-1 >> C)
-        Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
-        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-        return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
-      }
-      if (C1->ugt(ShAmtC)) {
+        if (Op0->hasOneUse()) {
+          // (X << C1) >>u C  --> (X >>u (C - C1)) & (-1 >> C)
+          Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
+          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+          return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
+        }
+      } else if (C1->ugt(ShAmtC)) {
         unsigned ShlAmtC = C1->getZExtValue();
         Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
         if (cast(Op0)->hasNoUnsignedWrap()) {
@@ -1046,15 +1047,18 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           NewShl->setHasNoUnsignedWrap(true);
           return NewShl;
         }
-        // (X << C1) >>u C  --> X << (C1 - C) & (-1 >> C)
-        Value *NewShl = Builder.CreateShl(X, ShiftDiff);
+        if (Op0->hasOneUse()) {
+          // (X << C1) >>u C  --> X << (C1 - C) & (-1 >> C)
+          Value *NewShl = Builder.CreateShl(X, ShiftDiff);
+          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+          return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+        }
+      } else {
+        assert(*C1 == ShAmtC);
+        // (X << C) >>u C --> X & (-1 >>u C)
         APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-        return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
       }
-      assert(*C1 == ShAmtC);
-      // (X << C) >>u C --> X & (-1 >>u C)
-      APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
 
     // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index 22952145d54a3..265570dfe6d56 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -310,13 +310,13 @@ define i32 @positive_biggerLshr_shlnuw_multiuse(i32 %x) {
   ret i32 %ret
 }
 
-; NOTE: creates one extra instruction, but this seems intentional.
+; negative test - don't create extra instructions
+
 define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_multiuse_extrainstr(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 10
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X]], 5
-; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
+; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[T0]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %t0 = shl i32 %x, 10
@@ -325,13 +325,13 @@ define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
   ret i32 %ret
 }
 
-; NOTE: creates one extra instruction, but this seems intentional.
+; negative test - don't create extra instructions
+
 define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_multiuse_extrainstr(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], 10
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %t0 = shl i32 %x, 5
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 11d9be073b025..d165c687849ce 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -650,7 +650,7 @@ define i8 @test39(i32 %a0) {
 ; CHECK-NEXT:    [[I49:%.*]] = shl i8 [[I4]], 6
 ; CHECK-NEXT:    [[I50:%.*]] = and i8 [[I49]], 64
 ; CHECK-NEXT:    [[I51:%.*]] = xor i8 [[I50]], [[I5]]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[I4]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i8 [[I5]], 3
 ; CHECK-NEXT:    [[I54:%.*]] = and i8 [[TMP0]], 16
 ; CHECK-NEXT:    [[I551:%.*]] = or i8 [[I54]], [[I51]]
 ; CHECK-NEXT:    ret i8 [[I551]]

From c65186c89f35b7b599c41183def666a2bde62ddd Mon Sep 17 00:00:00 2001
From: Marco Elver 
Date: Thu, 20 Jan 2022 19:36:16 +0100
Subject: [PATCH 052/946] [clang] Improve -Wdeclaration-after-statement

With 118f966b46cf, Clang matches GCC's behaviour and allows enabling
-Wdeclaration-after-statement with C99 and later.

However, the check for mixing declarations and code is not a constant time
algorithm, and therefore should be guarded with Diags.isIgnored().

Furthermore, improve test coverage with: non-pedantic C89 with the
warning; C11 with the warning; and when using -Wall.

Finally, mention the changed behaviour in ReleaseNotes.rst.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D117232
---
 clang/docs/ReleaseNotes.rst        |  5 +++++
 clang/lib/Sema/SemaStmt.cpp        |  8 +++++---
 clang/test/Sema/warn-mixed-decls.c | 10 ++++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c787d355a3148..2eec63901932e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -58,6 +58,11 @@ Improvements to Clang's diagnostics
   release being diagnosed against). These new groups are automatically implied
   when passing ``-Wc++N-extensions``. Resolves PR33518.
 
+- Support ``-Wdeclaration-after-statement`` with C99 and later standards, and
+  not just C89, matching GCC's behaviour. A notable usecase is supporting style
+  guides that forbid mixing declarations and code, but want to move to newer C
+  standards.
+
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index ef498f9a52282..746eb82a5bdc7 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -413,7 +413,10 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,
   // If we're in C mode, check that we don't have any decls after stmts.  If
   // so, emit an extension diagnostic in C89 and potentially a warning in later
   // versions.
-  if (!getLangOpts().CPlusPlus) {
+  const unsigned MixedDeclsCodeID = getLangOpts().C99
+                                        ? diag::warn_mixed_decls_code
+                                        : diag::ext_mixed_decls_code;
+  if (!getLangOpts().CPlusPlus && !Diags.isIgnored(MixedDeclsCodeID, L)) {
     // Note that __extension__ can be around a decl.
     unsigned i = 0;
     // Skip over all declarations.
@@ -426,8 +429,7 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,
 
     if (i != NumElts) {
       Decl *D = *cast(Elts[i])->decl_begin();
-      Diag(D->getLocation(), !getLangOpts().C99 ? diag::ext_mixed_decls_code
-                                                : diag::warn_mixed_decls_code);
+      Diag(D->getLocation(), MixedDeclsCodeID);
     }
   }
 
diff --git a/clang/test/Sema/warn-mixed-decls.c b/clang/test/Sema/warn-mixed-decls.c
index 219d64472b589..b8a7dc1e2bc09 100644
--- a/clang/test/Sema/warn-mixed-decls.c
+++ b/clang/test/Sema/warn-mixed-decls.c
@@ -1,13 +1,23 @@
 /* RUN: %clang_cc1 -fsyntax-only -verify -std=c89 -pedantic %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify -std=c89 -Wdeclaration-after-statement %s
+ */
 /* RUN: %clang_cc1 -fsyntax-only -verify -std=c99 -Wdeclaration-after-statement %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify -std=c11 -Wdeclaration-after-statement %s
+ */
 
 /* Should not emit diagnostic when not pedantic, not enabled or in C++ Code*/
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c89 %s
  */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c99 %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c89 -Wall %s
+ */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c99 -Wall -pedantic %s
+ */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c11 -Wall -pedantic %s
+ */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -x c++ %s
  */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -x c++ -Wdeclaration-after-statement %s

From df31ff1b29bc4c2308ec5df8a7ff0ec2ab0942d4 Mon Sep 17 00:00:00 2001
From: John Ericson 
Date: Wed, 19 Jan 2022 06:45:07 +0000
Subject: [PATCH 053/946] [cmake] Make include(GNUInstallDirs) always below
 project(..)

Its defaulting logic must go after `project(..)` to work correctly, but `project(..)` is often in a standalone condition making this
awkward, since the rest of the condition code may also need GNUInstallDirs.

The good thing is there are the various standalone booleans, which I had missed before. This makes splitting the conditional blocks less awkward.

Reviewed By: arichardson, phosek, beanz, ldionne, #libunwind, #libc, #libc_abi

Differential Revision: https://reviews.llvm.org/D117639
---
 flang/CMakeLists.txt                    | 10 +++++++---
 libcxx/CMakeLists.txt                   | 11 ++++++-----
 libcxxabi/CMakeLists.txt                | 12 +++++++-----
 libunwind/CMakeLists.txt                | 19 ++++++++++++-------
 lld/CMakeLists.txt                      | 10 ++++++----
 lldb/CMakeLists.txt                     |  9 +++++++--
 lldb/cmake/modules/LLDBStandalone.cmake |  2 --
 lldb/tools/debugserver/CMakeLists.txt   |  5 +++++
 llvm/CMakeLists.txt                     |  5 +++--
 mlir/CMakeLists.txt                     |  9 +++++++--
 polly/CMakeLists.txt                    |  8 ++++++--
 pstl/CMakeLists.txt                     |  5 +++--
 12 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index abb9a47d3abb4..5caa79e8da477 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -7,8 +7,6 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-include(GNUInstallDirs)
-
 set(FLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE)
@@ -27,7 +25,14 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   message("Building Flang as a standalone project.")
   project(Flang)
   set(FLANG_STANDALONE_BUILD ON)
+else()
+  set(FLANG_STANDALONE_BUILD OFF)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if (FLANG_STANDALONE_BUILD)
   set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
   if (NOT MSVC_IDE)
     set(LLVM_ENABLE_ASSERTIONS ${ENABLE_ASSERTIONS}
@@ -179,7 +184,6 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   endif()
 
 else()
-  set(FLANG_STANDALONE_BUILD OFF)
   option(FLANG_INCLUDE_TESTS
          "Generate build targets for the Flang unit tests."
          ${LLVM_INCLUDE_TESTS})
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index b44b16088effe..77df59e4cd755 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -10,8 +10,6 @@ endif()
 #===============================================================================
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -39,14 +37,17 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBCXX_STANDALONE_BUILD 1)
+  set(LIBCXX_STANDALONE_BUILD TRUE)
   set(LLVM_LIT_OUTPUT_DIR "${LIBCXX_BINARY_DIR}/bin")
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if (LIBCXX_STANDALONE_BUILD)
   # Find the LLVM sources and simulate LLVM CMake options.
   include(HandleOutOfTreeLLVM)
-endif()
 
-if (LIBCXX_STANDALONE_BUILD)
   find_package(Python3 COMPONENTS Interpreter)
   if(NOT Python3_Interpreter_FOUND)
     message(SEND_ERROR "Python3 not found. Python3 is required")
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 78f486418af79..ecbc7091864e7 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -10,8 +10,6 @@ endif()
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -37,17 +35,21 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
+  set(LIBCXXABI_STANDALONE_BUILD TRUE)
+
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBCXXABI_STANDALONE_BUILD 1)
   set(LLVM_LIT_OUTPUT_DIR "${LIBCXXABI_BINARY_DIR}/bin")
+endif()
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if (LIBCXXABI_STANDALONE_BUILD)
   # Find the LLVM sources and simulate LLVM CMake options.
   include(HandleOutOfTreeLLVM)
-endif()
 
-if (LIBCXXABI_STANDALONE_BUILD)
   find_package(Python3 COMPONENTS Interpreter)
   if(NOT Python3_Interpreter_FOUND)
     message(WARNING "Python3 not found, using python2 as a fallback")
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 03bd316d331cb..04550ae51a422 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -8,8 +8,6 @@ endif()
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -30,21 +28,28 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B
   # linking.
   include(EnableLanguageNolink)
   project(libunwind LANGUAGES NONE)
-  llvm_enable_language_nolink(C CXX ASM)
 
   set(PACKAGE_NAME libunwind)
   set(PACKAGE_VERSION 14.0.0git)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
+  set(LIBUNWIND_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LIBUNWIND_STANDALONE_BUILD)
+  llvm_enable_language_nolink(C CXX ASM)
+
+  # Find the LLVM sources and simulate LLVM CMake options.
+  include(HandleOutOfTreeLLVM)
+
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBUNWIND_STANDALONE_BUILD 1)
   set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin")
-
-  # Find the LLVM sources and simulate LLVM CMake options.
-  include(HandleOutOfTreeLLVM)
 else()
   set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py")
 endif()
diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt
index e1a29b884d178..1a53c5bf47a8d 100644
--- a/lld/CMakeLists.txt
+++ b/lld/CMakeLists.txt
@@ -1,12 +1,16 @@
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # If we are not building as a part of LLVM, build LLD as an
 # standalone project, using LLVM as an external library:
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(lld)
+  set(LLD_BUILT_STANDALONE TRUE)
+endif()
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LLD_BUILT_STANDALONE)
   set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
   # Rely on llvm-config.
@@ -140,8 +144,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   if(LLVM_HAVE_LIBXAR)
     set(XAR_LIB xar)
   endif()
-
-  set(LLD_BUILT_STANDALONE TRUE)
 endif() # standalone
 
 set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index 79d451965ed43..edb4c513a64d2 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -1,7 +1,5 @@
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # Add path for custom modules.
 set(CMAKE_MODULE_PATH
   ${CMAKE_MODULE_PATH}
@@ -13,6 +11,13 @@ set(CMAKE_MODULE_PATH
 # using LLVM as an external library.
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(lldb)
+  set(LLDB_BUILT_STANDALONE TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LLDB_BUILT_STANDALONE)
   include(LLDBStandalone)
 
   set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 5be9e57f23bfc..1d3d1bbcc25da 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -108,5 +108,3 @@ include_directories(
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-
-set(LLDB_BUILT_STANDALONE 1)
diff --git a/lldb/tools/debugserver/CMakeLists.txt b/lldb/tools/debugserver/CMakeLists.txt
index eba5c41491329..3a585a2e3d48d 100644
--- a/lldb/tools/debugserver/CMakeLists.txt
+++ b/lldb/tools/debugserver/CMakeLists.txt
@@ -2,7 +2,12 @@ cmake_minimum_required(VERSION 3.13.4)
 
 project(Debugserver LANGUAGES C CXX ASM-ATT)
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  set(LLDB_BUILT_STANDALONE TRUE)
+
   set(CMAKE_MODULE_PATH
     ${CMAKE_MODULE_PATH}
     "${CMAKE_SOURCE_DIR}/../../cmake"
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 687bc6489b4ac..76c918306b225 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -2,8 +2,6 @@
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()`
 # New in CMake 3.20. https://cmake.org/cmake/help/latest/policy/CMP0116.html
 if(POLICY CMP0116)
@@ -47,6 +45,9 @@ project(LLVM
   VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}
   LANGUAGES C CXX ASM)
 
+# Must go after project(..)
+include(GNUInstallDirs)
+
 set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
 set(CMAKE_CXX_STANDARD_REQUIRED YES)
 if (CYGWIN)
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 63f69ed96d3f5..3612a6ccd0533 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -1,10 +1,15 @@
 # MLIR project.
 
-include(GNUInstallDirs)
-
 # Check if MLIR is built as a standalone project.
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(mlir)
+  set(MLIR_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(MLIR_STANDALONE_BUILD)
   cmake_minimum_required(VERSION 3.13.4)
 
   find_package(LLVM CONFIG REQUIRED)
diff --git a/polly/CMakeLists.txt b/polly/CMakeLists.txt
index d6bcc8f7f285e..6a6e78b06bb2d 100644
--- a/polly/CMakeLists.txt
+++ b/polly/CMakeLists.txt
@@ -1,10 +1,14 @@
-include(GNUInstallDirs)
-
 # Check if this is a in tree build.
 if (NOT DEFINED LLVM_MAIN_SRC_DIR)
   project(Polly)
   cmake_minimum_required(VERSION 3.13.4)
+  set(POLLY_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if(POLLY_STANDALONE_BUILD)
   # Where is LLVM installed?
   find_package(LLVM CONFIG REQUIRED)
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LLVM_CMAKE_DIR})
diff --git a/pstl/CMakeLists.txt b/pstl/CMakeLists.txt
index 8784eb07b5742..2461522349ee0 100644
--- a/pstl/CMakeLists.txt
+++ b/pstl/CMakeLists.txt
@@ -7,8 +7,6 @@
 #===----------------------------------------------------------------------===##
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(PARALLELSTL_VERSION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/include/pstl/internal/pstl_config.h")
 file(STRINGS "${PARALLELSTL_VERSION_FILE}" PARALLELSTL_VERSION_SOURCE REGEX "#define _PSTL_VERSION .*$")
 string(REGEX REPLACE "#define _PSTL_VERSION (.*)$" "\\1" PARALLELSTL_VERSION_SOURCE "${PARALLELSTL_VERSION_SOURCE}")
@@ -18,6 +16,9 @@ math(EXPR VERSION_PATCH "(${PARALLELSTL_VERSION_SOURCE} % 10)")
 
 project(ParallelSTL VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} LANGUAGES CXX)
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
 set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial', 'omp', and 'tbb'. The default is 'serial'.")
 set(PSTL_HIDE_FROM_ABI_PER_TU OFF CACHE BOOL "Whether to constrain ABI-unstable symbols to each translation unit (basically, mark them with C's static keyword).")
 set(_PSTL_HIDE_FROM_ABI_PER_TU ${PSTL_HIDE_FROM_ABI_PER_TU}) # For __pstl_config_site

From 792853cb786b360fad6dedb9066b76ecd958cb93 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson 
Date: Tue, 18 Jan 2022 17:40:26 -0600
Subject: [PATCH 054/946] [SystemZ] Remove the ManipulatesSP flag from backend
 (NFC).

This flag was set in the presence of stacksave/stackrestore in order to force
a frame pointer.

This should however not be needed per the comment in MachineFrameInfo.h
stating that a variable sized object "...is the sole condition which
prevents frame pointer elimination", and experiments have also shown that
there seems to be no effect whatsoever on code generation with ManipulatesSP.

Review: Ulrich Weigand
---
 llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp     | 3 +--
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp      | 4 ----
 llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 9 +--------
 3 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 1adc8994e0f4d..ccc7d0737f531 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -764,8 +764,7 @@ void SystemZELFFrameLowering::inlineStackProbe(
 
 bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
-          MF.getFrameInfo().hasVarSizedObjects() ||
-          MF.getInfo()->getManipulatesSP());
+          MF.getFrameInfo().hasVarSizedObjects());
 }
 
 StackOffset SystemZELFFrameLowering::getFrameIndexReference(
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e1549f3012f95..881346bbe47ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -318,8 +318,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
 
-  // Use custom expanders so that we can force the function to use
-  // a frame pointer.
   setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
 
@@ -4194,7 +4192,6 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   const SystemZSubtarget *Subtarget = &MF.getSubtarget();
   auto *Regs = Subtarget->getSpecialRegisters();
-  MF.getInfo()->setManipulatesSP(true);
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     report_fatal_error("Variable-sized stack allocations are not supported "
                        "in GHC calling convention");
@@ -4207,7 +4204,6 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   const SystemZSubtarget *Subtarget = &MF.getSubtarget();
   auto *Regs = Subtarget->getSpecialRegisters();
-  MF.getInfo()->setManipulatesSP(true);
   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
 
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index f755d5cd3d5b2..ec4b812eb0e1a 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -34,14 +34,12 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   unsigned VarArgsFrameIndex;
   unsigned RegSaveFrameIndex;
   int FramePointerSaveIndex;
-  bool ManipulatesSP;
   unsigned NumLocalDynamics;
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
     : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
-      RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
-      NumLocalDynamics(0) {}
+      RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}
 
   // Get and set the first and last call-saved GPR that should be saved by
   // this function and the SP offset for the STMG.  These are 0 if no GPRs
@@ -85,11 +83,6 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
 
-  // Get and set whether the function directly manipulates the stack pointer,
-  // e.g. through STACKSAVE or STACKRESTORE.
-  bool getManipulatesSP() const { return ManipulatesSP; }
-  void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
-
   // Count number of local-dynamic TLS symbols used.
   unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }

From 5fa4cf82dfa075e7983ce92d0042480b7b8f4fbc Mon Sep 17 00:00:00 2001
From: Alexandre Ganea 
Date: Thu, 20 Jan 2022 13:38:32 -0500
Subject: [PATCH 055/946] [Clang] Separate the 'debug-info-hotpatch' test into
 two parts: one for ARM and another for AArch64

After 5af2433e1794ebf7e58e848aa612c7912d71dc78, this should fix https://lab.llvm.org/buildbot/#/builders/188/builds/8400. If it does not, I'll revert both this patch and 5af2433e1794ebf7e58e848aa612c7912d71dc78.
---
 .../debug-info-hotpatch-aarch64.cpp           | 23 +++++++++++++++++++
 .../CodeGenCXX/debug-info-hotpatch-arm.cpp    |  7 ++----
 2 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp

diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp
new file mode 100644
index 0000000000000..10fb1750f2c55
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp
@@ -0,0 +1,23 @@
+// REQUIRES: aarch64-registered-target
+///
+/// Check that using /hotpatch doesn't generate an error.
+/// Binaries are always hotpatchable on ARM/ARM64.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
+///
+/// Ensure that we set the hotpatchable flag in the debug information.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
+// HOTPATCH: S_COMPILE3 [size = [[#]]]
+// HOTPATCH: flags = hot patchable
+///
+/// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
+// NO-HOTPATCH-NOT: flags = hot patchable
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
index 6176f1788760a..48a61f7fb1977 100644
--- a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
@@ -1,23 +1,20 @@
-// REQUIRES: aarch64-registered-target || arm-registered-target
+// REQUIRES: arm-registered-target
 ///
 /// Check that using /hotpatch doesn't generate an error.
 /// Binaries are always hotpatchable on ARM/ARM64.
 ///
 // RUN: %clang_cl --target=arm-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
 ///
 /// Ensure that we set the hotpatchable flag in the debug information.
 ///
 // RUN: %clang_cl --target=arm-pc-windows-msvc /c /Z7 -o %t.obj -- %s
 // RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s
-// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
 // HOTPATCH: S_COMPILE3 [size = [[#]]]
 // HOTPATCH: flags = hot patchable
 ///
 /// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3.
 ///
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s
+// RUN: %clang_cl --target=arm-pc-windows-msvc /c -o %t.obj -- %s
 // RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
 // NO-HOTPATCH-NOT: flags = hot patchable
 

From 94a0660c14dabff715c0f0b51e89f6f4db406544 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Thu, 20 Jan 2022 11:05:01 -0800
Subject: [PATCH 056/946] [AMDGPU] Regenerate remat-vop.mir. NFC.

---
 llvm/test/CodeGen/AMDGPU/remat-vop.mir | 2210 ++++++++++++------------
 1 file changed, 1105 insertions(+), 1105 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 2eb2e1b7c399b..a677b0815ee7b 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -8,12 +8,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32
     ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
@@ -31,17 +31,17 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_e32_impuse
     ; GCN: $m0 = IMPLICIT_DEF
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     $m0 = IMPLICIT_DEF
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
@@ -60,13 +60,13 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def
     ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
@@ -83,12 +83,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e64
     ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
@@ -106,16 +106,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
     ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
@@ -131,12 +131,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_read_b32
     ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
     %1:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
     %2:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
@@ -152,12 +152,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_write_b32
     ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
-    ; GCN: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
+    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
     %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
     %2:agpr_32 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
@@ -173,12 +173,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo
     ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
     %1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
     %2:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec
@@ -194,12 +194,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -217,16 +217,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_fp_except
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -245,17 +245,17 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def
     ; GCN: $mode = IMPLICIT_DEF
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     $mode = IMPLICIT_DEF
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -272,12 +272,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
@@ -295,12 +295,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
@@ -318,16 +318,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CVT_I32_F64_dpp undef %3:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
@@ -345,16 +345,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -372,16 +372,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
@@ -397,12 +397,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32
     ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
@@ -418,12 +418,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
@@ -439,12 +439,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_f32_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
@@ -460,12 +460,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
@@ -481,12 +481,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_u32_e32
     ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
@@ -502,12 +502,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
@@ -523,12 +523,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_sdwa
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -548,16 +548,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0)
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %2:vgpr_32(tied-def 0)
     %3:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %3:vgpr_32(tied-def 0)
@@ -573,12 +573,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_u32_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
@@ -594,12 +594,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
@@ -615,12 +615,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -636,12 +636,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f16_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
@@ -657,12 +657,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_rpi_i32_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -678,12 +678,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_flr_i32_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -699,12 +699,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_off_f32_i4_e32
     ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
@@ -720,12 +720,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_ubyte0_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
@@ -741,12 +741,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
@@ -762,12 +762,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trunc_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
@@ -783,12 +783,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ceil_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
@@ -804,12 +804,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rndne_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
@@ -825,12 +825,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_floor_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
@@ -846,12 +846,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
@@ -867,12 +867,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
@@ -888,12 +888,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
@@ -909,12 +909,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_iflag_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
@@ -930,12 +930,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
@@ -951,12 +951,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
@@ -972,12 +972,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
@@ -993,12 +993,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
@@ -1014,12 +1014,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
@@ -1035,12 +1035,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sin_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
@@ -1056,12 +1056,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cos_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
@@ -1077,12 +1077,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_not_b32_e32
     ; GCN: renamable $vgpr0 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
@@ -1098,12 +1098,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfrev_b32_e32
     ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
@@ -1119,12 +1119,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_u32_e32
     ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
@@ -1140,12 +1140,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbl_b32_e32
     ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
@@ -1161,12 +1161,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_i32_e32
     ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
@@ -1182,12 +1182,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -1203,12 +1203,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
@@ -1224,12 +1224,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
@@ -1245,12 +1245,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -1266,12 +1266,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
@@ -1287,12 +1287,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
@@ -1308,12 +1308,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
@@ -1329,12 +1329,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sat_pk_u8_i16_e32
     ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
@@ -1350,12 +1350,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_mov_b32
     ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
+    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
     %1:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
     %2:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
@@ -1373,16 +1373,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_e32
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
@@ -1400,16 +1400,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_sdwa
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
@@ -1427,16 +1427,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_dpp
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
@@ -1452,12 +1452,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cndmask_b32_e64
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
     %3:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
@@ -1473,12 +1473,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madmk_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MADMK_F32 3, 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1494,12 +1494,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1515,12 +1515,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -1536,12 +1536,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_sdwa
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -1559,16 +1559,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_add_f32_dpp
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
@@ -1584,12 +1584,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_SUB_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1605,12 +1605,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_SUBREV_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1626,12 +1626,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1647,12 +1647,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MUL_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1668,12 +1668,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_i32_i24_e32
     ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_I32_I24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1689,12 +1689,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_i24_e32
     ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_I32_I24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1710,12 +1710,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_u32_u24_e32
     ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_U32_U24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1731,12 +1731,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_u24_e32
     ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_U32_U24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1752,12 +1752,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MIN_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1773,12 +1773,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAX_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1794,12 +1794,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_i32_e32
     ; GCN: renamable $vgpr0 = V_MIN_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1815,12 +1815,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_i32_e32
     ; GCN: renamable $vgpr0 = V_MAX_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1836,12 +1836,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_u32_e32
     ; GCN: renamable $vgpr0 = V_MIN_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1857,12 +1857,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_u32_e32
     ; GCN: renamable $vgpr0 = V_MAX_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1878,12 +1878,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHRREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHRREV_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1899,12 +1899,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHLREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHLREV_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1920,12 +1920,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i32_e32
     ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHRREV_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHRREV_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1941,12 +1941,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_b32_e32
     ; GCN: renamable $vgpr0 = V_AND_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_AND_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1962,12 +1962,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or_b32_e32
     ; GCN: renamable $vgpr0 = V_OR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_OR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1983,12 +1983,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xor_b32_e32
     ; GCN: renamable $vgpr0 = V_XOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XOR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2004,12 +2004,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madak_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MADAK_F32 3, undef %0:vgpr_32, 3, implicit $exec, implicit $mode
@@ -2025,12 +2025,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_u32_e32
     ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2046,12 +2046,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_u32_e32
     ; GCN: renamable $vgpr0 = V_SUB_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUB_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_SUB_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2067,12 +2067,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_u32_e32
     ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUBREV_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUBREV_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_SUBREV_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2088,12 +2088,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfm_b32_e32
     ; GCN: renamable $vgpr0 = V_BFM_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFM_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFM_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFM_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2109,12 +2109,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bcnt_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BCNT_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2130,12 +2130,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_lo_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MBCNT_LO_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2151,12 +2151,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_hi_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MBCNT_HI_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2172,12 +2172,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_LDEXP_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2193,12 +2193,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_i16_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2214,12 +2214,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_u16_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2235,12 +2235,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pkrtz_f16_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2256,12 +2256,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u16_u32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PK_U16_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2277,12 +2277,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_i16_i32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PK_I16_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2298,12 +2298,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2319,12 +2319,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2340,12 +2340,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshr_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2361,12 +2361,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2382,12 +2382,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashr_i32_e32
     ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHR_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHR_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHR_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2405,16 +2405,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_ldexp_f16_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LDEXP_F16_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_LDEXP_F16_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2432,16 +2432,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_lshrrev_b16_e32
     ; GCN: renamable $vgpr0 = V_LSHRREV_B16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_LSHRREV_B16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_LSHRREV_B16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHRREV_B16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHRREV_B16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2459,16 +2459,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_lshlrev_b16_e32
     ; GCN: renamable $vgpr0 = V_LSHLREV_B16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_LSHLREV_B16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_LSHLREV_B16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHLREV_B16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHLREV_B16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2486,16 +2486,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_ashrrev_i16_e32
     ; GCN: renamable $vgpr0 = V_ASHRREV_I16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_ASHRREV_I16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_ASHRREV_I16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHRREV_I16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHRREV_I16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2511,12 +2511,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xnor_b32_e32
     ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XNOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XNOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XNOR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2532,12 +2532,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmamk_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMAMK_F32 3, 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2553,12 +2553,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmaak_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMAAK_F32 3, undef %0:vgpr_32, 3, implicit $exec, implicit $mode
@@ -2574,12 +2574,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_legacy_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2595,12 +2595,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2616,12 +2616,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_legacy_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2637,12 +2637,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2658,12 +2658,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_i32_i24_e64
     ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MAD_I32_I24_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -2679,12 +2679,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_u32_u24_e64
     ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MAD_U32_U24_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -2700,12 +2700,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lerp_u8_e64
     ; GCN: renamable $vgpr0 = V_LERP_U8_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LERP_U8_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LERP_U8_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LERP_U8_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -2721,12 +2721,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2742,12 +2742,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2763,12 +2763,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2784,12 +2784,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2805,12 +2805,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2826,12 +2826,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_u32_e64
     ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_U32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_LO_U32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2847,12 +2847,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_e64
     ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_U32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2868,12 +2868,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_i32_e64
     ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_I32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_LO_I32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2889,12 +2889,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_e64
     ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_I32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2910,12 +2910,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubeid_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2931,12 +2931,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubesc_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2952,12 +2952,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubetc_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2973,12 +2973,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubema_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2994,12 +2994,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_u32_e64
     ; GCN: renamable $vgpr0 = V_BFE_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFE_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3015,12 +3015,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_i32_e64
     ; GCN: renamable $vgpr0 = V_BFE_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFE_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3036,12 +3036,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfi_b32_e64
     ; GCN: renamable $vgpr0 = V_BFI_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFI_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFI_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFI_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3057,12 +3057,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbit_b32_e64
     ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ALIGNBIT_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3078,12 +3078,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbyte_b32_e64
     ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ALIGNBYTE_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3099,12 +3099,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_i32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3120,12 +3120,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_u32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3141,12 +3141,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_i32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3162,12 +3162,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_u32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3183,12 +3183,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_i32_e64
     ; GCN: renamable $vgpr0 = V_MED3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MED3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3204,12 +3204,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_u32_e64
     ; GCN: renamable $vgpr0 = V_MED3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MED3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3225,12 +3225,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_f32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3246,12 +3246,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_f32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3267,12 +3267,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_f32_e64
     ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3288,12 +3288,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u8_e64
     ; GCN: renamable $vgpr0 = V_SAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3309,12 +3309,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_hi_u8_e64
     ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_HI_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3330,12 +3330,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u16_e64
     ; GCN: renamable $vgpr0 = V_SAD_U16_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U16_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U16_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U16_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3351,12 +3351,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u32_e64
     ; GCN: renamable $vgpr0 = V_SAD_U32_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U32_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U32_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U32_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3372,12 +3372,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u8_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
@@ -3393,12 +3393,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_div_fixup_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -3414,12 +3414,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3435,12 +3435,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_msad_u8_e64
     ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MSAD_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3456,12 +3456,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trig_preop_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3477,12 +3477,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_LSHLREV_B64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3498,12 +3498,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_LSHRREV_B64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3519,12 +3519,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_ASHRREV_I64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3540,12 +3540,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_perm_b32_e64
     ; GCN: renamable $vgpr0 = V_PERM_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_PERM_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_PERM_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_PERM_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3561,12 +3561,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add3_u32_e64
     ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3582,12 +3582,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_or_b32_e64
     ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_AND_OR_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3603,12 +3603,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or3_b32_e64
     ; GCN: renamable $vgpr0 = V_OR3_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR3_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR3_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_OR3_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3624,12 +3624,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xad_u32_e64
     ; GCN: renamable $vgpr0 = V_XAD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XAD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XAD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XAD_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3645,12 +3645,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_i32_e64
     ; GCN: renamable $vgpr0 = V_ADD_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_ADD_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_ADD_I32_e64 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3666,12 +3666,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_lshl_u32_e64
     ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD_LSHL_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3687,12 +3687,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_i32_e64
     ; GCN: renamable $vgpr0 = V_SUB_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SUB_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SUB_I32_e64 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3708,12 +3708,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_add_u32_e64
     ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_ADD_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3729,12 +3729,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_or_b32_e64
     ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_OR_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec

From 493c856484015873737d7c995cac9e34101fb9e9 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Thu, 20 Jan 2022 07:39:22 -0800
Subject: [PATCH 057/946] [clang][NFC] Small mangler cleanups

In working on a module mangling problem I noticed a few cleanups to the mangler.

1) Use 'if (auto x = ...' idiom in a couple of places.

2) I noticed both 'isFileContext' and 'isNamespace || isTranslationUnit'
   synonyms. Let's use the former.

3) The control flow in the seqId mangling was misordered. Let's channel Count
   von Count. Also fix the inconsistent bracing.

Differential Revision: https://reviews.llvm.org/D117799
---
 clang/lib/AST/ItaniumMangle.cpp | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 7afc1250a36f4..2e734e2b28cdb 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -659,8 +659,7 @@ bool ItaniumMangleContextImpl::isUniqueInternalLinkageDecl(
 }
 
 bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
-  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
-  if (FD) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     LanguageLinkage L = FD->getLanguageLinkage();
     // Overloadable functions need mangling.
     if (FD->hasAttr<OverloadableAttr>())
@@ -696,21 +695,24 @@ bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
   if (!getASTContext().getLangOpts().CPlusPlus)
     return false;
 
-  const VarDecl *VD = dyn_cast<VarDecl>(D);
-  if (VD && !isa<DecompositionDecl>(D)) {
+  if (const auto *VD = dyn_cast<VarDecl>(D)) {
+    // Decompositions are mangled.
+    if (isa<DecompositionDecl>(VD))
+      return true;
+
     // C variables are not mangled.
     if (VD->isExternC())
       return false;
 
-    // Variables at global scope with non-internal linkage are not mangled
+    // Variables at global scope with non-internal linkage are not mangled.
     const DeclContext *DC = getEffectiveDeclContext(D);
     // Check for extern variable declared locally.
     if (DC->isFunctionOrMethod() && D->hasLinkage())
-      while (!DC->isNamespace() && !DC->isTranslationUnit())
+      while (!DC->isFileContext())
         DC = getEffectiveParentContext(DC);
     if (DC->isTranslationUnit() && D->getFormalLinkage() != InternalLinkage &&
         !CXXNameMangler::shouldHaveAbiTags(*this, VD) &&
-        !isa<VarTemplateSpecializationDecl>(D))
+        !isa<VarTemplateSpecializationDecl>(VD))
       return false;
   }
 
@@ -5889,9 +5891,11 @@ void CXXNameMangler::mangleTemplateParameter(unsigned Depth, unsigned Index) {
 }
 
 void CXXNameMangler::mangleSeqID(unsigned SeqID) {
-  if (SeqID == 1)
+  if (SeqID == 0) {
+    // Nothing.
+  } else if (SeqID == 1) {
     Out << '0';
-  else if (SeqID > 1) {
+  } else {
     SeqID--;
 
     // <seq-id> is encoded in base-36, using digits and upper case letters.

From 6b92bb47901f3a2d4a9aa683b0365088113a729e Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh 
Date: Fri, 14 Jan 2022 23:10:37 +0000
Subject: [PATCH 058/946] [Support] [DebugInfo] Lazily create cache dir.

This change defers creating Support/Caching.cpp's cache directory until
it actually writes to the cache.

This allows using the Caching library in a read-only fashion. If read-only,
the cache is guaranteed not to write to disk. This keeps tools using
DebugInfod (currently llvm-symbolizer) hermetic when not configured to
perform remote lookups.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D117589
---
 llvm/include/llvm/Support/Caching.h           |  9 +++----
 llvm/lib/Support/Caching.cpp                  |  8 +++++--
 llvm/test/ThinLTO/X86/cache.ll                |  2 +-
 .../ThinLTO/X86/empty_module_with_cache.ll    |  2 +-
 llvm/unittests/Debuginfod/DebuginfodTests.cpp | 24 +++++++++++++++----
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index 5c30a822ef388..bef23ae757f2e 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -62,10 +62,11 @@ using AddBufferFn =
     std::function MB)>;
 
 /// Create a local file system cache which uses the given cache name, temporary
-/// file prefix, cache directory and file callback. This function also creates
-/// the cache directory if it does not already exist. The cache name appears in
-/// error messages for errors during caching. The temporary file prefix is used
-/// in the temporary file naming scheme used when writing files atomically.
+/// file prefix, cache directory and file callback.  This function does not
+/// immediately create the cache directory if it does not yet exist; this is
+/// done lazily the first time a file is added.  The cache name appears in error
+/// messages for errors during caching. The temporary file prefix is used in the
+/// temporary file naming scheme used when writing files atomically.
 Expected<FileCache> localCache(
     Twine CacheNameRef, Twine TempFilePrefixRef, Twine CacheDirectoryPathRef,
     AddBufferFn AddBuffer = [](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp
index 8c685640f791a..d6902f660e39e 100644
--- a/llvm/lib/Support/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -30,8 +30,6 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
                                      Twine TempFilePrefixRef,
                                      Twine CacheDirectoryPathRef,
                                      AddBufferFn AddBuffer) {
-  if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPathRef))
-    return errorCodeToError(EC);
 
   // Create local copies which are safely captured-by-copy in lambdas
   SmallString<64> CacheName, TempFilePrefix, CacheDirectoryPath;
@@ -140,6 +138,12 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
     };
 
     return [=](size_t Task) -> Expected<std::unique_ptr<CachedFileStream>> {
+      // Create the cache directory if not already done. Doing this lazily
+      // ensures the filesystem isn't mutated until the cache is.
+      if (std::error_code EC = sys::fs::create_directories(
+              CacheDirectoryPath, /*IgnoreExisting=*/true))
+        return errorCodeToError(EC);
+
       // Write to a temporary to avoid race condition
       SmallString<64> TempFilenameModel;
       sys::path::append(TempFilenameModel, CacheDirectoryPath,
diff --git a/llvm/test/ThinLTO/X86/cache.ll b/llvm/test/ThinLTO/X86/cache.ll
index 406a1a456f89c..009b78713316a 100644
--- a/llvm/test/ThinLTO/X86/cache.ll
+++ b/llvm/test/ThinLTO/X86/cache.ll
@@ -20,7 +20,7 @@
 ; RUN:  -r=%t2.bc,_main,plx \
 ; RUN:  -r=%t2.bc,_globalfunc,lx \
 ; RUN:  -r=%t.bc,_globalfunc,plx
-; RUN: ls %t.cache.noindex | count 0
+; RUN: not ls %t.cache.noindex
 
 
 ; Repeat again, *with* hash this time.
diff --git a/llvm/test/ThinLTO/X86/empty_module_with_cache.ll b/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
index 8e58d9f0db959..693f264b8ff26 100644
--- a/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
+++ b/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
@@ -28,7 +28,7 @@
 ; RUN: rm -Rf %t.cache
 ; RUN: llvm-lto2 run -o %t.o %t2.bc  %t.bc -cache-dir %t.cache \
 ; RUN:  -r=%t2.bc,_main,plx
-; RUN: ls %t.cache | count 0
+; RUN: not ls %t.cache
 
 
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Debuginfod/DebuginfodTests.cpp b/llvm/unittests/Debuginfod/DebuginfodTests.cpp
index fc5eb705969e7..5312912599e93 100644
--- a/llvm/unittests/Debuginfod/DebuginfodTests.cpp
+++ b/llvm/unittests/Debuginfod/DebuginfodTests.cpp
@@ -17,6 +17,17 @@
 #define setenv(name, var, ignore) _putenv_s(name, var)
 #endif
 
+#define ASSERT_NO_ERROR(x)                                                     \
+  if (std::error_code ASSERT_NO_ERROR_ec = x) {                                \
+    SmallString<128> MessageStorage;                                           \
+    raw_svector_ostream Message(MessageStorage);                               \
+    Message << #x ": did not return errc::success.\n"                          \
+            << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n"          \
+            << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n";      \
+    GTEST_FATAL_FAILURE_(MessageStorage.c_str());                              \
+  } else {                                                                     \
+  }
+
 using namespace llvm;
 
 // Check that the Debuginfod client can find locally cached artifacts.
@@ -40,11 +51,12 @@ TEST(DebuginfodClient, CacheHit) {
 // Check that the Debuginfod client returns an Error when it fails to find an
 // artifact.
 TEST(DebuginfodClient, CacheMiss) {
-  // Set the cache path to a temp directory to avoid permissions issues if $HOME
-  // is not writable.
-  SmallString<32> TempDir;
-  sys::path::system_temp_directory(true, TempDir);
-  setenv("DEBUGINFOD_CACHE_PATH", TempDir.c_str(),
+  SmallString<32> CacheDir;
+  ASSERT_NO_ERROR(
+      sys::fs::createUniqueDirectory("debuginfod-unittest", CacheDir));
+  sys::path::append(CacheDir, "cachedir");
+  ASSERT_FALSE(sys::fs::exists(CacheDir));
+  setenv("DEBUGINFOD_CACHE_PATH", CacheDir.c_str(),
          /*replace=*/1);
   // Ensure there are no urls to guarantee a cache miss.
   setenv("DEBUGINFOD_URLS", "", /*replace=*/1);
@@ -52,4 +64,6 @@ TEST(DebuginfodClient, CacheMiss) {
   Expected<std::string> PathOrErr = getCachedOrDownloadArtifact(
       /*UniqueKey=*/"nonexistent-key", /*UrlPath=*/"/null");
   EXPECT_THAT_EXPECTED(PathOrErr, Failed());
+  // A cache miss with no possible URLs should not create the cache directory.
+  EXPECT_FALSE(sys::fs::exists(CacheDir));
 }

From 94e69fbb4f3a9719d4d8cc7268dd5db5d0be7e8f Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 11:32:26 -0800
Subject: [PATCH 059/946] [RISCV] Add DAG combine to fold (fp_to_int_sat
 (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))

Similar for ceil, trunc, round, and roundeven. This allows us to use
static rounding modes to avoid a libcall.

This is similar to D116771, but for the saturating conversions.

This optimization is done for AArch64 as isel patterns.
RISCV doesn't have instructions for ceil/floor/trunc/round/roundeven
so the operations don't stick around until isel to enable a pattern
match. Thus I've implemented a DAG combine.

I'm only handling saturating to i64 or i32. This could be extended
to other sizes in the future.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D116864
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  87 +-
 .../CodeGen/RISCV/double-round-conv-sat.ll    | 940 +++++++++++++++++
 .../CodeGen/RISCV/float-round-conv-sat.ll     | 940 +++++++++++++++++
 .../test/CodeGen/RISCV/half-round-conv-sat.ll | 970 ++++++++++++++++++
 4 files changed, 2927 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
 create mode 100644 llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
 create mode 100644 llvm/test/CodeGen/RISCV/half-round-conv-sat.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 304c05d9378f2..f942f395d5328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1051,6 +1051,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::FP_TO_SINT);
     setTargetDAGCombine(ISD::FP_TO_UINT);
+    setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+    setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
   }
   if (Subtarget.hasVInstructions()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
@@ -7180,13 +7182,24 @@ static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
 }
 
+static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
+  switch (Op.getOpcode()) {
+  case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
+  case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
+  case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
+  case ISD::FCEIL:      return RISCVFPRndMode::RUP;
+  case ISD::FROUND:     return RISCVFPRndMode::RMM;
+  }
+
+  return RISCVFPRndMode::Invalid;
+}
+
 // Fold
 //   (fp_to_int (froundeven X)) -> fcvt X, rne
 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
 //   (fp_to_int (fceil X))      -> fcvt X, rup
 //   (fp_to_int (fround X))     -> fcvt X, rmm
-// FIXME: We should also do this for fp_to_int_sat.
 static SDValue performFP_TO_INTCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const RISCVSubtarget &Subtarget) {
@@ -7210,16 +7223,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
     return SDValue();
 
-  RISCVFPRndMode::RoundingMode FRM;
-  switch (Src->getOpcode()) {
-  default:
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
-  case ISD::FROUNDEVEN: FRM = RISCVFPRndMode::RNE; break;
-  case ISD::FTRUNC:     FRM = RISCVFPRndMode::RTZ; break;
-  case ISD::FFLOOR:     FRM = RISCVFPRndMode::RDN; break;
-  case ISD::FCEIL:      FRM = RISCVFPRndMode::RUP; break;
-  case ISD::FROUND:     FRM = RISCVFPRndMode::RMM; break;
-  }
 
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
 
@@ -7235,6 +7241,64 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
 }
 
+// Fold
+//   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
+//   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
+//   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
+//   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
+//   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
+static SDValue performFP_TO_INT_SATCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI,
+                                       const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // Only handle XLen types. Other types narrower than XLen will eventually be
+  // legalized to XLenVT.
+  EVT DstVT = N->getValueType(0);
+  if (DstVT != XLenVT)
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+
+  // Ensure the FP type is also legal.
+  if (!TLI.isTypeLegal(Src.getValueType()))
+    return SDValue();
+
+  // Don't do this for f16 with Zfhmin and not Zfh.
+  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+    return SDValue();
+
+  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
+    return SDValue();
+
+  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+
+  unsigned Opc;
+  if (SatVT == DstVT)
+    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+  else
+    return SDValue();
+  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  Src = Src.getOperand(0);
+
+  SDLoc DL(N);
+  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
+                                DAG.getTargetConstant(FRM, DL, XLenVT));
+
+  // RISCV FP-to-int conversions saturate to the destination register size, but
+  // don't produce 0 for nan.
+  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -7548,6 +7612,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return performFP_TO_INTCombine(N, DCI, Subtarget);
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
   case ISD::FCOPYSIGN: {
     EVT VT = N->getValueType(0);
     if (!VT.isVector())
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
new file mode 100644
index 0000000000000..38d82f6e46ff2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
+
+define signext i32 @test_floor_si32(double %x) {
+; RV32IFD-LABEL: test_floor_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB0_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB0_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB0_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB0_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB1_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB1_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB1_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB1_10
+; RV32IFD-NEXT:  .LBB1_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB1_11
+; RV32IFD-NEXT:  .LBB1_5:
+; RV32IFD-NEXT:    bnez a3, .LBB1_12
+; RV32IFD-NEXT:  .LBB1_6:
+; RV32IFD-NEXT:    bnez a2, .LBB1_8
+; RV32IFD-NEXT:  .LBB1_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB1_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB1_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB1_4
+; RV32IFD-NEXT:  .LBB1_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB1_5
+; RV32IFD-NEXT:  .LBB1_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB1_6
+; RV32IFD-NEXT:  .LBB1_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB1_7
+; RV32IFD-NEXT:    j .LBB1_8
+;
+; RV64IFD-LABEL: test_floor_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB1_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB1_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(double %x) {
+; RV32IFD-LABEL: test_floor_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB2_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB2_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB2_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB2_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB3_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB3_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI3_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB3_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB3_8
+; RV32IFD-NEXT:  .LBB3_4:
+; RV32IFD-NEXT:    bnez a4, .LBB3_6
+; RV32IFD-NEXT:  .LBB3_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB3_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB3_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB3_4
+; RV32IFD-NEXT:  .LBB3_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB3_5
+; RV32IFD-NEXT:    j .LBB3_6
+;
+; RV64IFD-LABEL: test_floor_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB3_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB3_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(double %x) {
+; RV32IFD-LABEL: test_ceil_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB4_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB4_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB4_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB4_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB5_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB5_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB5_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB5_10
+; RV32IFD-NEXT:  .LBB5_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB5_11
+; RV32IFD-NEXT:  .LBB5_5:
+; RV32IFD-NEXT:    bnez a3, .LBB5_12
+; RV32IFD-NEXT:  .LBB5_6:
+; RV32IFD-NEXT:    bnez a2, .LBB5_8
+; RV32IFD-NEXT:  .LBB5_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB5_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB5_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB5_4
+; RV32IFD-NEXT:  .LBB5_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB5_5
+; RV32IFD-NEXT:  .LBB5_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB5_6
+; RV32IFD-NEXT:  .LBB5_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB5_7
+; RV32IFD-NEXT:    j .LBB5_8
+;
+; RV64IFD-LABEL: test_ceil_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB5_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB5_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(double %x) {
+; RV32IFD-LABEL: test_ceil_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB6_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB6_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB6_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB6_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB7_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB7_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB7_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB7_8
+; RV32IFD-NEXT:  .LBB7_4:
+; RV32IFD-NEXT:    bnez a4, .LBB7_6
+; RV32IFD-NEXT:  .LBB7_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB7_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB7_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB7_4
+; RV32IFD-NEXT:  .LBB7_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB7_5
+; RV32IFD-NEXT:    j .LBB7_6
+;
+; RV64IFD-LABEL: test_ceil_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB7_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB7_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(double %x) {
+; RV32IFD-LABEL: test_trunc_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB8_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB8_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB8_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB8_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB9_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB9_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB9_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB9_10
+; RV32IFD-NEXT:  .LBB9_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB9_11
+; RV32IFD-NEXT:  .LBB9_5:
+; RV32IFD-NEXT:    bnez a3, .LBB9_12
+; RV32IFD-NEXT:  .LBB9_6:
+; RV32IFD-NEXT:    bnez a2, .LBB9_8
+; RV32IFD-NEXT:  .LBB9_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB9_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB9_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB9_4
+; RV32IFD-NEXT:  .LBB9_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB9_5
+; RV32IFD-NEXT:  .LBB9_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB9_6
+; RV32IFD-NEXT:  .LBB9_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB9_7
+; RV32IFD-NEXT:    j .LBB9_8
+;
+; RV64IFD-LABEL: test_trunc_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB9_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB9_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(double %x) {
+; RV32IFD-LABEL: test_trunc_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB10_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB10_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB10_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB10_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB11_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB11_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB11_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB11_8
+; RV32IFD-NEXT:  .LBB11_4:
+; RV32IFD-NEXT:    bnez a4, .LBB11_6
+; RV32IFD-NEXT:  .LBB11_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB11_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB11_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB11_4
+; RV32IFD-NEXT:  .LBB11_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB11_5
+; RV32IFD-NEXT:    j .LBB11_6
+;
+; RV64IFD-LABEL: test_trunc_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB11_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB11_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(double %x) {
+; RV32IFD-LABEL: test_round_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB12_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB12_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB12_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB12_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB13_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB13_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB13_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB13_10
+; RV32IFD-NEXT:  .LBB13_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB13_11
+; RV32IFD-NEXT:  .LBB13_5:
+; RV32IFD-NEXT:    bnez a3, .LBB13_12
+; RV32IFD-NEXT:  .LBB13_6:
+; RV32IFD-NEXT:    bnez a2, .LBB13_8
+; RV32IFD-NEXT:  .LBB13_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB13_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB13_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB13_4
+; RV32IFD-NEXT:  .LBB13_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB13_5
+; RV32IFD-NEXT:  .LBB13_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB13_6
+; RV32IFD-NEXT:  .LBB13_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB13_7
+; RV32IFD-NEXT:    j .LBB13_8
+;
+; RV64IFD-LABEL: test_round_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB13_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB13_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(double %x) {
+; RV32IFD-LABEL: test_round_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB14_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB14_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB14_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB14_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB15_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB15_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI15_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB15_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB15_8
+; RV32IFD-NEXT:  .LBB15_4:
+; RV32IFD-NEXT:    bnez a4, .LBB15_6
+; RV32IFD-NEXT:  .LBB15_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB15_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB15_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB15_4
+; RV32IFD-NEXT:  .LBB15_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB15_5
+; RV32IFD-NEXT:    j .LBB15_6
+;
+; RV64IFD-LABEL: test_round_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB15_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB15_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(double %x) {
+; RV32IFD-LABEL: test_roundeven_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB16_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB16_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB16_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB16_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB17_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB17_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB17_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB17_10
+; RV32IFD-NEXT:  .LBB17_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB17_11
+; RV32IFD-NEXT:  .LBB17_5:
+; RV32IFD-NEXT:    bnez a3, .LBB17_12
+; RV32IFD-NEXT:  .LBB17_6:
+; RV32IFD-NEXT:    bnez a2, .LBB17_8
+; RV32IFD-NEXT:  .LBB17_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB17_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB17_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB17_4
+; RV32IFD-NEXT:  .LBB17_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB17_5
+; RV32IFD-NEXT:  .LBB17_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB17_6
+; RV32IFD-NEXT:  .LBB17_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB17_7
+; RV32IFD-NEXT:    j .LBB17_8
+;
+; RV64IFD-LABEL: test_roundeven_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB17_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB17_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(double %x) {
+; RV32IFD-LABEL: test_roundeven_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB18_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB18_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB18_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB18_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB19_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB19_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI19_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB19_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB19_8
+; RV32IFD-NEXT:  .LBB19_4:
+; RV32IFD-NEXT:    bnez a4, .LBB19_6
+; RV32IFD-NEXT:  .LBB19_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB19_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB19_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB19_4
+; RV32IFD-NEXT:  .LBB19_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB19_5
+; RV32IFD-NEXT:    j .LBB19_6
+;
+; RV64IFD-LABEL: test_roundeven_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB19_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB19_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.roundeven.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
new file mode 100644
index 0000000000000..9893b697af294
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
+; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
+
+define signext i32 @test_floor_si32(float %x) {
+; RV32IF-LABEL: test_floor_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB0_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB0_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB0_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB0_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB1_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB1_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB1_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB1_10
+; RV32IF-NEXT:  .LBB1_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB1_11
+; RV32IF-NEXT:  .LBB1_5:
+; RV32IF-NEXT:    bnez a3, .LBB1_12
+; RV32IF-NEXT:  .LBB1_6:
+; RV32IF-NEXT:    bnez a2, .LBB1_8
+; RV32IF-NEXT:  .LBB1_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB1_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB1_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB1_4
+; RV32IF-NEXT:  .LBB1_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB1_5
+; RV32IF-NEXT:  .LBB1_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB1_6
+; RV32IF-NEXT:  .LBB1_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB1_7
+; RV32IF-NEXT:    j .LBB1_8
+;
+; RV64IF-LABEL: test_floor_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB1_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB1_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(float %x) {
+; RV32IF-LABEL: test_floor_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB2_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB2_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB2_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB2_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB3_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB3_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB3_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB3_8
+; RV32IF-NEXT:  .LBB3_4:
+; RV32IF-NEXT:    bnez a4, .LBB3_6
+; RV32IF-NEXT:  .LBB3_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB3_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB3_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB3_4
+; RV32IF-NEXT:  .LBB3_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB3_5
+; RV32IF-NEXT:    j .LBB3_6
+;
+; RV64IF-LABEL: test_floor_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB3_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB3_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(float %x) {
+; RV32IF-LABEL: test_ceil_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB4_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB4_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB4_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB4_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB5_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB5_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB5_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB5_10
+; RV32IF-NEXT:  .LBB5_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB5_11
+; RV32IF-NEXT:  .LBB5_5:
+; RV32IF-NEXT:    bnez a3, .LBB5_12
+; RV32IF-NEXT:  .LBB5_6:
+; RV32IF-NEXT:    bnez a2, .LBB5_8
+; RV32IF-NEXT:  .LBB5_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB5_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB5_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB5_4
+; RV32IF-NEXT:  .LBB5_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB5_5
+; RV32IF-NEXT:  .LBB5_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB5_6
+; RV32IF-NEXT:  .LBB5_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB5_7
+; RV32IF-NEXT:    j .LBB5_8
+;
+; RV64IF-LABEL: test_ceil_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB5_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB5_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(float %x) {
+; RV32IF-LABEL: test_ceil_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB6_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB6_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB6_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB6_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB7_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB7_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB7_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB7_8
+; RV32IF-NEXT:  .LBB7_4:
+; RV32IF-NEXT:    bnez a4, .LBB7_6
+; RV32IF-NEXT:  .LBB7_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB7_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB7_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB7_4
+; RV32IF-NEXT:  .LBB7_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB7_5
+; RV32IF-NEXT:    j .LBB7_6
+;
+; RV64IF-LABEL: test_ceil_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB7_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB7_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(float %x) {
+; RV32IF-LABEL: test_trunc_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB8_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB8_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB8_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB8_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB9_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB9_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB9_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB9_10
+; RV32IF-NEXT:  .LBB9_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB9_11
+; RV32IF-NEXT:  .LBB9_5:
+; RV32IF-NEXT:    bnez a3, .LBB9_12
+; RV32IF-NEXT:  .LBB9_6:
+; RV32IF-NEXT:    bnez a2, .LBB9_8
+; RV32IF-NEXT:  .LBB9_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB9_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB9_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB9_4
+; RV32IF-NEXT:  .LBB9_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB9_5
+; RV32IF-NEXT:  .LBB9_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB9_6
+; RV32IF-NEXT:  .LBB9_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB9_7
+; RV32IF-NEXT:    j .LBB9_8
+;
+; RV64IF-LABEL: test_trunc_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB9_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB9_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(float %x) {
+; RV32IF-LABEL: test_trunc_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB10_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB10_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB10_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB10_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB11_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB11_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB11_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB11_8
+; RV32IF-NEXT:  .LBB11_4:
+; RV32IF-NEXT:    bnez a4, .LBB11_6
+; RV32IF-NEXT:  .LBB11_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB11_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB11_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB11_4
+; RV32IF-NEXT:  .LBB11_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB11_5
+; RV32IF-NEXT:    j .LBB11_6
+;
+; RV64IF-LABEL: test_trunc_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB11_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB11_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(float %x) {
+; RV32IF-LABEL: test_round_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB12_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB12_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB12_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB12_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(float %x) nounwind {
+; RV32IF-LABEL: test_round_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB13_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB13_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB13_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB13_10
+; RV32IF-NEXT:  .LBB13_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB13_11
+; RV32IF-NEXT:  .LBB13_5:
+; RV32IF-NEXT:    bnez a3, .LBB13_12
+; RV32IF-NEXT:  .LBB13_6:
+; RV32IF-NEXT:    bnez a2, .LBB13_8
+; RV32IF-NEXT:  .LBB13_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB13_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB13_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB13_4
+; RV32IF-NEXT:  .LBB13_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB13_5
+; RV32IF-NEXT:  .LBB13_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB13_6
+; RV32IF-NEXT:  .LBB13_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB13_7
+; RV32IF-NEXT:    j .LBB13_8
+;
+; RV64IF-LABEL: test_round_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB13_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB13_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(float %x) {
+; RV32IF-LABEL: test_round_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB14_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB14_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB14_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB14_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_round_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB15_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB15_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB15_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB15_8
+; RV32IF-NEXT:  .LBB15_4:
+; RV32IF-NEXT:    bnez a4, .LBB15_6
+; RV32IF-NEXT:  .LBB15_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB15_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB15_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB15_4
+; RV32IF-NEXT:  .LBB15_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB15_5
+; RV32IF-NEXT:    j .LBB15_6
+;
+; RV64IF-LABEL: test_round_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB15_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB15_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(float %x) {
+; RV32IF-LABEL: test_roundeven_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB16_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB16_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB16_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB16_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB17_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB17_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB17_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB17_10
+; RV32IF-NEXT:  .LBB17_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB17_11
+; RV32IF-NEXT:  .LBB17_5:
+; RV32IF-NEXT:    bnez a3, .LBB17_12
+; RV32IF-NEXT:  .LBB17_6:
+; RV32IF-NEXT:    bnez a2, .LBB17_8
+; RV32IF-NEXT:  .LBB17_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB17_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB17_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB17_4
+; RV32IF-NEXT:  .LBB17_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB17_5
+; RV32IF-NEXT:  .LBB17_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB17_6
+; RV32IF-NEXT:  .LBB17_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB17_7
+; RV32IF-NEXT:    j .LBB17_8
+;
+; RV64IF-LABEL: test_roundeven_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB17_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB17_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(float %x) {
+; RV32IF-LABEL: test_roundeven_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB18_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB18_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB18_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB18_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB19_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB19_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB19_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB19_8
+; RV32IF-NEXT:  .LBB19_4:
+; RV32IF-NEXT:    bnez a4, .LBB19_6
+; RV32IF-NEXT:  .LBB19_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB19_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB19_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB19_4
+; RV32IF-NEXT:  .LBB19_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB19_5
+; RV32IF-NEXT:    j .LBB19_6
+;
+; RV64IF-LABEL: test_roundeven_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB19_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB19_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+declare float @llvm.floor.f32(float)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
new file mode 100644
index 0000000000000..7b3104c69bef6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -0,0 +1,970 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IZFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IZFH %s
+
+define signext i32 @test_floor_si32(half %x) {
+; RV32IZFH-LABEL: test_floor_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB0_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB0_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB0_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB0_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB1_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB1_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB1_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB1_10
+; RV32IZFH-NEXT:  .LBB1_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB1_11
+; RV32IZFH-NEXT:  .LBB1_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB1_12
+; RV32IZFH-NEXT:  .LBB1_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB1_8
+; RV32IZFH-NEXT:  .LBB1_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB1_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB1_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB1_4
+; RV32IZFH-NEXT:  .LBB1_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB1_5
+; RV32IZFH-NEXT:  .LBB1_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB1_6
+; RV32IZFH-NEXT:  .LBB1_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB1_7
+; RV32IZFH-NEXT:    j .LBB1_8
+;
+; RV64IZFH-LABEL: test_floor_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB1_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB1_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(half %x) {
+; RV32IZFH-LABEL: test_floor_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB2_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB2_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB2_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB2_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB3_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB3_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB3_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB3_8
+; RV32IZFH-NEXT:  .LBB3_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB3_6
+; RV32IZFH-NEXT:  .LBB3_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB3_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB3_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB3_4
+; RV32IZFH-NEXT:  .LBB3_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB3_5
+; RV32IZFH-NEXT:    j .LBB3_6
+;
+; RV64IZFH-LABEL: test_floor_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB3_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB3_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(half %x) {
+; RV32IZFH-LABEL: test_ceil_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB4_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB4_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB4_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB4_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB5_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB5_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB5_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB5_10
+; RV32IZFH-NEXT:  .LBB5_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB5_11
+; RV32IZFH-NEXT:  .LBB5_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB5_12
+; RV32IZFH-NEXT:  .LBB5_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB5_8
+; RV32IZFH-NEXT:  .LBB5_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB5_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB5_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB5_4
+; RV32IZFH-NEXT:  .LBB5_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB5_5
+; RV32IZFH-NEXT:  .LBB5_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB5_6
+; RV32IZFH-NEXT:  .LBB5_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB5_7
+; RV32IZFH-NEXT:    j .LBB5_8
+;
+; RV64IZFH-LABEL: test_ceil_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB5_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB5_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(half %x) {
+; RV32IZFH-LABEL: test_ceil_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB6_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB6_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB6_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB6_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB7_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB7_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB7_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB7_8
+; RV32IZFH-NEXT:  .LBB7_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB7_6
+; RV32IZFH-NEXT:  .LBB7_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB7_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB7_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB7_4
+; RV32IZFH-NEXT:  .LBB7_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB7_5
+; RV32IZFH-NEXT:    j .LBB7_6
+;
+; RV64IZFH-LABEL: test_ceil_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB7_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB7_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(half %x) {
+; RV32IZFH-LABEL: test_trunc_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB8_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB8_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB8_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB8_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB9_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB9_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB9_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB9_10
+; RV32IZFH-NEXT:  .LBB9_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB9_11
+; RV32IZFH-NEXT:  .LBB9_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB9_12
+; RV32IZFH-NEXT:  .LBB9_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB9_8
+; RV32IZFH-NEXT:  .LBB9_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB9_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB9_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB9_4
+; RV32IZFH-NEXT:  .LBB9_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB9_5
+; RV32IZFH-NEXT:  .LBB9_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB9_6
+; RV32IZFH-NEXT:  .LBB9_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB9_7
+; RV32IZFH-NEXT:    j .LBB9_8
+;
+; RV64IZFH-LABEL: test_trunc_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB9_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB9_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(half %x) {
+; RV32IZFH-LABEL: test_trunc_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB10_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB10_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB10_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB10_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB11_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB11_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB11_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB11_8
+; RV32IZFH-NEXT:  .LBB11_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB11_6
+; RV32IZFH-NEXT:  .LBB11_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB11_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB11_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB11_4
+; RV32IZFH-NEXT:  .LBB11_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB11_5
+; RV32IZFH-NEXT:    j .LBB11_6
+;
+; RV64IZFH-LABEL: test_trunc_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB11_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB11_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(half %x) {
+; RV32IZFH-LABEL: test_round_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB12_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB12_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB12_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB12_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB13_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB13_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB13_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB13_10
+; RV32IZFH-NEXT:  .LBB13_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB13_11
+; RV32IZFH-NEXT:  .LBB13_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB13_12
+; RV32IZFH-NEXT:  .LBB13_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB13_8
+; RV32IZFH-NEXT:  .LBB13_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB13_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB13_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB13_4
+; RV32IZFH-NEXT:  .LBB13_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB13_5
+; RV32IZFH-NEXT:  .LBB13_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB13_6
+; RV32IZFH-NEXT:  .LBB13_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB13_7
+; RV32IZFH-NEXT:    j .LBB13_8
+;
+; RV64IZFH-LABEL: test_round_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB13_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB13_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(half %x) {
+; RV32IZFH-LABEL: test_round_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB14_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB14_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB14_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB14_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB15_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB15_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB15_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB15_8
+; RV32IZFH-NEXT:  .LBB15_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB15_6
+; RV32IZFH-NEXT:  .LBB15_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB15_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB15_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB15_4
+; RV32IZFH-NEXT:  .LBB15_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB15_5
+; RV32IZFH-NEXT:    j .LBB15_6
+;
+; RV64IZFH-LABEL: test_round_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB15_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB15_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB16_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB16_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB16_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB16_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB17_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB17_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB17_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB17_10
+; RV32IZFH-NEXT:  .LBB17_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB17_11
+; RV32IZFH-NEXT:  .LBB17_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB17_12
+; RV32IZFH-NEXT:  .LBB17_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB17_8
+; RV32IZFH-NEXT:  .LBB17_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB17_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB17_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB17_4
+; RV32IZFH-NEXT:  .LBB17_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB17_5
+; RV32IZFH-NEXT:  .LBB17_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB17_6
+; RV32IZFH-NEXT:  .LBB17_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB17_7
+; RV32IZFH-NEXT:    j .LBB17_8
+;
+; RV64IZFH-LABEL: test_roundeven_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB17_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB17_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB18_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB18_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB18_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB18_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB19_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB19_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB19_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB19_8
+; RV32IZFH-NEXT:  .LBB19_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB19_6
+; RV32IZFH-NEXT:  .LBB19_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB19_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB19_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB19_4
+; RV32IZFH-NEXT:  .LBB19_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB19_5
+; RV32IZFH-NEXT:    j .LBB19_6
+;
+; RV64IZFH-LABEL: test_roundeven_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB19_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB19_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+declare half @llvm.floor.f16(half)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.trunc.f16(half)
+declare half @llvm.round.f16(half)
+declare half @llvm.roundeven.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)

From 63a991d0358970d76700d084f05eb95cd29234c0 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer 
Date: Mon, 3 Jan 2022 20:28:00 -0500
Subject: [PATCH 060/946] [libc++] Eliminate the `__function_like` helper.

As prefigured in the comments on D115315.
This gives us one unified style for all niebloids,
and also simplifies the modulemap.

Differential Revision: https://reviews.llvm.org/D116570
---
 libcxx/include/CMakeLists.txt                 |   1 -
 libcxx/include/__function_like.h              |  51 -----
 libcxx/include/__iterator/advance.h           |   7 +-
 libcxx/include/__iterator/next.h              |   8 +-
 libcxx/include/__iterator/prev.h              |   8 +-
 libcxx/include/__memory/ranges_construct_at.h |  26 +--
 .../ranges_uninitialized_algorithms.h         |  78 ++------
 libcxx/include/module.modulemap               |  26 +--
 .../function_like.h.module.verify.cpp         |  15 --
 .../special_function.compile.pass.cpp         |  23 ---
 .../special_function.compile.pass.cpp         |  23 ---
 .../special_function.compile.pass.cpp         |  23 ---
 .../niebloid.compile.pass.cpp                 | 188 ++++++++++++++++++
 libcxx/test/support/is_niebloid.h             |  39 ----
 14 files changed, 230 insertions(+), 286 deletions(-)
 delete mode 100644 libcxx/include/__function_like.h
 delete mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
 create mode 100644 libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
 delete mode 100644 libcxx/test/support/is_niebloid.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 7d56123a69d1a..b7222540846fa 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -182,7 +182,6 @@ set(files
   __format/formatter_integral.h
   __format/formatter_string.h
   __format/parser_std_format_spec.h
-  __function_like.h
   __functional/binary_function.h
   __functional/binary_negate.h
   __functional/bind.h
diff --git a/libcxx/include/__function_like.h b/libcxx/include/__function_like.h
deleted file mode 100644
index 4075355174d99..0000000000000
--- a/libcxx/include/__function_like.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- C++ -*-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP___ITERATOR_FUNCTION_LIKE_H
-#define _LIBCPP___ITERATOR_FUNCTION_LIKE_H
-
-#include <__config>
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#pragma GCC system_header
-#endif
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if !defined(_LIBCPP_HAS_NO_RANGES)
-
-namespace ranges {
-// Per [range.iter.ops.general] and [algorithms.requirements], functions in namespace std::ranges
-// can't be found by ADL and inhibit ADL when found by unqualified lookup. The easiest way to
-// facilitate this is to use function objects.
-//
-// Since these are still standard library functions, we use `__function_like` to eliminate most of
-// the properties that function objects get by default (e.g. semiregularity, addressability), to
-// limit the surface area of the unintended public interface, so as to curb the effect of Hyrum's
-// law.
-struct __function_like {
-  __function_like() = delete;
-  __function_like(__function_like const&) = delete;
-  __function_like& operator=(__function_like const&) = delete;
-
-  void operator&() const = delete;
-
-  struct __tag { };
-
-protected:
-  constexpr explicit __function_like(__tag) noexcept {}
-  ~__function_like() = default;
-};
-} // namespace ranges
-
-#endif // !defined(_LIBCPP_HAS_NO_RANGES)
-
-_LIBCPP_END_NAMESPACE_STD
-
-#endif // _LIBCPP___ITERATOR_FUNCTION_LIKE_H
diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h
index ee3fba30dc1e1..831f88f462744 100644
--- a/libcxx/include/__iterator/advance.h
+++ b/libcxx/include/__iterator/advance.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iterator_traits.h>
@@ -72,7 +71,7 @@ void advance(_InputIter& __i, _Distance __orig_n) {
 namespace ranges {
 namespace __advance {
 
-struct __fn final : private __function_like {
+struct __fn {
 private:
   template 
   _LIBCPP_HIDE_FROM_ABI
@@ -99,8 +98,6 @@ struct __fn final : private __function_like {
   }
 
 public:
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
   // Preconditions: If `I` does not model `bidirectional_iterator`, `n` is not negative.
   template <input_or_output_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
@@ -191,7 +188,7 @@ struct __fn final : private __function_like {
 } // namespace __advance
 
 inline namespace __cpo {
-  inline constexpr auto advance = __advance::__fn(__function_like::__tag());
+  inline constexpr auto advance = __advance::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__iterator/next.h b/libcxx/include/__iterator/next.h
index 12c213a1e4d7c..b9bdd6b27e05a 100644
--- a/libcxx/include/__iterator/next.h
+++ b/libcxx/include/__iterator/next.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/advance.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
@@ -43,10 +42,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 namespace ranges {
 namespace __next {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_or_output_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Ip operator()(_Ip __x) const {
@@ -79,7 +75,7 @@ struct __fn final : private __function_like {
 } // namespace __next
 
 inline namespace __cpo {
-  inline constexpr auto next = __next::__fn(__function_like::__tag());
+  inline constexpr auto next = __next::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__iterator/prev.h b/libcxx/include/__iterator/prev.h
index 84c69f9c13af6..870cbe64eaeee 100644
--- a/libcxx/include/__iterator/prev.h
+++ b/libcxx/include/__iterator/prev.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/advance.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
@@ -42,10 +41,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 namespace ranges {
 namespace __prev {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <bidirectional_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Ip operator()(_Ip __x) const {
@@ -71,7 +67,7 @@ struct __fn final : private __function_like {
 } // namespace __prev
 
 inline namespace __cpo {
-  inline constexpr auto prev = __prev::__fn(__function_like::__tag());
+  inline constexpr auto prev = __prev::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__memory/ranges_construct_at.h b/libcxx/include/__memory/ranges_construct_at.h
index 9b0edb7c2562d..1a72da7396821 100644
--- a/libcxx/include/__memory/ranges_construct_at.h
+++ b/libcxx/include/__memory/ranges_construct_at.h
@@ -12,7 +12,6 @@
 
 #include <__concepts/destructible.h>
 #include <__config>
-#include <__function_like.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/readable_traits.h>
 #include <__memory/concepts.h>
@@ -37,9 +36,7 @@ namespace ranges {
 
 namespace __construct_at {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template()) _Tp(declval<_Args>()...)
   )>
@@ -52,16 +49,14 @@ struct __fn final : private __function_like {
 } // namespace __construct_at
 
 inline namespace __cpo {
-  inline constexpr auto construct_at = __construct_at::__fn(__function_like::__tag());
+  inline constexpr auto construct_at = __construct_at::__fn{};
 } // namespace __cpo
 
 // destroy_at
 
 namespace __destroy_at {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <destructible _Tp>
   _LIBCPP_HIDE_FROM_ABI
   constexpr void operator()(_Tp* __location) const noexcept {
@@ -72,16 +67,14 @@ struct __fn final : private __function_like {
 } // namespace __destroy_at
 
 inline namespace __cpo {
-  inline constexpr auto destroy_at = __destroy_at::__fn(__function_like::__tag());
+  inline constexpr auto destroy_at = __destroy_at::__fn{};
 } // namespace __cpo
 
 // destroy
 
 namespace __destroy {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_input_iterator _InputIterator, __nothrow_sentinel_for<_InputIterator> _Sentinel>
     requires destructible<iter_value_t<_InputIterator>>
   _LIBCPP_HIDE_FROM_ABI
@@ -100,16 +93,14 @@ struct __fn final : private __function_like {
 } // namespace __destroy
 
 inline namespace __cpo {
-  inline constexpr auto destroy = __destroy::__fn(__function_like::__tag());
+  inline constexpr auto destroy = __destroy::__fn{};
 } // namespace __cpo
 
 // destroy_n
 
 namespace __destroy_n {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_input_iterator _InputIterator>
     requires destructible<iter_value_t<_InputIterator>>
   _LIBCPP_HIDE_FROM_ABI
@@ -121,10 +112,11 @@ struct __fn final : private __function_like {
 } // namespace __destroy_n
 
 inline namespace __cpo {
-  inline constexpr auto destroy_n = __destroy_n::__fn(__function_like::__tag());
+  inline constexpr auto destroy_n = __destroy_n::__fn{};
 } // namespace __cpo
 
 } // namespace ranges
+
 #endif // !defined(_LIBCPP_HAS_NO_RANGES)
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__memory/ranges_uninitialized_algorithms.h b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
index 8cd2748e45c97..6a8f9f070ed70 100644
--- a/libcxx/include/__memory/ranges_uninitialized_algorithms.h
+++ b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
@@ -13,7 +13,6 @@
 #include <__algorithm/in_out_result.h>
 #include <__concepts/constructible.h>
 #include <__config>
-#include <__function_like.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iter_move.h>
@@ -40,9 +39,7 @@ namespace ranges {
 
 namespace __uninitialized_default_construct {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel>
     requires default_initializable<iter_value_t<_ForwardIterator>>
@@ -62,16 +59,14 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_default_construct
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_default_construct = __uninitialized_default_construct::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_default_construct = __uninitialized_default_construct::__fn{};
 } // namespace __cpo
 
 // uninitialized_default_construct_n
 
 namespace __uninitialized_default_construct_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator>
     requires default_initializable<iter_value_t<_ForwardIterator>>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -84,18 +79,14 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_default_construct_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_default_construct_n =
-      __uninitialized_default_construct_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_default_construct_n = __uninitialized_default_construct_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_value_construct
 
 namespace __uninitialized_value_construct {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel>
     requires default_initializable<iter_value_t<_ForwardIterator>>
@@ -110,24 +101,19 @@ struct __fn final : private __function_like {
   borrowed_iterator_t<_ForwardRange> operator()(_ForwardRange&& __range) const {
     return (*this)(ranges::begin(__range), ranges::end(__range));
   }
-
 };
 
 } // namespace __uninitialized_value_construct
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_value_construct =
-      __uninitialized_value_construct::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_value_construct = __uninitialized_value_construct::__fn{};
 } // namespace __cpo
 
 // uninitialized_value_construct_n
 
 namespace __uninitialized_value_construct_n {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator>
     requires default_initializable<iter_value_t<_ForwardIterator>>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -135,24 +121,19 @@ struct __fn final : private __function_like {
     using _ValueType = remove_reference_t<iter_reference_t<_ForwardIterator>>;
     return _VSTD::__uninitialized_value_construct_n<_ValueType>(_VSTD::move(__first), __n);
   }
-
 };
 
 } // namespace __uninitialized_value_construct_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_value_construct_n =
-      __uninitialized_value_construct_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_value_construct_n = __uninitialized_value_construct_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_fill
 
 namespace __uninitialized_fill {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel,
             class _Tp>
@@ -167,23 +148,19 @@ struct __fn final : private __function_like {
   borrowed_iterator_t<_ForwardRange> operator()(_ForwardRange&& __range, const _Tp& __x) const {
     return (*this)(ranges::begin(__range), ranges::end(__range), __x);
   }
-
 };
 
 } // namespace __uninitialized_fill
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_fill = __uninitialized_fill::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_fill = __uninitialized_fill::__fn{};
 } // namespace __cpo
 
 // uninitialized_fill_n
 
 namespace __uninitialized_fill_n {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator, class _Tp>
     requires constructible_from<iter_value_t<_ForwardIterator>, const _Tp&>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -192,13 +169,12 @@ struct __fn final : private __function_like {
     using _ValueType = remove_reference_t<iter_reference_t<_ForwardIterator>>;
     return _VSTD::__uninitialized_fill_n<_ValueType>(_VSTD::move(__first), __n, __x);
   }
-
 };
 
 } // namespace __uninitialized_fill_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_fill_n = __uninitialized_fill_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_fill_n = __uninitialized_fill_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_copy
@@ -208,9 +184,7 @@ using uninitialized_copy_result = in_out_result<_InputIterator, _OutputIterator>
 
 namespace __uninitialized_copy {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template  _Sentinel1,
             __nothrow_forward_iterator _OutputIterator,
@@ -237,7 +211,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_copy
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_copy = __uninitialized_copy::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_copy = __uninitialized_copy::__fn{};
 } // namespace __cpo
 
 // uninitialized_copy_n
@@ -247,9 +221,7 @@ using uninitialized_copy_n_result = in_out_result<_InputIterator, _OutputIterato
 
 namespace __uninitialized_copy_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template  _Sentinel>
@@ -267,7 +239,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_copy_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_copy_n = __uninitialized_copy_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_copy_n = __uninitialized_copy_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_move
@@ -277,9 +249,7 @@ using uninitialized_move_result = in_out_result<_InputIterator, _OutputIterator>
 
 namespace __uninitialized_move {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template  _Sentinel1,
             __nothrow_forward_iterator _OutputIterator,
@@ -289,7 +259,6 @@ struct __fn final : private __function_like {
   operator()(_InputIterator __ifirst, _Sentinel1 __ilast, _OutputIterator __ofirst, _Sentinel2 __olast) const {
     using _ValueType = remove_reference_t<iter_reference_t<_OutputIterator>>;
     auto __iter_move = [](auto&& __iter) -> decltype(auto) { return ranges::iter_move(__iter); };
-
     auto __result = _VSTD::__uninitialized_move<_ValueType>(_VSTD::move(__ifirst), _VSTD::move(__ilast),
                                                             _VSTD::move(__ofirst), _VSTD::move(__olast), __iter_move);
     return {_VSTD::move(__result.first), _VSTD::move(__result.second)};
@@ -307,7 +276,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_move
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_move = __uninitialized_move::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_move = __uninitialized_move::__fn{};
 } // namespace __cpo
 
 // uninitialized_move_n
@@ -317,9 +286,7 @@ using uninitialized_move_n_result = in_out_result<_InputIterator, _OutputIterato
 
 namespace __uninitialized_move_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template  _Sentinel>
@@ -329,9 +296,8 @@ struct __fn final : private __function_like {
              _OutputIterator __ofirst, _Sentinel __olast) const {
     using _ValueType = remove_reference_t<iter_reference_t<_OutputIterator>>;
     auto __iter_move = [](auto&& __iter) -> decltype(auto) { return ranges::iter_move(__iter); };
-
-    auto __result = _VSTD::__uninitialized_move_n<_ValueType>(_VSTD::move(__ifirst), __n, _VSTD::move(__ofirst),
-                                                              _VSTD::move(__olast), __iter_move);
+    auto __result = _VSTD::__uninitialized_move_n<_ValueType>(_VSTD::move(__ifirst), __n,
+                                                              _VSTD::move(__ofirst), _VSTD::move(__olast), __iter_move);
     return {_VSTD::move(__result.first), _VSTD::move(__result.second)};
   }
 };
@@ -339,7 +305,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_move_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn{};
 } // namespace __cpo
 
 } // namespace ranges
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index c17ecc98aa5d9..a927f9d0e6700 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -592,10 +592,7 @@ module std [system] {
 
     module __iterator {
       module access                { private header "__iterator/access.h" }
-      module advance               {
-        private header "__iterator/advance.h"
-        export __function_like
-      }
+      module advance               { private header "__iterator/advance.h" }
       module back_insert_iterator  { private header "__iterator/back_insert_iterator.h" }
       module common_iterator       { private header "__iterator/common_iterator.h" }
       module concepts              { private header "__iterator/concepts.h" }
@@ -616,16 +613,10 @@ module std [system] {
       module iterator              { private header "__iterator/iterator.h" }
       module iterator_traits       { private header "__iterator/iterator_traits.h" }
       module move_iterator         { private header "__iterator/move_iterator.h" }
-      module next                  {
-        private header "__iterator/next.h"
-        export __function_like
-      }
+      module next                  { private header "__iterator/next.h" }
       module ostream_iterator      { private header "__iterator/ostream_iterator.h" }
       module ostreambuf_iterator   { private header "__iterator/ostreambuf_iterator.h" }
-      module prev                  {
-        private header "__iterator/prev.h"
-        export __function_like
-      }
+      module prev                  { private header "__iterator/prev.h" }
       module projected             { private header "__iterator/projected.h" }
       module readable_traits       { private header "__iterator/readable_traits.h" }
       module reverse_access        { private header "__iterator/reverse_access.h" }
@@ -673,14 +664,8 @@ module std [system] {
       module concepts                        { private header "__memory/concepts.h" }
       module construct_at                    { private header "__memory/construct_at.h" }
       module pointer_traits                  { private header "__memory/pointer_traits.h" }
-      module ranges_construct_at {
-        private header "__memory/ranges_construct_at.h"
-        export __function_like
-      }
-      module ranges_uninitialized_algorithms {
-        private header "__memory/ranges_uninitialized_algorithms.h"
-        export __function_like
-      }
+      module ranges_construct_at             { private header "__memory/ranges_construct_at.h" }
+      module ranges_uninitialized_algorithms { private header "__memory/ranges_uninitialized_algorithms.h" }
       module raw_storage_iterator            { private header "__memory/raw_storage_iterator.h" }
       module shared_ptr                      { private header "__memory/shared_ptr.h" }
       module temporary_buffer                { private header "__memory/temporary_buffer.h" }
@@ -988,7 +973,6 @@ module std [system] {
   module __bits              { private header "__bits"              export * }
   module __debug             {         header "__debug"             export * }
   module __errc              { private header "__errc"              export * }
-  module __function_like     { private header "__function_like.h"   export * }
   module __hash_table        {         header "__hash_table"        export * }
   module __locale            { private header "__locale"            export * }
   module __mbstate_t         { private header "__mbstate_t.h"       export * }
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
deleted file mode 100644
index 40622e2e5c98a..0000000000000
--- a/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// REQUIRES: modules-build
-
-// WARNING: This test was generated by 'generate_private_header_tests.py'
-// and should not be edited manually.
-
-// expected-error@*:* {{use of private header from outside its module: '__function_like.h'}}
-#include <__function_like.h>
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
deleted file mode 100644
index 8ce5a8e9a643d..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::advance
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::advance)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::advance)>());
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
deleted file mode 100644
index 6f5e4b3783f3d..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::next
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::next)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::next)>());
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
deleted file mode 100644
index a87464feb25f5..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::prev
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::prev)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::prev)>());
diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
new file mode 100644
index 0000000000000..fcca5813dcb31
--- /dev/null
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
@@ -0,0 +1,188 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts, libcpp-has-no-incomplete-ranges
+// REQUIRES: stdlib=libc++
+
+// [algorithms.requirements]/2
+// [range.iter.ops.general]/2
+
+#include <algorithm>
+#include <concepts>
+#include <iterator>
+#include <memory>
+#include <random>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+
+// Niebloids, unlike CPOs, are *not* required to be semiregular or even to have
+// a declared type at all; they are specified as "magic" overload sets whose
+// names are not found by argument-dependent lookup and which inhibit
+// argument-dependent lookup if they are found via a `using`-declaration.
+//
+// libc++ implements them using the same function-object technique we use for CPOs;
+// therefore this file should stay in sync with ./cpo.compile.pass.cpp.
+
+template <class CPO, class... Args>
+constexpr bool test(CPO& o, Args&&...) {
+  static_assert(std::is_class_v<CPO>);
+  static_assert(std::is_trivial_v<CPO>);
+
+  auto p = o;
+  using T = decltype(p);
+
+  // The type of a customization point object, ignoring cv-qualifiers, shall model semiregular.
+  static_assert(std::semiregular<T>);
+
+  // The type T of a customization point object, ignoring cv-qualifiers, shall model...
+  static_assert(std::invocable<T&, Args...>);
+  static_assert(std::invocable<const T&, Args...>);
+  static_assert(std::invocable<T, Args...>);
+  static_assert(std::invocable<const T, Args...>);
+
+  return true;
+}
+
+int *p;
+int a[10];
+//auto odd = [](int x) { return x % 2 != 0; };
+//auto triple = [](int x) { return 3*x; };
+//auto plus = [](int x, int y) { return x == y; };
+//std::mt19937 g;
+
+// [algorithm.syn]
+
+//static_assert(test(std::ranges::adjacent_find, a));
+//static_assert(test(std::ranges::all_of, a, odd));
+//static_assert(test(std::ranges::any_of, a, odd));
+//static_assert(test(std::ranges::binary_search, a, 42));
+//static_assert(test(std::ranges::clamp, 42, 42, 42));
+//static_assert(test(std::ranges::copy, a, a));
+//static_assert(test(std::ranges::copy_backward, a, a));
+//static_assert(test(std::ranges::copy_if, a, a, odd));
+//static_assert(test(std::ranges::copy_n, a, 10, a));
+//static_assert(test(std::ranges::count, a, 42));
+//static_assert(test(std::ranges::count_if, a, odd));
+//static_assert(test(std::ranges::ends_with, a, a));
+//static_assert(test(std::ranges::equal, a, a));
+//static_assert(test(std::ranges::equal_range, a, 42));
+//static_assert(test(std::ranges::fill, a, 42));
+//static_assert(test(std::ranges::fill_n, a, 10, 42));
+//static_assert(test(std::ranges::find, a, 42));
+//static_assert(test(std::ranges::find_end, a, a));
+//static_assert(test(std::ranges::find_first_of, a, a));
+//static_assert(test(std::ranges::find_if, a, odd));
+//static_assert(test(std::ranges::find_if_not, a, odd));
+//static_assert(test(std::ranges::for_each, a, odd));
+//static_assert(test(std::ranges::for_each_n, a, 10, odd));
+//static_assert(test(std::ranges::generate, a, 42));
+//static_assert(test(std::ranges::generate_n, a, 10, 42));
+//static_assert(test(std::ranges::includes, a, a));
+//static_assert(test(std::ranges::inplace_merge, a, a+5));
+//static_assert(test(std::ranges::is_heap, a));
+//static_assert(test(std::ranges::is_heap_until, a));
+//static_assert(test(std::ranges::is_partitioned, a, odd));
+//static_assert(test(std::ranges::is_permutation, a, a));
+//static_assert(test(std::ranges::is_sorted, a));
+//static_assert(test(std::ranges::is_sorted_until, a));
+//static_assert(test(std::ranges::lexicographical_compare, a, a));
+//static_assert(test(std::ranges::lower_bound, a, 42));
+//static_assert(test(std::ranges::make_heap, a));
+//static_assert(test(std::ranges::max, a));
+//static_assert(test(std::ranges::max_element, a));
+//static_assert(test(std::ranges::merge, a, a, a));
+//static_assert(test(std::ranges::min, a));
+//static_assert(test(std::ranges::min_element, a));
+//static_assert(test(std::ranges::minmax, a));
+//static_assert(test(std::ranges::minmax_element, a));
+//static_assert(test(std::ranges::mismatch, a, a));
+//static_assert(test(std::ranges::move, a, a));
+//static_assert(test(std::ranges::move_backward, a, a));
+//static_assert(test(std::ranges::next_permutation, a));
+//static_assert(test(std::ranges::none_of, a, odd));
+//static_assert(test(std::ranges::nth_element, a, a+5));
+//static_assert(test(std::ranges::partial_sort, a, a+5));
+//static_assert(test(std::ranges::partial_sort_copy, a, a));
+//static_assert(test(std::ranges::partition, a, odd));
+//static_assert(test(std::ranges::partition_copy, a, a, a, odd));
+//static_assert(test(std::ranges::partition_point, a, odd));
+//static_assert(test(std::ranges::pop_heap, a));
+//static_assert(test(std::ranges::prev_permutation, a));
+//static_assert(test(std::ranges::push_heap, a));
+//static_assert(test(std::ranges::remove, a, 42));
+//static_assert(test(std::ranges::remove_copy, a, a, 42));
+//static_assert(test(std::ranges::remove_copy_if, a, a, odd));
+//static_assert(test(std::ranges::remove_if, a, odd));
+//static_assert(test(std::ranges::replace, a, 42, 43));
+//static_assert(test(std::ranges::replace_copy, a, a, 42, 43));
+//static_assert(test(std::ranges::replace_copy_if, a, a, odd, 43));
+//static_assert(test(std::ranges::replace_if, a, odd, 43));
+//static_assert(test(std::ranges::reverse, a));
+//static_assert(test(std::ranges::reverse_copy, a, a));
+//static_assert(test(std::ranges::rotate, a, a+5));
+//static_assert(test(std::ranges::rotate_copy, a, a+5, a));
+//static_assert(test(std::ranges::sample, a, a, 5));
+//static_assert(test(std::ranges::search, a, a));
+//static_assert(test(std::ranges::search_n, a, 10, 42));
+//static_assert(test(std::ranges::set_difference, a, a, a));
+//static_assert(test(std::ranges::set_intersection, a, a, a));
+//static_assert(test(std::ranges::set_symmetric_difference, a, a, a));
+//static_assert(test(std::ranges::set_union, a, a, a));
+//static_assert(test(std::ranges::shuffle, a, g));
+//static_assert(test(std::ranges::sort, a));
+//static_assert(test(std::ranges::sort_heap, a));
+//static_assert(test(std::ranges::stable_partition, a, odd));
+//static_assert(test(std::ranges::stable_sort, a));
+//static_assert(test(std::ranges::starts_with, a, a));
+//static_assert(test(std::ranges::swap_ranges, a, a));
+//static_assert(test(std::ranges::transform, a, a, triple));
+//static_assert(test(std::ranges::unique, a));
+//static_assert(test(std::ranges::unique_copy, a, a));
+//static_assert(test(std::ranges::upper_bound, a, 42));
+
+// [memory.syn]
+
+static_assert(test(std::ranges::construct_at, a, 42));
+static_assert(test(std::ranges::destroy, a));
+static_assert(test(std::ranges::destroy, a, a+10));
+static_assert(test(std::ranges::destroy_at, a));
+static_assert(test(std::ranges::destroy_n, a, 10));
+static_assert(test(std::ranges::uninitialized_copy, a, a));
+static_assert(test(std::ranges::uninitialized_copy, a, a+10, a, a+10));
+static_assert(test(std::ranges::uninitialized_copy_n, a, 10, a, a+10));
+static_assert(test(std::ranges::uninitialized_default_construct, a));
+static_assert(test(std::ranges::uninitialized_default_construct, a, a+10));
+static_assert(test(std::ranges::uninitialized_default_construct_n, a, 10));
+static_assert(test(std::ranges::uninitialized_fill, a, 42));
+static_assert(test(std::ranges::uninitialized_fill, a, a+10, 42));
+static_assert(test(std::ranges::uninitialized_fill_n, a, 10, 42));
+static_assert(test(std::ranges::uninitialized_move, a, a));
+static_assert(test(std::ranges::uninitialized_move, a, a+10, a, a+10));
+static_assert(test(std::ranges::uninitialized_move_n, a, 10, a, a+10));
+static_assert(test(std::ranges::uninitialized_value_construct, a));
+static_assert(test(std::ranges::uninitialized_value_construct, a, a+10));
+static_assert(test(std::ranges::uninitialized_value_construct_n, a, 10));
+
+// [numeric.ops.overview] currently has no ranges algorithms. See P1813, P2214
+
+// [range.iter.ops]
+
+static_assert(test(std::ranges::advance, p, 5));
+static_assert(test(std::ranges::advance, p, 5, a+10));
+static_assert(test(std::ranges::advance, p, a+10));
+//static_assert(test(std::ranges::distance, a));
+//static_assert(test(std::ranges::distance, a, a+10));
+static_assert(test(std::ranges::next, a));
+static_assert(test(std::ranges::next, a, 5));
+static_assert(test(std::ranges::next, a, 5, a+10));
+static_assert(test(std::ranges::next, a, a+10));
+static_assert(test(std::ranges::prev, a+10));
+static_assert(test(std::ranges::prev, a+10, 5));
+static_assert(test(std::ranges::prev, a+10, 5, a));
diff --git a/libcxx/test/support/is_niebloid.h b/libcxx/test/support/is_niebloid.h
deleted file mode 100644
index 2405d498939d2..0000000000000
--- a/libcxx/test/support/is_niebloid.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H
-#define LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H
-
-#include "test_macros.h"
-
-#if TEST_STD_VER >= 20
-template 
-constexpr bool is_addressable = requires(T t) {
-  &t;
-};
-
-template 
-constexpr bool is_niebloid() {
-  using X = std::remove_cvref_t;
-  static_assert(!is_addressable);
-  static_assert(!is_addressable);
-
-  static_assert(std::destructible && !std::default_initializable);
-
-  static_assert(!std::move_constructible);
-  static_assert(!std::assignable_from);
-
-  static_assert(!std::copy_constructible);
-  static_assert(!std::assignable_from);
-  static_assert(!std::assignable_from);
-  static_assert(!std::assignable_from);
-  static_assert(std::is_final_v);
-  return true;
-}
-#endif
-
-#endif // LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H

From ba8eb31bd9542828f6424e15a3014f80f14522c8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev 
Date: Thu, 20 Jan 2022 22:41:31 +0300
Subject: [PATCH 061/946] [InstCombine] Instruction sinking: fix check for
 function terminating block

Checking for specific function terminating opcodes
means we don't handle other non-hardcoded ones :)

This should probably be generalized to something
similar to the `IsBlockFollowedByDeoptOrUnreachable()`.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D117810
---
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp   | 5 ++---
 llvm/test/Transforms/InstCombine/sink-into-resume-block.ll | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d8544bf3211b8..89c5fef18eca8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3966,12 +3966,11 @@ bool InstCombinerImpl::run() {
       // predecessor, so that we don't have to split the critical edge.
       // Another option where we can sink is a block that ends with a
       // terminator that does not pass control to other block (such as
-      // return or unreachable). In this case:
+      // return or unreachable or resume). In this case:
       //   - I dominates the User (by SSA form);
       //   - the User will be executed at most once.
       // So sinking I down to User is always profitable or neutral.
-      if (UserParent->getUniquePredecessor() == BB ||
-          (isa(Term) || isa(Term))) {
+      if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
         assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
         return UserParent;
       }
diff --git a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
index a687977d4b975..cb7de75276cfc 100644
--- a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
+++ b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
@@ -7,7 +7,6 @@ define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_person
 ; CHECK-LABEL: @t0_noop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
-; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
 ; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    invoke void @simple_throw()
@@ -17,6 +16,7 @@ define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_person
 ; CHECK:       lpad:
 ; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
 ; CHECK-NEXT:    call void @consume(i32 [[V0]])
 ; CHECK-NEXT:    call void @destructor()
 ; CHECK-NEXT:    resume { i8*, i32 } [[EH]]

From 8105e404f186c6f31e08185b37f81e6da904f6d7 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell 
Date: Thu, 20 Jan 2022 07:40:12 -0800
Subject: [PATCH 062/946] [demangler][NFC] Small cleanups and sync

Some precursor work to adding module demangling.

* some mismatched comment and code in the demangler

* a logically-const member function (look) was not marked const

* we use std::islower.  A direct range check is smaller code (no function call),
  and we know we're in ASCII-land and later in that same function make the same
  assumption about upper-case contiguity.  Heck, maybe just drop the switch's
  precondition and rely on the optimizer to do its thing?

* the directory is cloned in two places, which had gotten out of sync.

Differential Revision: https://reviews.llvm.org/D117800
---
 libcxxabi/src/demangle/ItaniumDemangle.h     |  9 +++++----
 llvm/include/llvm/Demangle/ItaniumDemangle.h | 13 +++++++------
 llvm/include/llvm/Demangle/StringView.h      |  4 ++--
 llvm/include/llvm/Demangle/Utility.h         |  4 ++--
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 85e1511346c0b..b25139d8a72ba 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -310,7 +310,7 @@ class Node {
       printRight(OB);
   }
 
-  // Print the "left" side of this Node into OutputString.
+  // Print the "left" side of this Node into OutputBuffer.
   virtual void printLeft(OutputBuffer &) const = 0;
 
   // Print the "right". This distinction is necessary to represent C++ types
@@ -1210,7 +1210,8 @@ class TemplateParamPackDecl final : public Node {
 class ParameterPack final : public Node {
   NodeArray Data;
 
-  // Setup OutputString for a pack expansion unless we're already expanding one.
+  // Setup OutputBuffer for a pack expansion, unless we're already expanding
+  // one.
   void initializePackExpansion(OutputBuffer &OB) const {
     if (OB.CurrentPackMax == std::numeric_limits::max()) {
       OB.CurrentPackMax = static_cast(Data.size());
@@ -2473,7 +2474,7 @@ template  struct AbstractManglingParser {
 
   char consume() { return First != Last ? *First++ : '\0'; }
 
-  char look(unsigned Lookahead = 0) {
+  char look(unsigned Lookahead = 0) const {
     if (static_cast(Last - First) <= Lookahead)
       return '\0';
     return First[Lookahead];
@@ -5437,7 +5438,7 @@ Node *AbstractManglingParser::parseSubstitution() {
   if (!consumeIf('S'))
     return nullptr;
 
-  if (std::islower(look())) {
+  if (look() >= 'a' && look() <= 'z') {
     Node *SpecialSub;
     switch (look()) {
     case 'a':
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 86f5c992b63d1..b25139d8a72ba 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
-#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
@@ -1210,7 +1210,8 @@ class TemplateParamPackDecl final : public Node {
 class ParameterPack final : public Node {
   NodeArray Data;
 
-  // Setup OutputBuffer for a pack expansion unless we're already expanding one.
+  // Setup OutputBuffer for a pack expansion, unless we're already expanding
+  // one.
   void initializePackExpansion(OutputBuffer &OB) const {
     if (OB.CurrentPackMax == std::numeric_limits::max()) {
       OB.CurrentPackMax = static_cast(Data.size());
@@ -2473,7 +2474,7 @@ template  struct AbstractManglingParser {
 
   char consume() { return First != Last ? *First++ : '\0'; }
 
-  char look(unsigned Lookahead = 0) {
+  char look(unsigned Lookahead = 0) const {
     if (static_cast(Last - First) <= Lookahead)
       return '\0';
     return First[Lookahead];
@@ -5437,7 +5438,7 @@ Node *AbstractManglingParser::parseSubstitution() {
   if (!consumeIf('S'))
     return nullptr;
 
-  if (std::islower(look())) {
+  if (look() >= 'a' && look() <= 'z') {
     Node *SpecialSub;
     switch (look()) {
     case 'a':
@@ -5747,4 +5748,4 @@ struct ManglingParser : AbstractManglingParser, Alloc> {
 
 DEMANGLE_NAMESPACE_END
 
-#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 378e853416376..1e4d3803f06cd 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_STRINGVIEW_H
-#define LLVM_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
 #include 
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 4fea9351a4bfc..733d83ad1b6ba 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_UTILITY_H
-#define LLVM_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
 #include 

From 864b5b49fd3f6639efbc16d9b1c827d0194eeb0d Mon Sep 17 00:00:00 2001
From: Casey Carter 
Date: Tue, 18 Jan 2022 22:50:15 -0800
Subject: [PATCH 063/946] [libcxx] chrono::month_weekday should not be default
 constructible

It was not in P0355R7, nor has it ever been so in a working draft.

Drive-by:
* tests should test something: fix loop bounds so initial value is not >= final value
* calendar type streaming tests are useless - let's remove them
* don't declare printf, especially if you don't intend to use it

Differential Revision: https://reviews.llvm.org/D117638
---
 libcxx/docs/ReleaseNotes.rst                  |  3 +
 libcxx/include/__chrono/calendar.h            |  1 -
 .../comparisons.pass.cpp                      |  2 +-
 .../streaming.pass.cpp                        | 59 -------------------
 .../time.cal.md.members/ok.pass.cpp           |  2 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../time.cal.md.nonmembers/streaming.pass.cpp | 42 -------------
 .../time.cal.mdlast/comparisons.pass.cpp      | 10 ++--
 .../time/time.cal/time.cal.mdlast/ok.pass.cpp |  2 +-
 .../time.cal.mdlast/streaming.pass.cpp        | 35 -----------
 .../comparisons.pass.cpp                      |  6 +-
 .../time.cal.month.nonmembers/minus.pass.cpp  | 15 ++---
 .../time.cal.month.nonmembers/plus.pass.cpp   | 10 ++--
 .../streaming.pass.cpp                        | 54 -----------------
 .../time.cal.mwd.members/month.pass.cpp       |  2 -
 .../time.cal.mwd.members/ok.pass.cpp          |  4 +-
 .../streaming.pass.cpp                        | 37 ------------
 .../time.cal.mwdlast.members/ctor.pass.cpp    |  8 +--
 .../comparisons.pass.cpp                      | 16 ++---
 .../streaming.pass.cpp                        | 38 ------------
 .../month_day_last.pass.cpp                   |  9 +--
 .../time.cal.wdidx.members/ok.pass.cpp        |  4 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../streaming.pass.cpp                        | 37 ------------
 .../comparisons.pass.cpp                      |  4 +-
 .../streaming.pass.cpp                        | 35 -----------
 .../time.cal.weekday.members/ctor.pass.cpp    |  4 +-
 .../iso_encoding.pass.cpp                     |  6 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../minus.pass.cpp                            | 12 ++--
 .../time.cal.weekday.nonmembers/plus.pass.cpp | 10 ++--
 .../streaming.pass.cpp                        | 57 ------------------
 .../comparisons.pass.cpp                      |  4 +-
 .../time.cal.year.nonmembers/minus.pass.cpp   |  2 -
 .../streaming.pass.cpp                        | 56 ------------------
 .../comparisons.pass.cpp                      | 10 ++--
 .../time.cal.ym.nonmembers/minus.pass.cpp     | 12 ++--
 .../time.cal.ym.nonmembers/streaming.pass.cpp | 58 ------------------
 .../ctor.sys_days.pass.cpp                    |  6 +-
 .../time.cal.ymd.members/ok.pass.cpp          |  6 +-
 .../op.local_days.pass.cpp                    |  4 +-
 .../time.cal.ymd.members/op.sys_days.pass.cpp |  6 +-
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 59 -------------------
 .../time.cal.ymdlast.members/day.pass.cpp     |  4 +-
 .../comparisons.pass.cpp                      | 18 +++---
 .../minus.pass.cpp                            |  4 +-
 .../streaming.pass.cpp                        | 38 ------------
 .../op.local_days.pass.cpp                    |  4 +-
 .../op.sys_days.pass.cpp                      |  4 +-
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 58 ------------------
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 39 ------------
 54 files changed, 148 insertions(+), 858 deletions(-)
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index 8f57df3f9af4f..e1be3a6c114c1 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -108,6 +108,9 @@ API Changes
   as either ``for (auto&& c : path)`` or ``for (const auto& c : path)``.
   ``std::reverse_iterator`` is no longer rejected.
 
+- Removed the nonstandard default constructor from ``std::chrono::month_weekday``.
+  You must now explicitly initialize with a ``chrono::month`` and
+  ``chrono::weekday_indexed`` instead of "meh, whenever".
 
 ABI Changes
 -----------
diff --git a/libcxx/include/__chrono/calendar.h b/libcxx/include/__chrono/calendar.h
index 0854ca60d3dfb..745f7f5cf5290 100644
--- a/libcxx/include/__chrono/calendar.h
+++ b/libcxx/include/__chrono/calendar.h
@@ -540,7 +540,6 @@ class month_weekday {
     chrono::month __m;
     chrono::weekday_indexed __wdi;
 public:
-    month_weekday() = default;
     constexpr month_weekday(const chrono::month& __mval, const chrono::weekday_indexed& __wdival) noexcept
         : __m{__mval}, __wdi{__wdival} {}
     inline constexpr chrono::month                     month() const noexcept { return __m; }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
index 252a1728da9de..fbae12e057883 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
@@ -33,7 +33,7 @@ int main(int, char**)
     static_assert(testComparisons6Values(0U, 0U), "");
     static_assert(testComparisons6Values(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values( 5U,  5U), "");
     static_assert(testComparisons6Values( 5U, 10U), "");
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
deleted file mode 100644
index d53c67c492d92..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-has-no-localization
-// XFAIL: *
-
-// 
-// class day;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const day& d);
-//
-//   Effects: Inserts format(fmt, d) where fmt is "%d" widened to charT.
-//                If !d.ok(), appends with " is not a valid day".
-//
-// template
-//   basic_ostream&
-//   to_stream(basic_ostream& os, const charT* fmt, const day& d);
-//
-//   Effects: Streams d into os using the format specified by the NTCTS fmt.
-//              fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//   basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//             day& d, basic_string* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the day d using the format flags
-//             given in the NTCTS fmt as specified in 25.12.
-//             If the parse fails to decode a valid day, is.setstate(ios_base::failbit)
-//             shall be called and d shall not be modified.
-//             If %Z is used and successfully parsed, that value will be assigned to *abbrev
-//             if abbrev is non-null. If %z (or a modified variant) is used and
-//             successfully parsed, that value will be assigned to *offset if offset is non-null.
-//
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using day = std::chrono::day;
-   std::cout << day{1};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
index 7f25b2d52bd2f..2bc7a5aadd16c 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
         assert(!(month_day{month{i}, day{32}}.ok()));
     }
 
-//  If the month is not ok, all the days are bad
+    //  If the month is not ok, all the days are bad
     for (unsigned i = 1; i <= 35; ++i)
         assert(!(month_day{month{13}, day{i}}.ok()));
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
index 573ec1a9030b2..d64e0e194ceaa 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
         month_day{std::chrono::February, day{1}},
         false, true), "");
 
-//  same day, different months
+    //  same day, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -58,7 +58,7 @@ int main(int, char**)
                 month_day{month{j}, day{1}},
                 i == j, i < j )));
 
-//  same month, different days
+    //  same month, different days
     for (unsigned i = 1; i < 31; ++i)
         for (unsigned j = 1; j < 31; ++j)
             assert((testComparisons6(
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 84305e4a6d89d..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class month_day;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const month_day& md);
-//
-//     Returns: os << md.month() << '/' << md.day().
-//
-// template
-//     basic_ostream&
-//     to_stream(basic_ostream& os, const charT* fmt, const month_day& md);
-//
-// Effects: Streams md into os using the format specified by the NTCTS fmt.
-//          fmt encoding follows the rules specified in 25.11.
-
-
-#include 
-#include 
-#include 
-#include 
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_day = std::chrono::month_day;
-    using month     = std::chrono::month;
-    using day       = std::chrono::day;
-    std::cout << month_day{month{1}, day{1}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
index ebccc9f14ccba..a83b045beb4a3 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
@@ -35,10 +35,10 @@ int main(int, char**)
     static_assert( testComparisons6Values(month{1}, month{1}), "");
     static_assert( testComparisons6Values(month{1}, month{2}), "");
 
-//  same day, different months
-    for (unsigned i = 1; i < 12; ++i)
-        for (unsigned j = 1; j < 12; ++j)
-            assert((testComparisons6Values(month{i}, month{j})));
+    // same day, different months
+    for (unsigned i = 1; i <= 12; ++i)
+        for (unsigned j = 1; j <= 12; ++j)
+            assert(testComparisons6Values(month{i}, month{j}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
index b9d6535890e3c..c6bec5d33a592 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
         assert( mdl.ok());
     }
 
-//  If the month is not ok, all the days are bad
+    //  If the month is not ok, all the days are bad
     for (unsigned i = 13; i <= 50; ++i)
     {
         month_day_last mdl{month{i}};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
deleted file mode 100644
index 5960d0239f881..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class month_day_last;
-//
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const month_day_last& mdl);
-//
-//     Returns: os << mdl.month() << "/last".
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_day_last = std::chrono::month_day_last;
-    using month          = std::chrono::month;
-    std::cout << month_day_last{month{1}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
index df709a5cb0432..41f31cd084de2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
@@ -36,12 +36,12 @@ int main(int, char**)
     static_assert(testComparisons6Values(0U ,0U), "");
     static_assert(testComparisons6Values(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values( 5U,  5U), "");
     static_assert(testComparisons6Values( 5U, 10U), "");
 
-    for (unsigned i = 1; i < 10; ++i)
-        for (unsigned j = 10; j < 10; ++j)
+    for (unsigned i = 1; i <= 12; ++i)
+        for (unsigned j = 1; j <= 12; ++j)
             assert(testComparisons6Values(i, j));
 
   return 0;
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
index 0f1bf7f0e87a8..0fe2f68e0e8ea 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
@@ -31,12 +31,13 @@ constexpr bool testConstexpr()
     {
     M m{5};
     Ms offset{3};
-    if (m - offset != M{2}) return false;
-    if (m - M{2} != offset) return false;
+    assert(m - offset == M{2});
+    assert(m - M{2} == offset);
     }
 
-//  Check the example
-    if (M{1} - M{2} != Ms{11}) return false;
+    //  Check the example
+    assert(M{1} - M{2} == Ms{11});
+
     return true;
 }
 
@@ -51,18 +52,18 @@ int main(int, char**)
     ASSERT_SAME_TYPE(month , decltype(std::declval() - std::declval()));
     ASSERT_SAME_TYPE(months, decltype(std::declval() - std::declval ()));
 
-static_assert(testConstexpr(), "");
+    static_assert(testConstexpr(), "");
 
     month m{6};
     for (unsigned i = 1; i <= 12; ++i)
     {
         month m1   = m - months{i};
-//      months off = m - month {i};
+        // months off = m - month {i};
         int exp = 6 - i;
         if (exp < 1)
             exp += 12;
         assert(static_cast(m1) == static_cast(exp));
-//          assert(off.count()               == static_cast(exp));
+        // assert(off.count()            == static_cast(exp));
     }
 
   return 0;
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
index 538efd32d8633..c40aef055301f 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
@@ -36,10 +36,10 @@ constexpr bool testConstexpr()
 {
     M m{1};
     Ms offset{4};
-    if (m + offset != M{5}) return false;
-    if (offset + m != M{5}) return false;
-//  Check the example
-    if (M{2} + Ms{11} != M{1}) return false;
+    assert(m + offset == M{5});
+    assert(offset + m == M{5});
+    //  Check the example
+    assert(M{2} + Ms{11} == M{1});
     return true;
 }
 
@@ -69,5 +69,5 @@ int main(int, char**)
         assert(static_cast(m2) == exp);
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 369fa6f4c2179..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class month;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const month& m);
-//
-//   Effects: If m.ok() == true inserts format(os.getloc(), fmt, m) where fmt is "%b" widened to charT.
-//   Otherwise inserts int{m} << " is not a valid month".
-//
-// template
-//   basic_ostream&
-//   to_stream(basic_ostream& os, const charT* fmt, const month& m);
-//
-//   Effects: Streams m into os using the format specified by the NTCTS fmt.
-//   fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//   basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//             month& m, basic_string* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the month m using the format flags
-//   given in the NTCTS fmt as specified in 25.12. If the parse fails to decode a valid month,
-//   is.setstate(ios_- base::failbit) shall be called and m shall not be modified.
-//   If %Z is used and successfully parsed, that value will be assigned to *abbrev if
-//   abbrev is non-null. If %z (or a modified variant) is used and successfully parsed,
-//   that value will be assigned to *offset if offset is non-null.
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using month = std::chrono::month;
-   std::cout << month{1};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
index daf959e424c69..f6e79f5a90d09 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
@@ -31,8 +31,6 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                 std::declval().month());
     ASSERT_SAME_TYPE(month, decltype(std::declval().month()));
 
-    static_assert( month_weekday{}.month() == month{}, "");
-
     for (unsigned i = 1; i <= 50; ++i)
     {
         month_weekday md(month{i}, weekday_indexed{Sunday, 1});
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
index f76797cbae511..a46ffabb9b71d 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
@@ -44,9 +44,9 @@ int main(int, char**)
             assert(mwd.ok() == (j >= 1 && j <= 5));
         }
 
-//  If the month is not ok, all the weekday_indexed are bad
+    //  If the month is not ok, all the weekday_indexed are bad
     for (unsigned i = 1; i <= 10; ++i)
         assert(!(month_weekday{month{13}, weekday_indexed{Sunday, i}}.ok()));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 96555e36238d8..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class month_weekday;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const month_weekday& mwd);
-//
-//     Returns: os << mwd.month() << '/' << mwd.weekday_indexed().
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_weekday   = std::chrono::month_weekday;
-    using month           = std::chrono::month;
-    using weekday_indexed = std::chrono::weekday_indexed;
-    using weekday         = std::chrono::weekday;
-
-    std::cout << month_weekday{month{1}, weekday_indexed{weekday{3}, 3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
index af9c93d5820bc..69e58d3fc1963 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
@@ -39,23 +39,23 @@ int main(int, char**)
 
     ASSERT_NOEXCEPT(month_weekday_last{January, weekday_last{Tuesday}});
 
-//  bad month
+    //  bad month
     constexpr month_weekday_last mwdl1{month{}, weekday_last{Tuesday}};
     static_assert( mwdl1.month() == month{},                      "");
     static_assert( mwdl1.weekday_last() == weekday_last{Tuesday}, "");
     static_assert(!mwdl1.ok(),                                    "");
 
-//  bad weekday_last
+    //  bad weekday_last
     constexpr month_weekday_last mwdl2{January, weekday_last{weekday{16}}};
     static_assert( mwdl2.month() == January,                          "");
     static_assert( mwdl2.weekday_last() == weekday_last{weekday{16}}, "");
     static_assert(!mwdl2.ok(),                                        "");
 
-//  Good month and weekday_last
+    //  Good month and weekday_last
     constexpr month_weekday_last mwdl3{January, weekday_last{weekday{4}}};
     static_assert( mwdl3.month() == January,                         "");
     static_assert( mwdl3.weekday_last() == weekday_last{weekday{4}}, "");
     static_assert( mwdl3.ok(),                                       "");
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
index 38f85570650ef..91b5e4f63773e 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
         month_weekday_last{std::chrono::January, weekday_last{Wednesday}},
         false), "");
 
-//  vary the months
+    //  vary the months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -56,7 +56,7 @@ int main(int, char**)
                 month_weekday_last{month{j}, weekday_last{Tuesday}},
             i == j)));
 
-//  vary the weekday
+    //  vary the weekday
     for (unsigned i = 0; i < 6; ++i)
         for (unsigned j = 0; j < 6; ++j)
             assert((testComparisons2(
@@ -64,11 +64,11 @@ int main(int, char**)
                 month_weekday_last{January, weekday_last{weekday{j}}},
             i == j)));
 
-//  both different
-        assert((testComparisons2(
-            month_weekday_last{month{1}, weekday_last{weekday{1}}},
-            month_weekday_last{month{2}, weekday_last{weekday{2}}},
-        false)));
+    //  both different
+    assert((testComparisons2(
+        month_weekday_last{month{1}, weekday_last{weekday{1}}},
+        month_weekday_last{month{2}, weekday_last{weekday{2}}},
+    false)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3e7eb9f38bd2e..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class month_weekday_last;
-//
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const month_weekday_last& mdl);
-//
-//     Returns: os << mdl.month() << "/last".
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_weekday_last = std::chrono::month_weekday_last;
-    using month              = std::chrono::month;
-    using weekday            = std::chrono::weekday;
-    using weekday_last       = std::chrono::weekday_last;
-
-    std::cout << month_weekday_last{month{1}, weekday_last{weekday{3}}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
index ddc9cd52464d1..ae9d7e4fcab56 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
@@ -34,11 +34,6 @@
 //     static_assert(mdl.month() == February);
 // --end example]
 
-
-
-
-
-
 #include 
 #include 
 #include 
@@ -57,7 +52,7 @@ int main(int, char**)
     ASSERT_SAME_TYPE(month_day_last, decltype(last/February));
     ASSERT_SAME_TYPE(month_day_last, decltype(February/last));
 
-//  Run the example
+    //  Run the example
     {
     constexpr auto mdl = February/std::chrono::last;
     static_assert(mdl.month() == February, "");
@@ -104,5 +99,5 @@ int main(int, char**)
     }
 
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
index a4ed7bf539cea..3c96412bcefd1 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
@@ -43,8 +43,8 @@ int main(int, char**)
         assert(!wdi.ok());
     }
 
-//  Not a valid weekday
+    //  Not a valid weekday
     assert(!(weekday_indexed(weekday{9U}, 1).ok()));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
index 58520205c45a8..c56c2442c2aed 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
     static_assert(!(weekday_indexed{} == weekday_indexed{std::chrono::Tuesday, 1}), "");
     static_assert( (weekday_indexed{} != weekday_indexed{std::chrono::Tuesday, 1}), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert( (weekday_indexed{weekday{1}, 2} == weekday_indexed{weekday{1}, 2}), "");
     static_assert(!(weekday_indexed{weekday{1}, 2} != weekday_indexed{weekday{1}, 2}), "");
 
@@ -45,5 +45,5 @@ int main(int, char**)
     static_assert(!(weekday_indexed{weekday{1}, 2} == weekday_indexed{weekday{2}, 2}),  "");
     static_assert( (weekday_indexed{weekday{1}, 2} != weekday_indexed{weekday{2}, 2}),  "");
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
deleted file mode 100644
index be16c8fa883ac..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class weekday_indexed;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const weekday_indexed& wdi);
-//
-//   Effects: os << wdi.weekday() << '[' << wdi.index().
-//     If wdi.index() is in the range [1, 5], appends with ']',
-//       otherwise appends with " is not a valid index]".
-
-
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using weekday_indexed = std::chrono::weekday_indexed;
-    using weekday         = std::chrono::weekday;
-
-    std::cout << weekday_indexed{weekday{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
index 03f4c3af8ecbc..bb02729552a09 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
@@ -32,7 +32,7 @@ int main(int, char**)
     static_assert(testComparisons2Values(weekday{0}, weekday{0}), "");
     static_assert(testComparisons2Values(weekday{0}, weekday{1}), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons2Values(weekday{2}, weekday{2}), "");
     static_assert(testComparisons2Values(weekday{2}, weekday{3}), "");
 
@@ -40,5 +40,5 @@ int main(int, char**)
         for (unsigned j = 0; j < 6; ++j)
             assert(testComparisons2Values(weekday{i}, weekday{j}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index a3941a5744bc5..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class weekday_last;
-
-//   template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const weekday_last& wdl);
-//
-//   Returns: os << wdl.weekday() << "[last]".
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using weekday_last = std::chrono::weekday_last;
-   using weekday      = std::chrono::weekday;
-
-   std::cout << weekday_last{weekday{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
index 6d7be0a4ead72..d59df23b563a8 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
         assert(m.c_encoding() == (i == 7 ? 0 : i));
     }
 
-// TODO - sys_days and local_days ctor tests
+    // TODO - sys_days and local_days ctor tests
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
index afb6a81a9d5ef..0e56a3e0f79fe 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
@@ -36,13 +36,13 @@ int main(int, char**)
 
     static_assert(testConstexpr(), "");
 
-//  This is different than all the other tests, because the '7' gets converted to
-//  a zero in the constructor, but then back to '7' by iso_encoding().
+    //  This is different than all the other tests, because the '7' gets converted to
+    //  a zero in the constructor, but then back to '7' by iso_encoding().
     for (unsigned i = 0; i <= 10; ++i)
     {
         weekday wd(i);
         assert(wd.iso_encoding() == (i == 0 ? 7 : i));
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
index 213f490559c20..759a78b61a5ce 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**)
     static_assert(testComparisons2Values(0U ,0U), "");
     static_assert(testComparisons2Values(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons2Values(5U, 5U), "");
     static_assert(testComparisons2Values(5U, 2U), "");
 
@@ -39,5 +39,5 @@ int main(int, char**)
         for (unsigned j = 0; j < 6; ++j)
             assert(testComparisons2Values(i, j));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
index 7bac92761e4e8..434662ba6c8a5 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
@@ -19,9 +19,6 @@
 // Otherwise the value returned is unspecified.
 // [Example: Sunday - Monday == days{6}. —end example]
 
-
-extern "C" int printf(const char *, ...);
-
 #include 
 #include 
 #include 
@@ -35,12 +32,13 @@ constexpr bool testConstexpr()
     {
     WD wd{5};
     Ds offset{3};
-    if (wd - offset != WD{2}) return false;
-    if (wd - WD{2} != offset) return false;
+    assert(wd - offset == WD{2});
+    assert(wd - WD{2} == offset);
     }
 
-//  Check the example
-    if (WD{0} - WD{1} != Ds{6}) return false;
+    //  Check the example
+    assert(WD{0} - WD{1} == Ds{6});
+
     return true;
 }
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
index 7d98b471faa22..1adcee8f8ede2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
@@ -37,10 +37,10 @@ constexpr bool testConstexpr()
 {
     M m{1};
     Ms offset{4};
-    if (m + offset != M{5}) return false;
-    if (offset + m != M{5}) return false;
-//  Check the example
-    if (M{1} + Ms{6} != M{0}) return false;
+    assert(m + offset == M{5});
+    assert(offset + m == M{5});
+    //  Check the example
+    assert(M{1} + Ms{6} == M{0});
     return true;
 }
 
@@ -67,5 +67,5 @@ int main(int, char**)
             assert((wd2.c_encoding() == euclidian_addition(i, j)));
         }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 300d12f0fae1c..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class weekday;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const weekday& wd);
-//
-//   Effects: If wd.ok() == true inserts format(os.getloc(), fmt, wd) where fmt is "%a" widened to charT.
-//     Otherwise inserts unsigned{wd} << " is not a valid weekday".
-//
-// template
-//   basic_ostream&
-//   to_stream(basic_ostream& os, const charT* fmt, const weekday& wd);
-//
-//   Effects: Streams wd into os using the format specified by the NTCTS fmt.
-//   fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//   basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//             weekday& wd, basic_string* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the weekday wd using
-//       the format flags given in the NTCTS fmt as specified in 25.12.
-//     If the parse fails to decode a valid weekday, is.setstate(ios_- base::failbit)
-//       shall be called and wd shall not be modified.
-//     If %Z is used and successfully parsed, that value will be assigned
-//       to *abbrev if abbrev is non-null.
-//     If %z (or a modified variant) is used and successfully parsed,
-//       that value will be assigned to *offset if offset is non-null.
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using weekday = std::chrono::weekday;
-
-   std::cout << weekday{3};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
index 1e4e2a16babf5..9d92055e64ba7 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
     static_assert(testComparisons6Values(0,0), "");
     static_assert(testComparisons6Values(0,1), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values( 5, 5), "");
     static_assert(testComparisons6Values( 5,10), "");
 
@@ -44,5 +44,5 @@ int main(int, char**)
         for (int j = 1; j < 10; ++j)
             assert(testComparisons6Values(i, j));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
index 0681152c439ef..b38110772fc51 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
@@ -19,8 +19,6 @@
 //   Otherwise the value returned is unspecified.
 //   [Example: January - February == years{11}. —end example]
 
-extern "C" int printf(const char *, ...);
-
 #include 
 #include 
 #include 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 9ba502f63ea2a..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const year& y);
-//
-//   Effects: Inserts format(fmt, y) where fmt is "%Y" widened to charT.
-//   If !y.ok(), appends with " is not a valid year".
-//
-// template
-//   basic_ostream&
-//   to_stream(basic_ostream& os, const charT* fmt, const year& y);
-//
-//   Effects: Streams y into os using the format specified by the NTCTS fmt.
-//     fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//   basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//               year& y, basic_string* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the year y using the format flags
-//     given in the NTCTS fmt as specified in 25.12. If the parse fails to decode a valid year,
-//     is.setstate(ios_base::failbit) shall be called and y shall not be modified. If %Z is used
-//     and successfully parsed, that value will be assigned to *abbrev if abbrev is non-null.
-//     If %z (or a modified variant) is used and successfully parsed, that value will be
-//     assigned to *offset if offset is non-null.
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using year = std::chrono::year;
-
-   std::cout << year{2018};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
index fce247d42d37f..031b512e4e3d1 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
         year_month{year{1235}, std::chrono::January},
         false, true), "");
 
-//  same year, different months
+    //  same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -58,13 +58,13 @@ int main(int, char**)
                 year_month{year{1234}, month{j}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    //  same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month{year{i}, std::chrono::January},
             year_month{year{j}, std::chrono::January},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
index 73cec3b7607f5..3c58ed0e14019 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
@@ -39,7 +39,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                      std::declval() - std::declval());
     ASSERT_SAME_TYPE(year_month, decltype(std::declval() - std::declval()));
 
-//  static_assert(testConstexprYears (year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprYears (year_month{year{1}, month{1}}), "");
 
     year_month ym{year{1234}, std::chrono::January};
     for (int i = 0; i <= 10; ++i)
@@ -54,7 +54,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                      std::declval() - std::declval());
     ASSERT_SAME_TYPE(year_month, decltype(std::declval() - std::declval()));
 
-//  static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
 
     year_month ym{year{1234}, std::chrono::November};
     for (int i = 0; i <= 10; ++i)  // TODO test wrap-around
@@ -69,9 +69,9 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                  std::declval() - std::declval());
     ASSERT_SAME_TYPE(months, decltype(std::declval() - std::declval()));
 
-//  static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
 
-//  Same year
+    //  Same year
     year y{2345};
     for (int i = 1; i <= 12; ++i)
         for (int j = 1; j <= 12; ++j)
@@ -80,9 +80,9 @@ int main(int, char**)
         assert(diff.count() == i - j);
     }
 
-//  TODO: different year
+    // TODO: different year
 
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
deleted file mode 100644
index d2aee69fe0ae0..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year_month;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const year_month& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template
-//     basic_ostream&
-//     to_stream(basic_ostream& os, const charT* fmt, const year_month& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//     basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//               year_month& ym, basic_string* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month = std::chrono::year_month;
-    using year       = std::chrono::year;
-    using month      = std::chrono::month;
-
-    std::cout << year_month{year{2018}, month{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
index 01311ce66b0ec..b0d963a58a4d9 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
@@ -59,8 +59,8 @@ int main(int, char**)
     }
 
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    //  There's one more leap day between 1/1/40 and 1/1/70
+    //  when compared to 1/1/70 -> 1/1/2000
     {
     constexpr sys_days sd{days{-10957}};
     constexpr year_month_day ymd{sd};
@@ -81,5 +81,5 @@ int main(int, char**)
     assert( ymd.day()   == day{29});
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
index 078cc8591cd7b..8d38a6f024dc8 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
@@ -43,7 +43,7 @@ int main(int, char**)
 
     static_assert( year_month_day{year{2019},   January, day{1}}.ok(), ""); // All OK
 
-//  Some months have a 31st
+    // Some months have a 31st
     static_assert( year_month_day{year{2020},   month{ 1}, day{31}}.ok(), "");
     static_assert(!year_month_day{year{2020},   month{ 2}, day{31}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{ 3}, day{31}}.ok(), "");
@@ -57,7 +57,7 @@ int main(int, char**)
     static_assert(!year_month_day{year{2020},   month{11}, day{31}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{12}, day{31}}.ok(), "");
 
-//  Everyone except FEB has a 30th
+    // Everyone except FEB has a 30th
     static_assert( year_month_day{year{2020},   month{ 1}, day{30}}.ok(), "");
     static_assert(!year_month_day{year{2020},   month{ 2}, day{30}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{ 3}, day{30}}.ok(), "");
@@ -93,5 +93,5 @@ int main(int, char**)
         assert( ym.ok() == year{i}.ok());
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
index 76292f5da3fc3..54f6d84452f2c 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
@@ -72,8 +72,8 @@ int main(int, char**)
     static_assert( year_month_day{sd} == ymd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_day ymd{year{1940}, month{1}, day{2}};
     constexpr local_days sd{ymd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
index bc801924d4c66..195ddfb72d053 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
@@ -72,8 +72,8 @@ int main(int, char**)
     static_assert( year_month_day{sd} == ymd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_day ymd{year{1940}, month{1}, day{2}};
     constexpr sys_days sd{ymd};
@@ -90,7 +90,7 @@ int main(int, char**)
     assert( year_month_day{sd} == ymd); // and back
     }
 
-//  These two tests check the wording for LWG 3206
+    // These two tests check the wording for LWG 3206
     {
     constexpr year_month_day ymd{year{1971}, month{1}, day{0}}; // bad day
     static_assert(!ymd.ok(),         "");
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
index 5290759783750..287dc3a33924f 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
@@ -47,51 +47,51 @@ int main(int, char**)
         year_month_day{year{1234}, January, day{1}},
         true, false), "");
 
-//  different day
+    // different day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1234}, January, day{2}},
         false, true), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1234}, February, day{1}},
         false, true), "");
 
-//  different year
+    // different year
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{2}},
         year_month_day{year{1234}, February, day{1}},
         false, true), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons6(
         year_month_day{year{1234}, February, day{1}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{2}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, February, day{2}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons6(
@@ -99,7 +99,7 @@ int main(int, char**)
                 year_month_day{year{1234}, January, day{j}},
                 i == j, i < j )));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -107,13 +107,13 @@ int main(int, char**)
                 year_month_day{year{1234}, month{j}, day{12}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month_day{year{i}, January, day{12}},
             year_month_day{year{j}, January, day{12}},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3991efad078af..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year_month_day;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const year_month_day& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template
-//     basic_ostream&
-//     to_stream(basic_ostream& os, const charT* fmt, const year_month_day& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//     basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//               year_month_day& ym, basic_string* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month_day ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month_day, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_day = std::chrono::year_month_day;
-    using year           = std::chrono::year;
-    using month          = std::chrono::month;
-    using day            = std::chrono::day;
-
-    std::cout << year_month_day{year{2018}, month{3}, day{12}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
index 0dc3adab81d71..cf3c109fb1351 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
@@ -30,7 +30,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(               std::declval().day());
     ASSERT_SAME_TYPE(day, decltype(std::declval().day()));
 
-//  Some months have a 31st
+    // Some months have a 31st
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 1}}}.day() == day{31}, "");
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 2}}}.day() == day{29}, "");
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 3}}}.day() == day{31}, "");
@@ -48,5 +48,5 @@ int main(int, char**)
     assert((year_month_day_last{year{2020}, month_day_last{month{ 2}}}.day() == day{29}));
     assert((year_month_day_last{year{2021}, month_day_last{month{ 2}}}.day() == day{28}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
index 4e4a59b9f220b..54b470750ec64 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
@@ -44,31 +44,31 @@ int main(int, char**)
         year_month_day_last{year{1234}, month_day_last{January}},
         true, false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1234}, month_day_last{February}},
         false, true), "");
 
-//  different year
+    // different year
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1235}, month_day_last{January}},
         false, true), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1234}, month_day_last{February}},
         false, true), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{February}},
         year_month_day_last{year{1235}, month_day_last{January}},
         false, true), "");
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -76,13 +76,13 @@ int main(int, char**)
                 year_month_day_last{year{1234}, month_day_last{month{j}}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month_day_last{year{i}, month_day_last{January}},
             year_month_day_last{year{j}, month_day_last{January}},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
index d8d58b0506023..59dca841e7f78 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
@@ -75,7 +75,7 @@ int main(int, char**)
     ASSERT_SAME_TYPE(year_month_day_last, decltype(std::declval() - std::declval()));
 
     static_assert(testConstexprMonths(year_month_day_last{year{1234}, month_day_last{December}}), "");
-//  TODO test wrapping
+    // TODO test wrapping
     year_month_day_last ym{year{1234}, month_day_last{December}};
     for (unsigned i = 0; i <= 10; ++i)
     {
@@ -86,5 +86,5 @@ int main(int, char**)
     }
 
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 1476fc824bb96..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year_month_day_last;
-
-// template
-//   basic_ostream&
-//   operator<<(basic_ostream& os, const year_month_day_last& ymdl);
-//
-// Returns: os << ymdl.year() << '/' << ymdl.month_day_last().
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_day_last = std::chrono::year_month_day_last;
-    using year                = std::chrono::year;
-    using month               = std::chrono::month;
-    using month_day_last      = std::chrono::month_day_last;
-
-    std::cout << year_month_day_last{year{2018}, month_day_last{month{3}}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
index ea25715211f01..51d1942d3905b 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
@@ -52,8 +52,8 @@ int main(int, char**)
     static_assert( year_month_weekday{sd} == ymwd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_weekday ymwd{year{1940}, month{1},weekday_indexed{std::chrono::Tuesday, 1}};
     constexpr local_days sd{ymwd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
index 52ca0fc061375..9bfa0472f7bce 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
@@ -52,8 +52,8 @@ int main(int, char**)
     static_assert( year_month_weekday{sd} == ymwd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_weekday ymwd{year{1940}, month{1},weekday_indexed{std::chrono::Tuesday, 1}};
     constexpr sys_days sd{ymwd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
index 438fb48bd9d13..be0a84cae535e 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
@@ -42,51 +42,51 @@ int main(int, char**)
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         true), "");
 
-//  different day
+    // different day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 2}},
         false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January,  weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year
+    // different year
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1235}, January, weekday_indexed{Tuesday, 1}},
         false), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January,  weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 2}},
         false), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1235}, January,  weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 2}},
         year_month_weekday{year{1235}, January, weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 2}},
         year_month_weekday{year{1235}, January,  weekday_indexed{Tuesday, 1}},
         false), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons2(
@@ -94,7 +94,7 @@ int main(int, char**)
                 year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, j}},
                 i == j)));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -102,13 +102,13 @@ int main(int, char**)
                 year_month_weekday{year{1234}, month{j}, weekday_indexed{Tuesday, 1}},
                 i == j)));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons2(
             year_month_weekday{year{i}, January, weekday_indexed{Tuesday, 1}},
             year_month_weekday{year{j}, January, weekday_indexed{Tuesday, 1}},
             i == j)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 21660825d0e0b..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year_month_weekday;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const year_month_weekday& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template
-//     basic_ostream&
-//     to_stream(basic_ostream& os, const charT* fmt, const year_month_weekday& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template>
-//     basic_istream&
-//   from_stream(basic_istream& is, const charT* fmt,
-//               year_month_weekday& ym, basic_string* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month_weekday ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month_weekday, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include 
-#include 
-#include 
-#include 
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_weekday = std::chrono::year_month_weekday;
-    using year                = std::chrono::year;
-    using month               = std::chrono::month;
-    using weekday             = std::chrono::weekday;
-
-    std::cout << year_month_weekday{year{2018}, month{3}, weekday{4}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
index 2e9dbad81b82f..5c008c76b21d2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
@@ -43,51 +43,51 @@ int main(int, char**)
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         true), "");
 
-//  different day
+    // different day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, January, weekday_last{Wednesday}},
         false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January,  weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, February, weekday_last{Tuesday}},
         false), "");
 
-//  different year
+    // different year
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         year_month_weekday_last{year{1235}, January, weekday_last{Tuesday}},
         false), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January,  weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, February, weekday_last{Wednesday}},
         false), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, February, weekday_last{Tuesday}},
         year_month_weekday_last{year{1235}, January,  weekday_last{Tuesday}},
         false), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Wednesday}},
         year_month_weekday_last{year{1235}, January, weekday_last{Tuesday}},
         false), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, February, weekday_last{Wednesday}},
         year_month_weekday_last{year{1235}, January,  weekday_last{Tuesday}},
         false), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons2(
@@ -95,7 +95,7 @@ int main(int, char**)
                 year_month_weekday_last{year{1234}, January, weekday_last{weekday{j}}},
                 i == j)));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -103,13 +103,13 @@ int main(int, char**)
                 year_month_weekday_last{year{1234}, month{j}, weekday_last{Tuesday}},
                 i == j)));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons2(
             year_month_weekday_last{year{i}, January, weekday_last{Tuesday}},
             year_month_weekday_last{year{j}, January, weekday_last{Tuesday}},
             i == j)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3f792fd1bd81c..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// 
-// class year_month_weekday_last;
-
-// template
-//     basic_ostream&
-//     operator<<(basic_ostream& os, const year_month_weekday_last& ymwdl);
-//
-//   Returns: os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last().
-
-
-#include 
-#include 
-#include 
-#include 
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_weekday_last = std::chrono::year_month_weekday_last;
-    using year                    = std::chrono::year;
-    using month                   = std::chrono::month;
-    using weekday                 = std::chrono::weekday;
-    using weekday_last            = std::chrono::weekday_last;
-
-    std::cout << year_month_weekday_last{year{2018}, month{3}, weekday_last{weekday{4}}};
-
-  return 0;
-}

From 57ebfea38c03e5cd2d0677eabd2abf761b336097 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Thu, 20 Jan 2022 20:36:14 +0100
Subject: [PATCH 064/946] [lldb] Surround LLDB_API-defining code with #ifndef
 LLDB_API

This enables power-users to annotate lldb api functions with arbitrary
attributes. The motivation for this is being able to build liblldb as a
static library on windows (see discussion on D117564).

This should not be interpreted to mean that building liblldb as a static
library is supported in any way, but this change does not cause any problems
for us, and can help users who really know what they are doing (or have no
other choice).
---
 lldb/include/lldb/API/SBDefines.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h
index d0ee09368d029..ecf1dc34d8c58 100644
--- a/lldb/include/lldb/API/SBDefines.h
+++ b/lldb/include/lldb/API/SBDefines.h
@@ -15,6 +15,7 @@
 #include "lldb/lldb-types.h"
 #include "lldb/lldb-versioning.h"
 
+#ifndef LLDB_API
 #if defined(_WIN32)
 #if defined(LLDB_IN_LIBLLDB)
 #define LLDB_API __declspec(dllexport)
@@ -24,6 +25,7 @@
 #else // defined (_WIN32)
 #define LLDB_API
 #endif
+#endif
 
 // Forward Declarations
 namespace lldb {

From 83d59e05b201760e3f364ff6316301d347cbad95 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea 
Date: Thu, 20 Jan 2022 14:53:18 -0500
Subject: [PATCH 065/946] Re-land [LLD] Remove global state in lldCommon

Move all variables at file-scope or function-static-scope into a hosting structure (lld::CommonLinkerContext) that lives at lldMain()-scope. Drivers will inherit from this structure and add their own global state, in the same way as for the existing COFFLinkerContext.

See discussion in https://lists.llvm.org/pipermail/llvm-dev/2021-June/151184.html

The previous land f860fe362282ed69b9d4503a20e5d20b9a041189 caused issues in https://lab.llvm.org/buildbot/#/builders/123/builds/8383, fixed by 22ee510dac9440a74b2e5b3fe3ff13ccdbf55af3.

Differential Revision: https://reviews.llvm.org/D108850
---
 clang/lib/Driver/ToolChains/MSVC.cpp          |  7 ++
 lld/COFF/COFFLinkerContext.h                  |  3 +-
 lld/COFF/Chunks.cpp                           |  3 +-
 lld/COFF/DLL.cpp                              |  4 +-
 lld/COFF/Driver.cpp                           | 70 +++++++------------
 lld/COFF/DriverUtils.cpp                      | 24 +++----
 lld/COFF/InputFiles.cpp                       | 26 +++----
 lld/COFF/LTO.cpp                              |  6 +-
 lld/COFF/MinGW.cpp                            |  7 +-
 lld/COFF/PDB.cpp                              | 13 ++--
 lld/COFF/SymbolTable.cpp                      |  2 +-
 lld/COFF/Writer.cpp                           |  2 +-
 lld/Common/CMakeLists.txt                     |  1 +
 lld/Common/CommonLinkerContext.cpp            | 45 ++++++++++++
 lld/Common/ErrorHandler.cpp                   | 69 +++++++++++-------
 lld/Common/Memory.cpp                         | 19 ++---
 lld/Common/TargetOptionsCommandFlags.cpp      |  3 -
 lld/ELF/AArch64ErrataFix.cpp                  |  8 +--
 lld/ELF/ARMErrataFix.cpp                      |  6 +-
 lld/ELF/Arch/PPC64.cpp                        |  5 +-
 lld/ELF/Driver.cpp                            | 42 ++++-------
 lld/ELF/DriverUtils.cpp                       |  7 +-
 lld/ELF/InputFiles.cpp                        | 28 ++++----
 lld/ELF/InputSection.cpp                      |  7 +-
 lld/ELF/LinkerScript.cpp                      |  6 +-
 lld/ELF/MarkLive.cpp                          |  6 +-
 lld/ELF/ScriptParser.cpp                      |  8 +--
 lld/ELF/SyntheticSections.cpp                 |  7 +-
 lld/ELF/Thunks.cpp                            | 57 +++++++--------
 lld/ELF/Writer.cpp                            |  6 +-
 lld/MachO/ConcatOutputSection.cpp             |  7 +-
 lld/MachO/Driver.cpp                          | 44 +++++-------
 lld/MachO/DriverUtils.cpp                     | 11 ++-
 lld/MachO/InputFiles.cpp                      | 32 +++++----
 lld/MachO/LTO.cpp                             |  4 +-
 lld/MachO/SyntheticSections.cpp               |  7 +-
 lld/MachO/Writer.cpp                          | 13 ++--
 lld/MinGW/Driver.cpp                          | 19 ++---
 lld/include/lld/Common/CommonLinkerContext.h  | 65 +++++++++++++++++
 lld/include/lld/Common/Driver.h               | 21 +++---
 lld/include/lld/Common/ErrorHandler.h         | 32 ++++++---
 lld/include/lld/Common/Memory.h               | 43 ++++++------
 lld/include/lld/Core/LinkingContext.h         |  3 +-
 lld/tools/lld/lld.cpp                         | 54 +++++++++-----
 lld/wasm/Driver.cpp                           | 33 ++++-----
 lld/wasm/InputFiles.cpp                       | 11 ++-
 lld/wasm/SymbolTable.cpp                      |  8 +--
 lld/wasm/Writer.cpp                           | 11 ++-
 .../llvm/DebugInfo/PDB/DIA/DIASupport.h       |  7 ++
 49 files changed, 522 insertions(+), 400 deletions(-)
 create mode 100644 lld/Common/CommonLinkerContext.cpp
 create mode 100644 lld/include/lld/Common/CommonLinkerContext.h

diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 7e8636adc2728..18cef288f018a 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -47,7 +47,14 @@
 // Make sure this comes before MSVCSetupApi.h
 #include <comdef.h>
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
 #include "MSVCSetupApi.h"
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 #include "llvm/Support/COM.h"
 _COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration));
 _COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2));
diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h
index e5223da86ef83..a3a6f94a94137 100644
--- a/lld/COFF/COFFLinkerContext.h
+++ b/lld/COFF/COFFLinkerContext.h
@@ -15,12 +15,13 @@
 #include "InputFiles.h"
 #include "SymbolTable.h"
 #include "Writer.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Timer.h"
 
 namespace lld {
 namespace coff {
 
-class COFFLinkerContext {
+class COFFLinkerContext : public CommonLinkerContext {
 public:
   COFFLinkerContext();
   COFFLinkerContext(const COFFLinkerContext &) = delete;
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 54cb8c99071de..6cabb22d98cf2 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -12,7 +12,6 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "Writer.h"
-#include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
@@ -430,7 +429,7 @@ void SectionChunk::sortRelocations() {
     return;
   warn("some relocations in " + file->getName() + " are not sorted");
   MutableArrayRef<coff_relocation> newRelocs(
-      bAlloc.Allocate<coff_relocation>(relocsSize), relocsSize);
+      bAlloc().Allocate<coff_relocation>(relocsSize), relocsSize);
   memcpy(newRelocs.data(), relocsData, relocsSize * sizeof(coff_relocation));
   llvm::sort(newRelocs, cmpByVa);
   setRelocs(newRelocs);
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 6fec9df5617db..bfa2a6910e2b7 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -659,14 +659,14 @@ void DelayLoadContents::create(COFFLinkerContext &ctx, Defined *h) {
         // Add a syntentic symbol for this load thunk, using the "__imp_load"
         // prefix, in case this thunk needs to be added to the list of valid
         // call targets for Control Flow Guard.
-        StringRef symName = saver.save("__imp_load_" + extName);
+        StringRef symName = saver().save("__imp_load_" + extName);
         s->loadThunkSym =
             cast<DefinedSynthetic>(ctx.symtab.addSynthetic(symName, t));
       }
     }
     thunks.push_back(tm);
     StringRef tmName =
-        saver.save("__tailMerge_" + syms[0]->getDLLName().lower());
+        saver().save("__tailMerge_" + syms[0]->getDLLName().lower());
     ctx.symtab.addSynthetic(tmName, tm);
     // Terminate with null values.
     addresses.push_back(make<NullChunk>(8));
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 3076871fd98b5..1546291e16c6b 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -19,9 +19,7 @@
 #include "Writer.h"
 #include "lld/Common/Args.h"
 #include "lld/Common/Driver.h"
-#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Filesystem.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Timer.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Optional.h"
@@ -63,36 +61,22 @@ namespace coff {
 std::unique_ptr<Configuration> config;
 std::unique_ptr<LinkerDriver> driver;
 
-bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
-          raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
+bool link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new COFFLinkerContext;
 
-  errorHandler().cleanupCallback = []() {
-    freeArena();
-  };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now"
-      " (use /errorlimit:0 to see all errors)";
-  errorHandler().exitEarly = canExitEarly;
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now"
+                                 " (use /errorlimit:0 to see all errors)";
 
-  COFFLinkerContext ctx;
   config = std::make_unique<Configuration>();
-  driver = std::make_unique<LinkerDriver>(ctx);
+  driver = std::make_unique<LinkerDriver>(*ctx);
 
   driver->linkerMain(args);
 
-  // Call exit() if we can to avoid calling destructors.
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  bool ret = errorCount() == 0;
-  if (!canExitEarly)
-    errorHandler().reset();
-  return ret;
+  return errorCount() == 0;
 }
 
 // Parse options of the form "old;new".
@@ -162,7 +146,7 @@ static std::future createFutureForFile(std::string path) {
 static StringRef mangle(StringRef sym) {
   assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
   if (config->machine == I386)
-    return saver.save("_" + sym);
+    return saver().save("_" + sym);
   return sym;
 }
 
@@ -358,9 +342,9 @@ void LinkerDriver::parseDirectives(InputFile *file) {
     Export exp = parseExport(e);
     if (config->machine == I386 && config->mingw) {
       if (!isDecorated(exp.name))
-        exp.name = saver.save("_" + exp.name);
+        exp.name = saver().save("_" + exp.name);
       if (!exp.extName.empty() && !isDecorated(exp.extName))
-        exp.extName = saver.save("_" + exp.extName);
+        exp.extName = saver().save("_" + exp.extName);
     }
     exp.directives = true;
     config->exports.push_back(exp);
@@ -442,11 +426,11 @@ StringRef LinkerDriver::doFindFile(StringRef filename) {
     SmallString<128> path = dir;
     sys::path::append(path, filename);
     if (sys::fs::exists(path.str()))
-      return saver.save(path.str());
+      return saver().save(path.str());
     if (!hasExt) {
       path.append(".obj");
       if (sys::fs::exists(path.str()))
-        return saver.save(path.str());
+        return saver().save(path.str());
     }
   }
   return filename;
@@ -483,7 +467,7 @@ StringRef LinkerDriver::doFindLibMinGW(StringRef filename) {
 
   SmallString<128> s = filename;
   sys::path::replace_extension(s, ".a");
-  StringRef libName = saver.save("lib" + s.str());
+  StringRef libName = saver().save("lib" + s.str());
   return doFindFile(libName);
 }
 
@@ -492,7 +476,7 @@ StringRef LinkerDriver::doFindLib(StringRef filename) {
   // Add ".lib" to Filename if that has no file extension.
   bool hasExt = filename.contains('.');
   if (!hasExt)
-    filename = saver.save(filename + ".lib");
+    filename = saver().save(filename + ".lib");
   StringRef ret = doFindFile(filename);
   // For MinGW, if the find above didn't turn up anything, try
   // looking for a MinGW formatted library name.
@@ -525,7 +509,7 @@ void LinkerDriver::addLibSearchPaths() {
   Optional<std::string> envOpt = Process::GetEnv("LIB");
   if (!envOpt.hasValue())
     return;
-  StringRef env = saver.save(*envOpt);
+  StringRef env = saver().save(*envOpt);
   while (!env.empty()) {
     StringRef path;
     std::tie(path, env) = env.split(';');
@@ -873,8 +857,8 @@ static void parseModuleDefs(StringRef path) {
   driver->takeBuffer(std::move(mb));
 
   if (config->outputFile.empty())
-    config->outputFile = std::string(saver.save(m.OutputFile));
-  config->importName = std::string(saver.save(m.ImportName));
+    config->outputFile = std::string(saver().save(m.OutputFile));
+  config->importName = std::string(saver().save(m.ImportName));
   if (m.ImageBase)
     config->imageBase = m.ImageBase;
   if (m.StackReserve)
@@ -902,14 +886,14 @@ static void parseModuleDefs(StringRef path) {
     // DLL instead. This is supported by both MS and GNU linkers.
     if (!e1.ExtName.empty() && e1.ExtName != e1.Name &&
         StringRef(e1.Name).contains('.')) {
-      e2.name = saver.save(e1.ExtName);
-      e2.forwardTo = saver.save(e1.Name);
+      e2.name = saver().save(e1.ExtName);
+      e2.forwardTo = saver().save(e1.Name);
       config->exports.push_back(e2);
       continue;
     }
-    e2.name = saver.save(e1.Name);
-    e2.extName = saver.save(e1.ExtName);
-    e2.aliasTarget = saver.save(e1.AliasTarget);
+    e2.name = saver().save(e1.Name);
+    e2.extName = saver().save(e1.ExtName);
+    e2.aliasTarget = saver().save(e1.AliasTarget);
     e2.ordinal = e1.Ordinal;
     e2.noname = e1.Noname;
     e2.data = e1.Data;
@@ -1906,9 +1890,9 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) {
     Export e = parseExport(arg->getValue());
     if (config->machine == I386) {
       if (!isDecorated(e.name))
-        e.name = saver.save("_" + e.name);
+        e.name = saver().save("_" + e.name);
       if (!e.extName.empty() && !isDecorated(e.extName))
-        e.extName = saver.save("_" + e.extName);
+        e.extName = saver().save("_" + e.extName);
     }
     config->exports.push_back(e);
   }
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 0921c8e27f5ae..ac0f1f972c798 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -48,17 +48,17 @@ const uint16_t RT_MANIFEST = 24;
 
 class Executor {
 public:
-  explicit Executor(StringRef s) : prog(saver.save(s)) {}
-  void add(StringRef s) { args.push_back(saver.save(s)); }
-  void add(std::string &s) { args.push_back(saver.save(s)); }
-  void add(Twine s) { args.push_back(saver.save(s)); }
-  void add(const char *s) { args.push_back(saver.save(s)); }
+  explicit Executor(StringRef s) : prog(saver().save(s)) {}
+  void add(StringRef s) { args.push_back(saver().save(s)); }
+  void add(std::string &s) { args.push_back(saver().save(s)); }
+  void add(Twine s) { args.push_back(saver().save(s)); }
+  void add(const char *s) { args.push_back(saver().save(s)); }
 
   void run() {
     ErrorOr<std::string> exeOrErr = sys::findProgramByName(prog);
     if (auto ec = exeOrErr.getError())
       fatal("unable to find " + prog + " in PATH: " + ec.message());
-    StringRef exe = saver.save(*exeOrErr);
+    StringRef exe = saver().save(*exeOrErr);
     args.insert(args.begin(), exe);
 
     if (sys::ExecuteAndWait(args[0], args) != 0)
@@ -636,14 +636,14 @@ static StringRef killAt(StringRef sym, bool prefix) {
   sym = sym.substr(0, sym.find('@', 1));
   if (!sym.startswith("@")) {
     if (prefix && !sym.startswith("_"))
-      return saver.save("_" + sym);
+      return saver().save("_" + sym);
     return sym;
   }
   // For fastcall, remove the leading @ and replace it with an
   // underscore, if prefixes are used.
   sym = sym.substr(1);
   if (prefix)
-    sym = saver.save("_" + sym);
+    sym = saver().save("_" + sym);
   return sym;
 }
 
@@ -854,7 +854,7 @@ opt::InputArgList ArgParser::parse(ArrayRef argv) {
                                               argv.data() + argv.size());
   if (!args.hasArg(OPT_lldignoreenv))
     addLINK(expandedArgv);
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), expandedArgv);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), expandedArgv);
   args = optTable.ParseArgs(makeArrayRef(expandedArgv).drop_front(),
                             missingIndex, missingCount);
 
@@ -901,7 +901,7 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) {
   // Handle /EXPORT and /INCLUDE in a fast path. These directives can appear for
   // potentially every symbol in the object, so they must be handled quickly.
   SmallVector<StringRef, 16> tokens;
-  cl::TokenizeWindowsCommandLineNoCopy(s, saver, tokens);
+  cl::TokenizeWindowsCommandLineNoCopy(s, saver(), tokens);
   for (StringRef tok : tokens) {
     if (tok.startswith_insensitive("/export:") ||
         tok.startswith_insensitive("-export:"))
@@ -914,7 +914,7 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) {
       // already copied quoted arguments for us, so those do not need to be
       // copied again.
       bool HasNul = tok.end() != s.end() && tok.data()[tok.size()] == '\0';
-      rest.push_back(HasNul ? tok.data() : saver.save(tok).data());
+      rest.push_back(HasNul ? tok.data() : saver().save(tok).data());
     }
   }
 
@@ -948,7 +948,7 @@ void ArgParser::addLINK(SmallVector &argv) {
 
 std::vector<const char *> ArgParser::tokenize(StringRef s) {
   SmallVector<const char *, 16> tokens;
-  cl::TokenizeWindowsCommandLine(s, saver, tokens);
+  cl::TokenizeWindowsCommandLine(s, saver(), tokens);
   return std::vector<const char *>(tokens.begin(), tokens.end());
 }
 
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 1d79d97acad26..0f3f5e0ffe7c5 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -15,8 +15,6 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "llvm-c/lto.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
@@ -905,7 +903,7 @@ ObjFile::getVariableLocation(StringRef var) {
   Optional<std::pair<std::string, unsigned>> ret = dwarf->getVariableLoc(var);
   if (!ret)
     return None;
-  return std::make_pair(saver.save(ret->first), ret->second);
+  return std::make_pair(saver().save(ret->first), ret->second);
 }
 
 // Used only for DWARF debug info, which is not common (except in MinGW
@@ -940,8 +938,8 @@ void ImportFile::parse() {
     fatal("broken import library");
 
   // Read names and create an __imp_ symbol.
-  StringRef name = saver.save(StringRef(buf + sizeof(*hdr)));
-  StringRef impName = saver.save("__imp_" + name);
+  StringRef name = saver().save(StringRef(buf + sizeof(*hdr)));
+  StringRef impName = saver().save("__imp_" + name);
   const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1;
   dllName = std::string(StringRef(nameStart));
   StringRef extName;
@@ -995,11 +993,12 @@ BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
   // into consideration at LTO time (which very likely causes undefined
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
-  MemoryBufferRef mbref(
-      mb.getBuffer(),
-      saver.save(archiveName.empty() ? path
-                                     : archiveName + sys::path::filename(path) +
-                                           utostr(offsetInArchive)));
+  MemoryBufferRef mbref(mb.getBuffer(),
+                        saver().save(archiveName.empty()
+                                         ? path
+                                         : archiveName +
+                                               sys::path::filename(path) +
+                                               utostr(offsetInArchive)));
 
   obj = check(lto::InputFile::create(mbref));
 }
@@ -1035,6 +1034,7 @@ FakeSectionChunk ltoDataSectionChunk(&ltoDataSection.section);
 } // namespace
 
 void BitcodeFile::parse() {
+  llvm::StringSaver &saver = lld::saver();
   std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
   for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
     // FIXME: Check nodeduplicate
@@ -1156,11 +1156,11 @@ void DLLFile::parse() {
     s->nameType = ImportNameType::IMPORT_NAME;
 
     if (coffObj->getMachine() == I386) {
-      s->symbolName = symbolName = saver.save("_" + symbolName);
+      s->symbolName = symbolName = saver().save("_" + symbolName);
       s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
     }
 
-    StringRef impName = saver.save("__imp_" + symbolName);
+    StringRef impName = saver().save("__imp_" + symbolName);
     ctx.symtab.addLazyDLLSymbol(this, s, impName);
     if (code)
       ctx.symtab.addLazyDLLSymbol(this, s, symbolName);
@@ -1179,7 +1179,7 @@ void DLLFile::makeImport(DLLFile::Symbol *s) {
 
   size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
   size_t size = sizeof(coff_import_header) + impSize;
-  char *buf = bAlloc.Allocate<char>(size);
+  char *buf = bAlloc().Allocate<char>(size);
   memset(buf, 0, size);
   char *p = buf;
   auto *imp = reinterpret_cast<coff_import_header *>(p);
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index f117b62192c84..2dbe7b146402e 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -11,7 +11,7 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
 #include "llvm/ADT/STLExtras.h"
@@ -209,8 +209,8 @@ std::vector BitcodeCompiler::compile(COFFLinkerContext &ctx) {
     // - foo.exe.lto.1.obj
     // - ...
     StringRef ltoObjName =
-        saver.save(Twine(config->outputFile) + ".lto" +
-                   (i == 0 ? Twine("") : Twine('.') + Twine(i)) + ".obj");
+        saver().save(Twine(config->outputFile) + ".lto" +
+                     (i == 0 ? Twine("") : Twine('.') + Twine(i)) + ".obj");
 
     // Get the native object contents either from the cache or from memory.  Do
     // not use the cached MemoryBuffer directly, or the PDB will not be
diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp
index 148ebe5eea66d..7a3a3853572f3 100644
--- a/lld/COFF/MinGW.cpp
+++ b/lld/COFF/MinGW.cpp
@@ -11,7 +11,6 @@
 #include "Driver.h"
 #include "InputFiles.h"
 #include "SymbolTable.h"
-#include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Object/COFF.h"
@@ -184,8 +183,8 @@ void lld::coff::writeDefFile(StringRef name) {
 static StringRef mangle(Twine sym) {
   assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
   if (config->machine == I386)
-    return saver.save("_" + sym);
-  return saver.save(sym);
+    return saver().save("_" + sym);
+  return saver().save(sym);
 }
 
 // Handles -wrap option.
@@ -249,7 +248,7 @@ void lld::coff::wrapSymbols(COFFLinkerContext &ctx,
       // referenced it or not, though.)
       if (imp) {
         DefinedLocalImport *wrapimp = make<DefinedLocalImport>(
-            saver.save("__imp_" + w.wrap->getName()), d);
+            saver().save("__imp_" + w.wrap->getName()), d);
         ctx.symtab.localImportChunks.push_back(wrapimp->getChunk());
         map[imp] = wrapimp;
       }
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index a4cef1d0df3b2..dea84eca5b121 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -16,7 +16,6 @@
 #include "Symbols.h"
 #include "TypeMerger.h"
 #include "Writer.h"
-#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Timer.h"
 #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
@@ -75,7 +74,7 @@ class PDBLinker {
 
 public:
   PDBLinker(COFFLinkerContext &ctx)
-      : builder(bAlloc), tMerger(ctx, bAlloc), ctx(ctx) {
+      : builder(bAlloc()), tMerger(ctx, bAlloc()), ctx(ctx) {
     // This isn't strictly necessary, but link.exe usually puts an empty string
     // as the first "valid" string in the string table, so we do the same in
     // order to maintain as much byte-for-byte compatibility as possible.
@@ -501,7 +500,7 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   case SymbolKind::S_LPROCREF: {
     // sym is a temporary object, so we have to copy and reallocate the record
     // to stabilize it.
-    uint8_t *mem = bAlloc.Allocate<uint8_t>(sym.length());
+    uint8_t *mem = bAlloc().Allocate<uint8_t>(sym.length());
     memcpy(mem, sym.data().data(), sym.length());
     builder.addGlobalSymbol(CVSymbol(makeArrayRef(mem, sym.length())));
     break;
@@ -1003,7 +1002,7 @@ static void warnUnusable(InputFile *f, Error e) {
 
 // Allocate memory for a .debug$S / .debug$F section and relocate it.
 static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
-  uint8_t *buffer = bAlloc.Allocate<uint8_t>(debugChunk.getSize());
+  uint8_t *buffer = bAlloc().Allocate<uint8_t>(debugChunk.getSize());
   assert(debugChunk.getOutputSectionIdx() == 0 &&
          "debug sections should not be in output sections");
   debugChunk.writeTo(buffer);
@@ -1417,6 +1416,7 @@ static void addCommonLinkerModuleSymbols(StringRef path,
   ebs.Fields.push_back(path);
   ebs.Fields.push_back("cmd");
   ebs.Fields.push_back(argStr);
+  llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
       ons, bAlloc, CodeViewContainer::Pdb));
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
@@ -1448,7 +1448,7 @@ static void addLinkerModuleCoffGroup(PartialSection *sec,
     cgs.Characteristics |= llvm::COFF::IMAGE_SCN_MEM_WRITE;
 
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
-      cgs, bAlloc, CodeViewContainer::Pdb));
+      cgs, bAlloc(), CodeViewContainer::Pdb));
 }
 
 static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
@@ -1461,7 +1461,7 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
   sym.Rva = os.getRVA();
   sym.SectionNumber = os.sectionIndex;
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
-      sym, bAlloc, CodeViewContainer::Pdb));
+      sym, bAlloc(), CodeViewContainer::Pdb));
 
   // Skip COFF groups in MinGW because it adds a significant footprint to the
   // PDB, due to each function being in its own section
@@ -1536,6 +1536,7 @@ void PDBLinker::addImportFilesToPDB() {
     ts.Segment = thunkOS->sectionIndex;
     ts.Offset = thunkChunk->getRVA() - thunkOS->getRVA();
 
+    llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
         ons, bAlloc, CodeViewContainer::Pdb));
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 9ceac7af7f914..db2db9c9272eb 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -134,7 +134,7 @@ getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
   const DILineInfo &lineInfo = *optionalLineInfo;
   if (lineInfo.FileName == DILineInfo::BadString)
     return None;
-  return std::make_pair(saver.save(lineInfo.FileName), lineInfo.Line);
+  return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
 }
 
 static Optional<std::pair<StringRef, uint32_t>>
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 4a41c541ee7f9..12db942f1db55 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -485,7 +485,7 @@ static bool createThunks(OutputSection *os, int margin) {
     MutableArrayRef<coff_relocation> newRelocs;
     if (originalRelocs.data() == curRelocs.data()) {
       newRelocs = makeMutableArrayRef(
-          bAlloc.Allocate<coff_relocation>(originalRelocs.size()),
+          bAlloc().Allocate<coff_relocation>(originalRelocs.size()),
           originalRelocs.size());
     } else {
       newRelocs = makeMutableArrayRef(
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 9fdc67be79012..1ae7da1f5f7f0 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -28,6 +28,7 @@ set_source_files_properties("${version_inc}"
 
 add_lld_library(lldCommon
   Args.cpp
+  CommonLinkerContext.cpp
   DWARF.cpp
   ErrorHandler.cpp
   Filesystem.cpp
diff --git a/lld/Common/CommonLinkerContext.cpp b/lld/Common/CommonLinkerContext.cpp
new file mode 100644
index 0000000000000..50ccbb37c7966
--- /dev/null
+++ b/lld/Common/CommonLinkerContext.cpp
@@ -0,0 +1,45 @@
+//===- CommonLinkerContext.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/CommonLinkerContext.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+
+using namespace llvm;
+using namespace lld;
+
+// Reference to the current LLD instance. This is a temporary situation, until
+// we pass this context everywhere by reference, or we make it a thread_local,
+// as in https://reviews.llvm.org/D108850?id=370678 where each thread can be
+// associated with an LLD instance. Only then will LLD be free of global
+// state.
+static CommonLinkerContext *lctx;
+
+CommonLinkerContext::CommonLinkerContext() { lctx = this; }
+
+CommonLinkerContext::~CommonLinkerContext() {
+  assert(lctx);
+  // Explicitly call the destructors since we created the objects with placement
+  // new in SpecificAlloc::create().
+  for (auto &it : instances)
+    it.second->~SpecificAllocBase();
+  lctx = nullptr;
+}
+
+CommonLinkerContext &lld::commonContext() {
+  assert(lctx);
+  return *lctx;
+}
+
+bool lld::hasContext() { return lctx != nullptr; }
+
+void CommonLinkerContext::destroy() {
+  if (lctx == nullptr)
+    return;
+  delete lctx;
+}
diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp
index 399b6cac75470..15b3bd058ee9b 100644
--- a/lld/Common/ErrorHandler.cpp
+++ b/lld/Common/ErrorHandler.cpp
@@ -10,6 +10,7 @@
 
 #include "llvm/Support/Parallel.h"
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
@@ -18,51 +19,69 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
-#include <mutex>
 #include <regex>
 
 using namespace llvm;
 using namespace lld;
 
-// The functions defined in this file can be called from multiple threads,
-// but lld::outs() or lld::errs() are not thread-safe. We protect them using a
-// mutex.
-static std::mutex mu;
-
-// We want to separate multi-line messages with a newline. `sep` is "\n"
-// if the last messages was multi-line. Otherwise "".
-static StringRef sep;
-
 static StringRef getSeparator(const Twine &msg) {
   if (StringRef(msg.str()).contains('\n'))
     return "\n";
   return "";
 }
 
-raw_ostream *lld::stdoutOS;
-raw_ostream *lld::stderrOS;
+ErrorHandler::~ErrorHandler() {
+  if (cleanupCallback)
+    cleanupCallback();
+}
+
+void ErrorHandler::initialize(llvm::raw_ostream &stdoutOS,
+                              llvm::raw_ostream &stderrOS, bool exitEarly,
+                              bool disableOutput) {
+  this->stdoutOS = &stdoutOS;
+  this->stderrOS = &stderrOS;
+  stderrOS.enable_colors(stderrOS.has_colors());
+  this->exitEarly = exitEarly;
+  this->disableOutput = disableOutput;
+}
 
-ErrorHandler &lld::errorHandler() {
-  static ErrorHandler handler;
-  return handler;
+void ErrorHandler::flushStreams() {
+  std::lock_guard<std::mutex> lock(mu);
+  outs().flush();
+  errs().flush();
 }
 
+ErrorHandler &lld::errorHandler() { return context().e; }
+
 raw_ostream &lld::outs() {
-  if (errorHandler().disableOutput)
+  ErrorHandler &e = errorHandler();
+  return e.outs();
+}
+
+raw_ostream &lld::errs() {
+  ErrorHandler &e = errorHandler();
+  return e.errs();
+}
+
+raw_ostream &ErrorHandler::outs() {
+  if (disableOutput)
     return llvm::nulls();
   return stdoutOS ? *stdoutOS : llvm::outs();
 }
 
-raw_ostream &lld::errs() {
-  if (errorHandler().disableOutput)
+raw_ostream &ErrorHandler::errs() {
+  if (disableOutput)
     return llvm::nulls();
   return stderrOS ? *stderrOS : llvm::errs();
 }
 
 void lld::exitLld(int val) {
-  // Delete any temporary file, while keeping the memory mapping open.
-  if (errorHandler().outputBuffer)
-    errorHandler().outputBuffer->discard();
+  if (hasContext()) {
+    ErrorHandler &e = errorHandler();
+    // Delete any temporary file, while keeping the memory mapping open.
+    if (e.outputBuffer)
+      e.outputBuffer->discard();
+  }
 
   // Re-throw a possible signal or exception once/if it was catched by
   // safeLldMain().
@@ -75,11 +94,9 @@ void lld::exitLld(int val) {
   if (!CrashRecoveryContext::GetCurrent())
     llvm_shutdown();
 
-  {
-    std::lock_guard<std::mutex> lock(mu);
-    lld::outs().flush();
-    lld::errs().flush();
-  }
+  if (hasContext())
+    lld::errorHandler().flushStreams();
+
   // When running inside safeLldMain(), restore the control flow back to the
   // CrashRecoveryContext. Otherwise simply use _exit(), meanning no cleanup,
   // since we want to avoid further crashes on shutdown.
diff --git a/lld/Common/Memory.cpp b/lld/Common/Memory.cpp
index c53e1d3e6cfc7..7c90ff1d799c8 100644
--- a/lld/Common/Memory.cpp
+++ b/lld/Common/Memory.cpp
@@ -7,16 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 
 using namespace llvm;
 using namespace lld;
 
-BumpPtrAllocator lld::bAlloc;
-StringSaver lld::saver{bAlloc};
-std::vector<SpecificAllocBase *> lld::SpecificAllocBase::instances;
-
-void lld::freeArena() {
-  for (SpecificAllocBase *alloc : SpecificAllocBase::instances)
-    alloc->reset();
-  bAlloc.Reset();
+SpecificAllocBase *
+lld::SpecificAllocBase::getOrCreate(void *tag, size_t size, size_t align,
+                                    SpecificAllocBase *(&creator)(void *)) {
+  auto &instances = context().instances;
+  auto &instance = instances[tag];
+  if (instance == nullptr) {
+    void *storage = context().bAlloc.Allocate(size, align);
+    instance = creator(storage);
+  }
+  return instance;
 }
diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp
index d39477ed89adc..b7749c4a20325 100644
--- a/lld/Common/TargetOptionsCommandFlags.cpp
+++ b/lld/Common/TargetOptionsCommandFlags.cpp
@@ -7,12 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/TargetOptionsCommandFlags.h"
-
 #include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/Target/TargetOptions.h"
 
-static llvm::codegen::RegisterCodeGenFlags CGF;
-
 llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() {
   return llvm::codegen::InitTargetOptionsFromCodeGenFlags(llvm::Triple());
 }
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp
index a1e276ea9c77e..d45edf9bd8ff4 100644
--- a/lld/ELF/AArch64ErrataFix.cpp
+++ b/lld/ELF/AArch64ErrataFix.cpp
@@ -33,7 +33,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
@@ -398,9 +398,9 @@ Patch843419Section::Patch843419Section(InputSection *p, uint64_t off)
       patchee(p), patcheeOffset(off) {
   this->parent = p->getParent();
   patchSym = addSyntheticLocal(
-      saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, 0,
-      getSize(), *this);
-  addSyntheticLocal(saver.save("$x"), STT_NOTYPE, 0, 0, *this);
+      saver().save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC,
+      0, getSize(), *this);
+  addSyntheticLocal(saver().save("$x"), STT_NOTYPE, 0, 0, *this);
 }
 
 uint64_t Patch843419Section::getLDSTAddr() const {
diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp
index cfaa3109afe88..25b47b90cef81 100644
--- a/lld/ELF/ARMErrataFix.cpp
+++ b/lld/ELF/ARMErrataFix.cpp
@@ -22,7 +22,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
@@ -142,9 +142,9 @@ Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
       patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
   parent = p->getParent();
   patchSym = addSyntheticLocal(
-      saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
+      saver().save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
       isARM ? 0 : 1, getSize(), *this);
-  addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
+  addSyntheticLocal(saver().save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
 }
 
 uint64_t Patch657417Section::getBranchAddr() const {
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 69a9118ca30ea..d9e4fc97ea0be 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -11,8 +11,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Thunks.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/Support/Endian.h"
 
 using namespace llvm;
@@ -197,7 +196,7 @@ static bool addOptional(StringRef name, uint64_t value,
   Symbol *sym = symtab->find(name);
   if (!sym || sym->isDefined())
     return false;
-  sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL,
+  sym->resolve(Defined{/*file=*/nullptr, saver().save(name), STB_GLOBAL,
                        STV_HIDDEN, STT_FUNC, value,
                        /*size=*/0, /*section=*/nullptr});
   defined.push_back(cast(sym));
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index abec642bed1b5..de26afddd28b8 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -77,14 +77,14 @@ std::unique_ptr elf::driver;
 static void setConfigs(opt::InputArgList &args);
 static void readConfigs(opt::InputArgList &args);
 
-bool elf::link(ArrayRef args, bool canExitEarly,
-               raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  errorHandler().cleanupCallback = []() {
-    freeArena();
-
+bool elf::link(ArrayRef args, llvm::raw_ostream &stdoutOS,
+               llvm::raw_ostream &stderrOS, bool exitEarly,
+               bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
+
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.cleanupCallback = []() {
     inputSections.clear();
     outputSections.clear();
     memoryBuffers.clear();
@@ -106,13 +106,9 @@ bool elf::link(ArrayRef args, bool canExitEarly,
 
     SharedFile::vernauxNum = 0;
   };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now (use "
-      "-error-limit=0 to see all errors)";
-  errorHandler().exitEarly = canExitEarly;
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
+                                 "-error-limit=0 to see all errors)";
 
   config = std::make_unique();
   driver = std::make_unique();
@@ -126,15 +122,7 @@ bool elf::link(ArrayRef args, bool canExitEarly,
 
   driver->linkerMain(args);
 
-  // Exit immediately if we don't need to return to the caller.
-  // This saves time because the overhead of calling destructors
-  // for all globally-allocated objects is not negligible.
-  int hasError = errorCount() ? 1 : 0;
-  if (canExitEarly)
-    exitLld(hasError);
-  else
-    errorHandler().reset();
-  return !hasError;
+  return errorCount() == 0;
 }
 
 // Parses a linker -m option.
@@ -1258,7 +1246,7 @@ static void readConfigs(opt::InputArgList &args) {
 
   // Parse LTO options.
   if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq))
-    parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())),
+    parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
                      arg->getSpelling());
 
   for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus))
@@ -2071,9 +2059,9 @@ static std::vector addWrappedSymbols(opt::InputArgList &args) {
     if (!sym)
       continue;
 
-    Symbol *real = addUnusedUndefined(saver.save("__real_" + name));
+    Symbol *real = addUnusedUndefined(saver().save("__real_" + name));
     Symbol *wrap =
-        addUnusedUndefined(saver.save("__wrap_" + name), sym->binding);
+        addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
     v.push_back({sym, real, wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp
index 54d2d0ae6fb9f..ac29b47abcc96 100644
--- a/lld/ELF/DriverUtils.cpp
+++ b/lld/ELF/DriverUtils.cpp
@@ -13,8 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Driver.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Optional.h"
@@ -102,7 +101,7 @@ static void concatLTOPluginOptions(SmallVectorImpl &args) {
   for (size_t i = 0, e = args.size(); i != e; ++i) {
     StringRef s = args[i];
     if ((s == "-plugin-opt" || s == "--plugin-opt") && i + 1 != e) {
-      v.push_back(saver.save(s + "=" + args[i + 1]).data());
+      v.push_back(saver().save(s + "=" + args[i + 1]).data());
       ++i;
     } else {
       v.push_back(args[i]);
@@ -125,7 +124,7 @@ opt::InputArgList ELFOptTable::parse(ArrayRef argv) {
 
   // Expand response files (arguments in the form of @)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), vec);
   concatLTOPluginOptions(vec);
   args = this->ParseArgs(vec, missingIndex, missingCount);
 
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index ca16a64e50836..4da371c619f40 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -13,9 +13,8 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/IR/LLVMContext.h"
@@ -111,7 +110,7 @@ Optional elf::readFile(StringRef path) {
   // The --chroot option changes our virtual root directory.
   // This is useful when you are dealing with files created by --reproduce.
   if (!config->chroot.empty() && path.startswith("/"))
-    path = saver.save(config->chroot + path);
+    path = saver().save(config->chroot + path);
 
   log(path);
   config->dependencyFiles.insert(llvm::CachedHashString(path));
@@ -1518,8 +1517,8 @@ template  void SharedFile::parse() {
         }
         StringRef verName = stringTable.data() + verneeds[idx];
         versionedNameBuffer.clear();
-        name =
-            saver.save((name + "@" + verName).toStringRef(versionedNameBuffer));
+        name = saver().save(
+            (name + "@" + verName).toStringRef(versionedNameBuffer));
       }
       Symbol *s = symtab.addSymbol(
           Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
@@ -1561,7 +1560,7 @@ template  void SharedFile::parse() {
         reinterpret_cast(verdefs[idx])->getAux()->vda_name;
     versionedNameBuffer.clear();
     name = (name + "@" + verName).toStringRef(versionedNameBuffer);
-    symtab.addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(),
+    symtab.addSymbol(SharedSymbol{*this, saver().save(name), sym.getBinding(),
                                   sym.st_other, sym.getType(), sym.st_value,
                                   sym.st_size, alignment, idx});
   }
@@ -1644,11 +1643,10 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
   // into consideration at LTO time (which very likely causes undefined
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
-  StringRef name =
-      archiveName.empty()
-          ? saver.save(path)
-          : saver.save(archiveName + "(" + path::filename(path) + " at " +
-                       utostr(offsetInArchive) + ")");
+  StringRef name = archiveName.empty()
+                       ? saver().save(path)
+                       : saver().save(archiveName + "(" + path::filename(path) +
+                                      " at " + utostr(offsetInArchive) + ")");
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = CHECK(lto::InputFile::create(mbref), this);
@@ -1684,7 +1682,7 @@ createBitcodeSymbol(Symbol *&sym, const std::vector &keptComdats,
   if (sym) {
     name = sym->getName();
   } else {
-    name = saver.save(objSym.getName());
+    name = saver().save(objSym.getName());
     sym = symtab->insert(name);
   }
 
@@ -1734,8 +1732,8 @@ void BitcodeFile::parseLazy() {
   symbols.resize(obj->symbols().size());
   for (auto it : llvm::enumerate(obj->symbols()))
     if (!it.value().isUndefined())
-      symbols[it.index()] =
-          symtab.addSymbol(LazyObject{*this, saver.save(it.value().getName())});
+      symbols[it.index()] = symtab.addSymbol(
+          LazyObject{*this, saver().save(it.value().getName())});
 }
 
 void BinaryFile::parse() {
@@ -1753,6 +1751,8 @@ void BinaryFile::parse() {
     if (!isAlnum(s[i]))
       s[i] = '_';
 
+  llvm::StringSaver &saver = lld::saver();
+
   symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL,
                             STV_DEFAULT, STT_OBJECT, 0, 0, section});
   symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL,
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 5b6242646605f..e6ce2c399ac9d 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -18,8 +18,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Thunks.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
@@ -143,7 +142,7 @@ void InputSectionBase::uncompress() const {
   {
     static std::mutex mu;
     std::lock_guard lock(mu);
-    uncompressedBuf = bAlloc.Allocate(size);
+    uncompressedBuf = bAlloc().Allocate(size);
   }
 
   if (Error e = zlib::uncompress(toStringRef(rawData), uncompressedBuf, size))
@@ -237,7 +236,7 @@ template  void InputSectionBase::parseCompressedHeader() {
 
     // Restore the original section name.
     // (e.g. ".zdebug_info" -> ".debug_info")
-    name = saver.save("." + name.substr(2));
+    name = saver().save("." + name.substr(2));
     return;
   }
 
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 3dd3df80e36f4..bfb583453735e 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -19,7 +19,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Writer.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -64,8 +64,8 @@ static StringRef getOutputSectionName(const InputSectionBase *s) {
     if (InputSectionBase *rel = isec->getRelocatedSection()) {
       OutputSection *out = rel->getOutputSection();
       if (s->type == SHT_RELA)
-        return saver.save(".rela" + out->name);
-      return saver.save(".rel" + out->name);
+        return saver().save(".rela" + out->name);
+      return saver().save(".rel" + out->name);
     }
   }
 
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 4d3d79d4ee80b..597c0684b8b2b 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -27,7 +27,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/ELF.h"
@@ -308,8 +308,8 @@ template  void MarkLive::run() {
       // As a workaround for glibc libc.a before 2.34
       // (https://sourceware.org/PR27492), retain __libc_atexit and similar
       // sections regardless of zStartStopGC.
-      cNamedSections[saver.save("__start_" + sec->name)].push_back(sec);
-      cNamedSections[saver.save("__stop_" + sec->name)].push_back(sec);
+      cNamedSections[saver().save("__start_" + sec->name)].push_back(sec);
+      cNamedSections[saver().save("__stop_" + sec->name)].push_back(sec);
     }
   }
 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index f26b6c41adf20..7331d1156f278 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -20,7 +20,7 @@
 #include "ScriptLexer.h"
 #include "Symbols.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
@@ -290,7 +290,7 @@ void ScriptParser::addFile(StringRef s) {
     SmallString<128> pathData;
     StringRef path = (config->sysroot + s).toStringRef(pathData);
     if (sys::fs::exists(path))
-      driver->addFile(saver.save(path), /*withLOption=*/false);
+      driver->addFile(saver().save(path), /*withLOption=*/false);
     else
       setError("cannot find " + s + " inside " + config->sysroot);
     return;
@@ -304,7 +304,7 @@ void ScriptParser::addFile(StringRef s) {
     if (config->sysroot.empty())
       driver->addFile(s.substr(1), /*withLOption=*/false);
     else
-      driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)),
+      driver->addFile(saver().save(config->sysroot + "/" + s.substr(1)),
                       /*withLOption=*/false);
   } else if (s.startswith("-l")) {
     // Case 3: search in the list of library paths.
@@ -327,7 +327,7 @@ void ScriptParser::addFile(StringRef s) {
     } else {
       // Finally, search in the list of library paths.
       if (Optional path = findFromSearchPaths(s))
-        driver->addFile(saver.save(*path), /*withLOption=*/true);
+        driver->addFile(saver().save(*path), /*withLOption=*/true);
       else
         setError("unable to find " + s);
     }
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index c1eb53e9d44b0..87f4269b83917 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -22,9 +22,8 @@
 #include "Symbols.h"
 #include "Target.h"
 #include "Writer.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/SetOperations.h"
@@ -73,7 +72,7 @@ static ArrayRef getVersion() {
   // This is only for testing.
   StringRef s = getenv("LLD_VERSION");
   if (s.empty())
-    s = saver.save(Twine("Linker: ") + getLLDVersion());
+    s = saver().save(Twine("Linker: ") + getLLDVersion());
 
   // +1 to include the terminating '\0'.
   return {(const uint8_t *)s.data(), s.size() + 1};
@@ -255,7 +254,7 @@ MipsReginfoSection *MipsReginfoSection::create() {
 
 InputSection *elf::createInterpSection() {
   // StringSaver guarantees that the returned string ends with '\0'.
-  StringRef s = saver.save(config->dynamicLinker);
+  StringRef s = saver().save(config->dynamicLinker);
   ArrayRef contents = {(const uint8_t *)s.data(), s.size() + 1};
 
   return make(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, contents,
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 38de4db191f45..ae740810acb57 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -27,8 +27,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Endian.h"
@@ -434,7 +433,7 @@ void AArch64ABSLongThunk::writeTo(uint8_t *buf) {
 }
 
 void AArch64ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__AArch64AbsLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__AArch64AbsLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$x", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 8, isec);
@@ -460,8 +459,8 @@ void AArch64ADRPThunk::writeTo(uint8_t *buf) {
 }
 
 void AArch64ADRPThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__AArch64ADRPThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__AArch64ADRPThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$x", STT_NOTYPE, 0, isec);
 }
 
@@ -560,7 +559,7 @@ void ARMV7ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV7ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMv7ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ARMv7ABSLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
 }
@@ -578,7 +577,7 @@ void ThumbV7ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV7ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv7ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv7ABSLongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
 }
@@ -599,8 +598,8 @@ void ARMV7PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV7PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMV7PILongThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__ARMV7PILongThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
 }
 
@@ -620,7 +619,7 @@ void ThumbV7PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV7PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ThumbV7PILongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ThumbV7PILongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
 }
@@ -635,7 +634,7 @@ void ARMV5ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV5ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMv5ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ARMv5ABSLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 4, isec);
@@ -661,8 +660,8 @@ void ARMV5PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV5PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMV5PILongThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__ARMV5PILongThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 12, isec);
 }
@@ -691,7 +690,7 @@ void ThumbV6MABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV6MABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv6MABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv6MABSLongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 8, isec);
@@ -717,7 +716,7 @@ void ThumbV6MPILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV6MPILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv6MPILongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv6MPILongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 12, isec);
@@ -735,7 +734,7 @@ void MipsThunk::writeTo(uint8_t *buf) {
 }
 
 void MipsThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__LA25Thunk_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__LA25Thunk_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -758,8 +757,9 @@ void MicroMipsThunk::writeTo(uint8_t *buf) {
 }
 
 void MicroMipsThunk::addSymbols(ThunkSection &isec) {
-  Defined *d = addSymbol(
-      saver.save("__microLA25Thunk_" + destination.getName()), STT_FUNC, 0, isec);
+  Defined *d =
+      addSymbol(saver().save("__microLA25Thunk_" + destination.getName()),
+                STT_FUNC, 0, isec);
   d->stOther |= STO_MIPS_MICROMIPS;
 }
 
@@ -782,8 +782,9 @@ void MicroMipsR6Thunk::writeTo(uint8_t *buf) {
 }
 
 void MicroMipsR6Thunk::addSymbols(ThunkSection &isec) {
-  Defined *d = addSymbol(
-      saver.save("__microLA25Thunk_" + destination.getName()), STT_FUNC, 0, isec);
+  Defined *d =
+      addSymbol(saver().save("__microLA25Thunk_" + destination.getName()),
+                STT_FUNC, 0, isec);
   d->stOther |= STO_MIPS_MICROMIPS;
 }
 
@@ -843,7 +844,7 @@ void PPC32PltCallStub::addSymbols(ThunkSection &isec) {
   else
     os << ".plt_pic32.";
   os << destination.getName();
-  addSymbol(saver.save(os.str()), STT_FUNC, 0, isec);
+  addSymbol(saver().save(os.str()), STT_FUNC, 0, isec);
 }
 
 bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
@@ -852,7 +853,7 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
 }
 
 void PPC32LongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__LongThunk_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -896,8 +897,8 @@ void PPC64PltCallStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64PltCallStub::addSymbols(ThunkSection &isec) {
-  Defined *s = addSymbol(saver.save("__plt_" + destination.getName()), STT_FUNC,
-                         0, isec);
+  Defined *s = addSymbol(saver().save("__plt_" + destination.getName()),
+                         STT_FUNC, 0, isec);
   s->needsTocRestore = true;
   s->file = destination.file;
 }
@@ -947,7 +948,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64R2SaveStub::addSymbols(ThunkSection &isec) {
-  Defined *s = addSymbol(saver.save("__toc_save_" + destination.getName()),
+  Defined *s = addSymbol(saver().save("__toc_save_" + destination.getName()),
                          STT_FUNC, 0, isec);
   s->needsTocRestore = true;
 }
@@ -983,7 +984,7 @@ void PPC64R12SetupStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__gep_setup_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__gep_setup_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -1019,7 +1020,7 @@ void PPC64PCRelPLTStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64PCRelPLTStub::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__plt_pcrel_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__plt_pcrel_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -1035,7 +1036,7 @@ void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
 }
 
 void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__long_branch_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__long_branch_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 9eefe5c84a114..2b14c07120c91 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -20,8 +20,8 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "lld/Common/Arrays.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Filesystem.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -2216,9 +2216,9 @@ void Writer::addStartStopSymbols(OutputSection *sec) {
   StringRef s = sec->name;
   if (!isValidCIdentifier(s))
     return;
-  addOptionalRegular(saver.save("__start_" + s), sec, 0,
+  addOptionalRegular(saver().save("__start_" + s), sec, 0,
                      config->zStartStopVisibility);
-  addOptionalRegular(saver.save("__stop_" + s), sec, -1,
+  addOptionalRegular(saver().save("__stop_" + s), sec, -1,
                      config->zStartStopVisibility);
 }
 
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index f63c2e6eb321c..4fae93469b5ff 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -13,8 +13,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -322,8 +321,8 @@ void ConcatOutputSection::finalize() {
       // get written are happy.
       thunkInfo.isec->live = true;
 
-      StringRef thunkName = saver.save(funcSym->getName() + ".thunk." +
-                                       std::to_string(thunkInfo.sequence++));
+      StringRef thunkName = saver().save(funcSym->getName() + ".thunk." +
+                                         std::to_string(thunkInfo.sequence++));
       r.referent = thunkInfo.sym = symtab->addDefined(
           thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
           /*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index a903add32e5ad..50f5c96c61f35 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -128,7 +128,7 @@ static Optional findFramework(StringRef name) {
         // only append suffix if realpath() succeeds
         Twine suffixed = location + suffix;
         if (fs::exists(suffixed))
-          return resolvedFrameworks[key] = saver.save(suffixed.str());
+          return resolvedFrameworks[key] = saver().save(suffixed.str());
       }
       // Suffix lookup failed, fall through to the no-suffix case.
     }
@@ -165,7 +165,7 @@ getSearchPaths(unsigned optionCode, InputArgList &args,
         path::append(buffer, path);
         // Do not warn about paths that are computed via the syslib roots
         if (fs::is_directory(buffer)) {
-          paths.push_back(saver.save(buffer.str()));
+          paths.push_back(saver().save(buffer.str()));
           found = true;
         }
       }
@@ -183,7 +183,7 @@ getSearchPaths(unsigned optionCode, InputArgList &args,
       SmallString<261> buffer(root);
       path::append(buffer, path);
       if (fs::is_directory(buffer))
-        paths.push_back(saver.save(buffer.str()));
+        paths.push_back(saver().save(buffer.str()));
     }
   }
   return paths;
@@ -1126,14 +1126,14 @@ static void referenceStubBinder() {
   symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, /*isWeak=*/false);
 }
 
-bool macho::link(ArrayRef argsArr, bool canExitEarly,
-                 raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  errorHandler().cleanupCallback = []() {
-    freeArena();
+bool macho::link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS,
+                 llvm::raw_ostream &stderrOS, bool exitEarly,
+                 bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
 
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.cleanupCallback = []() {
     resolvedFrameworks.clear();
     resolvedLibraries.clear();
     cachedReads.clear();
@@ -1154,17 +1154,15 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly,
     InputFile::resetIdCount();
   };
 
-  errorHandler().logName = args::getFilenameWithoutExe(argsArr[0]);
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.logName = args::getFilenameWithoutExe(argsArr[0]);
 
   MachOOptTable parser;
   InputArgList args = parser.parse(argsArr.slice(1));
 
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now "
-      "(use --error-limit=0 to see all errors)";
-  errorHandler().errorLimit = args::getInteger(args, OPT_error_limit_eq, 20);
-  errorHandler().verbose = args.hasArg(OPT_verbose);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now "
+                                 "(use --error-limit=0 to see all errors)";
+  ctx->e.errorLimit = args::getInteger(args, OPT_error_limit_eq, 20);
+  ctx->e.verbose = args.hasArg(OPT_verbose);
 
   if (args.hasArg(OPT_help_hidden)) {
     parser.printHelp(argsArr[0], /*showHidden=*/true);
@@ -1208,7 +1206,7 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly,
       // these are meaningful for our text based stripping
       if (config->osoPrefix.equals(".") || config->osoPrefix.endswith(sep))
         expanded += sep;
-      config->osoPrefix = saver.save(expanded.str());
+      config->osoPrefix = saver().save(expanded.str());
     }
   }
 
@@ -1496,7 +1494,7 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly,
 
     // Parse LTO options.
     if (const Arg *arg = args.getLastArg(OPT_mcpu))
-      parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())),
+      parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
                        arg->getSpelling());
 
     for (const Arg *arg : args.filtered(OPT_mllvm))
@@ -1587,11 +1585,5 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly,
 
     timeTraceProfilerCleanup();
   }
-
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  bool ret = errorCount() == 0;
-  errorHandler().reset();
-  return ret;
+  return errorCount() == 0;
 }
diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index 3c5440544614c..83940b54486ff 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -13,8 +13,7 @@
 #include "Target.h"
 
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
@@ -82,7 +81,7 @@ InputArgList MachOOptTable::parse(ArrayRef argv) {
 
   // Expand response files (arguments in the form of @)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, cl::TokenizeGNUCommandLine, vec);
+  cl::ExpandResponseFiles(saver(), cl::TokenizeGNUCommandLine, vec);
   InputArgList args = ParseArgs(vec, missingIndex, missingCount);
 
   // Handle -fatal_warnings early since it converts missing argument warnings
@@ -191,12 +190,12 @@ Optional macho::resolveDylibPath(StringRef dylibPath) {
   bool tbdExists = fs::exists(tbdPath);
   searchedDylib(tbdPath, tbdExists);
   if (tbdExists)
-    return saver.save(tbdPath.str());
+    return saver().save(tbdPath.str());
 
   bool dylibExists = fs::exists(dylibPath);
   searchedDylib(dylibPath, dylibExists);
   if (dylibExists)
-    return saver.save(dylibPath);
+    return saver().save(dylibPath);
   return {};
 }
 
@@ -261,7 +260,7 @@ macho::findPathCombination(const Twine &name,
       bool exists = fs::exists(location);
       searchedDylib(location, exists);
       if (exists)
-        return saver.save(location.str());
+        return saver().save(location.str());
     }
   }
   return {};
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9ab5a18aefbc8..bbeb2dc09bf0f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -56,9 +56,8 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -210,6 +209,8 @@ Optional macho::readFile(StringRef path) {
     return cachedReads[key] = mbref;
   }
 
+  llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
+
   // Object files and archive files may be fat files, which contain multiple
   // real files for different CPU ISAs. Here, we search for a file that matches
   // with the current link target and returns it as a MemoryBufferRef.
@@ -241,7 +242,7 @@ Optional macho::readFile(StringRef path) {
 }
 
 InputFile::InputFile(Kind kind, const InterfaceFile &interface)
-    : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
+    : id(idCount++), fileKind(kind), name(saver().save(interface.getPath())) {}
 
 // Some sections comprise of fixed-size records, so instead of splitting them at
 // symbol boundaries, we split them based on size. Records are distinct from
@@ -1211,7 +1212,7 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
     // Find all the $ld$* symbols to process first.
     parseTrie(buf + c->export_off, c->export_size,
               [&](const Twine &name, uint64_t flags) {
-                StringRef savedName = saver.save(name);
+                StringRef savedName = saver().save(name);
                 if (handleLDSymbol(savedName))
                   return;
                 entries.push_back({savedName, flags});
@@ -1285,7 +1286,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
     umbrella = this;
   this->umbrella = umbrella;
 
-  installName = saver.save(interface.getInstallName());
+  installName = saver().save(interface.getInstallName());
   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
   currentVersion = interface.getCurrentVersion().rawValue();
 
@@ -1304,7 +1305,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
 
   exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
   auto addSymbol = [&](const Twine &name) -> void {
-    StringRef savedName = saver.save(name);
+    StringRef savedName = saver().save(name);
     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(savedName)))
       return;
 
@@ -1423,7 +1424,7 @@ void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
       config->platformInfo.minimum >= end)
     return;
 
-  this->installName = saver.save(installName);
+  this->installName = saver().save(installName);
 
   if (!compatVersion.empty()) {
     VersionTuple cVersion;
@@ -1445,7 +1446,7 @@ void DylibFile::handleLDInstallNameSymbol(StringRef name,
   if (!condition.consume_front("os") || version.tryParse(condition))
     warn("failed to parse os version, symbol '" + originalName + "' ignored");
   else if (version == config->platformInfo.minimum)
-    this->installName = saver.save(installName);
+    this->installName = saver().save(installName);
 }
 
 void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
@@ -1550,7 +1551,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
 
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
                                           BitcodeFile &file) {
-  StringRef name = saver.save(objSym.getName());
+  StringRef name = saver().save(objSym.getName());
 
   if (objSym.isUndefined())
     return symtab->addUndefined(name, &file, /*isWeakRef=*/objSym.isWeak());
@@ -1592,11 +1593,12 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
   // So, we append the archive name to disambiguate two members with the same
   // name from multiple different archives, and offset within the archive to
   // disambiguate two members of the same name from a single archive.
-  MemoryBufferRef mbref(
-      mb.getBuffer(),
-      saver.save(archiveName.empty() ? path
-                                     : archiveName + sys::path::filename(path) +
-                                           utostr(offsetInArchive)));
+  MemoryBufferRef mbref(mb.getBuffer(),
+                        saver().save(archiveName.empty()
+                                         ? path
+                                         : archiveName +
+                                               sys::path::filename(path) +
+                                               utostr(offsetInArchive)));
 
   obj = check(lto::InputFile::create(mbref));
   if (lazy)
@@ -1620,7 +1622,7 @@ void BitcodeFile::parseLazy() {
     const lto::InputFile::Symbol &objSym = it.value();
     if (!objSym.isUndefined()) {
       symbols[it.index()] =
-          symtab->addLazyObject(saver.save(objSym.getName()), *this);
+          symtab->addLazyObject(saver().save(objSym.getName()), *this);
       if (!lazy)
         break;
     }
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index c71ea33d28965..fd49a09229d11 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -14,7 +14,7 @@
 #include "Target.h"
 
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
 #include "llvm/LTO/Config.h"
@@ -148,7 +148,7 @@ std::vector BitcodeCompiler::compile() {
       modTime = getModTime(filePath);
     }
     ret.push_back(make(
-        MemoryBufferRef(buf[i], saver.save(filePath.str())), modTime, ""));
+        MemoryBufferRef(buf[i], saver().save(filePath.str())), modTime, ""));
   }
   for (std::unique_ptr &file : files)
     if (file)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 7e4b83ec1903d..3fe551f5684b6 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -16,8 +16,7 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/EndianStream.h"
@@ -834,7 +833,7 @@ void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
   if (!dir.endswith(sep))
     dir += sep;
   stab.strx = stringTableSection.addString(
-      saver.save(dir + compileUnit->getUnitDIE().getShortName()));
+      saver().save(dir + compileUnit->getUnitDIE().getShortName()));
   stabs.emplace_back(std::move(stab));
 }
 
@@ -856,7 +855,7 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
   if (!file->archiveName.empty())
     path.append({"(", file->getName(), ")"});
 
-  StringRef adjustedPath = saver.save(path.str());
+  StringRef adjustedPath = saver().save(path.str());
   adjustedPath.consume_front(config->osoPrefix);
 
   stab.strx = stringTableSection.addString(adjustedPath);
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 4c7d1c2eeb32b..c76dc691346e6 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -22,8 +22,7 @@
 #include "UnwindInfoSection.h"
 
 #include "lld/Common/Arrays.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/LEB128.h"
@@ -611,7 +610,7 @@ static bool needsBinding(const Symbol *sym) {
 }
 
 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
-                                    const Reloc &r) {
+                                    const lld::macho::Reloc &r) {
   assert(sym->isLive());
   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
 
@@ -644,7 +643,7 @@ void Writer::scanRelocations() {
       continue;
 
     for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
-      Reloc &r = *it;
+      lld::macho::Reloc &r = *it;
       if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
         // Skip over the following UNSIGNED relocation -- it's just there as the
         // minuend, and doesn't have the usual UNSIGNED semantics. We don't want
@@ -858,8 +857,8 @@ static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
   if (f->archiveName.empty())
     filename = path::filename(f->getName());
   else
-    filename = saver.save(path::filename(f->archiveName) + "(" +
-                          path::filename(f->getName()) + ")");
+    filename = saver().save(path::filename(f->archiveName) + "(" +
+                            path::filename(f->getName()) + ")");
   return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
 }
 
@@ -1216,7 +1215,7 @@ void macho::createSyntheticSections() {
 
   // This section contains space for just a single word, and will be used by
   // dyld to cache an address to the image loader it uses.
-  uint8_t *arr = bAlloc.Allocate(target->wordSize);
+  uint8_t *arr = bAlloc().Allocate(target->wordSize);
   memset(arr, 0, target->wordSize);
   in.imageLoaderCache = make(
       segment_names::data, section_names::data, /*file=*/nullptr,
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 7c6b865a2e398..10ff7cf5a7123 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -100,7 +100,7 @@ opt::InputArgList MinGWOptTable::parse(ArrayRef argv) {
   unsigned missingCount;
 
   SmallVector vec(argv.data(), argv.data() + argv.size());
-  cl::ExpandResponseFiles(saver, getQuotingStyle(), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(), vec);
   opt::InputArgList args = this->ParseArgs(vec, missingIndex, missingCount);
 
   if (missingCount)
@@ -154,12 +154,11 @@ searchLibrary(StringRef name, ArrayRef searchPaths, bool bStatic) {
 
 // Convert Unix-ish command line arguments to Windows-ish ones and
 // then call coff::link.
-bool mingw::link(ArrayRef argsArr, bool canExitEarly,
-                 raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  stderrOS.enable_colors(stderrOS.has_colors());
+bool mingw::link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS,
+                 llvm::raw_ostream &stderrOS, bool exitEarly,
+                 bool disableOutput) {
+  auto *ctx = new CommonLinkerContext;
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
 
   MinGWOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
@@ -445,5 +444,9 @@ bool mingw::link(ArrayRef argsArr, bool canExitEarly,
   // Pass the actual binary name, to make error messages be printed with
   // the right prefix.
   vec[0] = argsArr[0];
-  return coff::link(vec, canExitEarly, stdoutOS, stderrOS);
+
+  // The context will be re-created in the COFF driver.
+  lld::CommonLinkerContext::destroy();
+
+  return coff::link(vec, stdoutOS, stderrOS, exitEarly, disableOutput);
 }
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
new file mode 100644
index 0000000000000..3954d38ded636
--- /dev/null
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -0,0 +1,65 @@
+//===- CommonLinkerContext.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Entry point for all global state in lldCommon. The objective is for LLD to be
+// used "as a library" in a thread-safe manner.
+//
+// Instead of program-wide globals or function-local statics, we prefer
+// aggregating all "global" states into a heap-based structure
+// (CommonLinkerContext). This also achieves deterministic initialization &
+// shutdown for all "global" states.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COMMON_COMMONLINKINGCONTEXT_H
+#define LLD_COMMON_COMMONLINKINGCONTEXT_H
+
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/Support/StringSaver.h"
+
+namespace llvm {
+class raw_ostream;
+} // namespace llvm
+
+namespace lld {
+struct SpecificAllocBase;
+class CommonLinkerContext {
+public:
+  CommonLinkerContext();
+  virtual ~CommonLinkerContext();
+
+  static void destroy();
+
+  llvm::BumpPtrAllocator bAlloc;
+  llvm::StringSaver saver{bAlloc};
+  llvm::DenseMap instances;
+
+  ErrorHandler e;
+
+private:
+  llvm::codegen::RegisterCodeGenFlags cgf;
+};
+
+// Retrieve the global state. Currently only one state can exist per process,
+// but in the future we plan on supporting an arbitrary number of LLD instances
+// in a single process.
+CommonLinkerContext &commonContext();
+
+template  T &context() {
+  return static_cast(commonContext());
+}
+
+bool hasContext();
+
+inline llvm::StringSaver &saver() { return context().saver; }
+inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
+} // namespace lld
+
+#endif
diff --git a/lld/include/lld/Common/Driver.h b/lld/include/lld/Common/Driver.h
index 0e505a16463e7..91cb91b9f8082 100644
--- a/lld/include/lld/Common/Driver.h
+++ b/lld/include/lld/Common/Driver.h
@@ -9,6 +9,7 @@
 #ifndef LLD_COMMON_DRIVER_H
 #define LLD_COMMON_DRIVER_H
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -28,28 +29,28 @@ SafeReturn safeLldMain(int argc, const char **argv, llvm::raw_ostream &stdoutOS,
                        llvm::raw_ostream &stderrOS);
 
 namespace coff {
-bool link(llvm::ArrayRef args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace mingw {
-bool link(llvm::ArrayRef args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace elf {
-bool link(llvm::ArrayRef args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace macho {
-bool link(llvm::ArrayRef args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace wasm {
-bool link(llvm::ArrayRef args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 }
 
diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h
index d95a2537c1f2c..ce077290d60b3 100644
--- a/lld/include/lld/Common/ErrorHandler.h
+++ b/lld/include/lld/Common/ErrorHandler.h
@@ -73,6 +73,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileOutputBuffer.h"
+#include 
 
 namespace llvm {
 class DiagnosticInfo;
@@ -81,11 +82,6 @@ class raw_ostream;
 
 namespace lld {
 
-// We wrap stdout and stderr so that you can pass alternative stdout/stderr as
-// arguments to lld::*::link() functions.
-extern llvm::raw_ostream *stdoutOS;
-extern llvm::raw_ostream *stderrOS;
-
 llvm::raw_ostream &outs();
 llvm::raw_ostream &errs();
 
@@ -93,6 +89,11 @@ enum class ErrorTag { LibNotFound, SymbolNotFound };
 
 class ErrorHandler {
 public:
+  ~ErrorHandler();
+
+  void initialize(llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS,
+                  bool exitEarly, bool disableOutput);
+
   uint64_t errorCount = 0;
   uint64_t errorLimit = 20;
   StringRef errorLimitExceededMsg = "too many errors emitted, stopping now";
@@ -112,11 +113,9 @@ class ErrorHandler {
   void message(const Twine &msg, llvm::raw_ostream &s);
   void warn(const Twine &msg);
 
-  void reset() {
-    if (cleanupCallback)
-      cleanupCallback();
-    *this = ErrorHandler();
-  }
+  raw_ostream &outs();
+  raw_ostream &errs();
+  void flushStreams();
 
   std::unique_ptr outputBuffer;
 
@@ -126,6 +125,19 @@ class ErrorHandler {
   std::string getLocation(const Twine &msg);
   void reportDiagnostic(StringRef location, Colors c, StringRef diagKind,
                         const Twine &msg);
+
+  // We want to separate multi-line messages with a newline. `sep` is "\n"
+  // if the last message was multi-line. Otherwise "".
+  llvm::StringRef sep;
+
+  // We wrap stdout and stderr so that you can pass alternative stdout/stderr as
+  // arguments to lld::*::link() functions. Since lld::outs() or lld::errs() can
+  // be indirectly called from multiple threads, we protect them using a mutex.
+  // In the future, we plan on supporting several concurrent linker contexts,
+  // which explains why the mutex is not a global but part of this context.
+  std::mutex mu;
+  llvm::raw_ostream *stdoutOS{};
+  llvm::raw_ostream *stderrOS{};
 };
 
 /// Returns the default error handler.
diff --git a/lld/include/lld/Common/Memory.h b/lld/include/lld/Common/Memory.h
index f516a327cfb2b..0b2f474c30135 100644
--- a/lld/include/lld/Common/Memory.h
+++ b/lld/include/lld/Common/Memory.h
@@ -22,42 +22,41 @@
 #define LLD_COMMON_MEMORY_H
 
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/StringSaver.h"
-#include 
 
 namespace lld {
-
-// Use this arena if your object doesn't have a destructor.
-extern llvm::BumpPtrAllocator bAlloc;
-extern llvm::StringSaver saver;
-
-void freeArena();
-
-// These two classes are hack to keep track of all
-// SpecificBumpPtrAllocator instances.
+// A base class only used by the CommonLinkerContext to keep track of the
+// SpecificAlloc<> instances.
 struct SpecificAllocBase {
-  SpecificAllocBase() { instances.push_back(this); }
   virtual ~SpecificAllocBase() = default;
-  virtual void reset() = 0;
-  static std::vector instances;
+  static SpecificAllocBase *getOrCreate(void *tag, size_t size, size_t align,
+                                        SpecificAllocBase *(&creator)(void *));
 };
 
+// An arena of specific types T, created on-demand.
 template  struct SpecificAlloc : public SpecificAllocBase {
-  void reset() override { alloc.DestroyAll(); }
+  static SpecificAllocBase *create(void *storage) {
+    return new (storage) SpecificAlloc();
+  }
   llvm::SpecificBumpPtrAllocator alloc;
+  static int tag;
 };
 
-// Use a static local for these singletons so they are only registered if an
-// object of this instance is ever constructed. Otherwise we will create and
-// register ELF allocators for COFF and the reverse.
+// The address of this static member is only used as a key in
+// CommonLinkerContext::instances. Its value does not matter.
+template  int SpecificAlloc::tag = 0;
+
+// Creates the arena on-demand on the first call; or returns it, if it was
+// already created.
 template 
 inline llvm::SpecificBumpPtrAllocator &getSpecificAllocSingleton() {
-  static SpecificAlloc instance;
-  return instance.alloc;
+  SpecificAllocBase *instance = SpecificAllocBase::getOrCreate(
+      &SpecificAlloc::tag, sizeof(SpecificAlloc),
+      alignof(SpecificAlloc), SpecificAlloc::create);
+  return ((SpecificAlloc *)instance)->alloc;
 }
 
-// Use this arena if your object has a destructor.
-// Your destructor will be invoked from freeArena().
+// Creates new instances of T off an (almost) contiguous arena/object pool. The
+// instances are destroyed whenever lldMain() goes out of scope.
 template  T *make(U &&... args) {
   return new (getSpecificAllocSingleton().Allocate())
       T(std::forward(args)...);
diff --git a/lld/include/lld/Core/LinkingContext.h b/lld/include/lld/Core/LinkingContext.h
index e090ff9902318..091369e143190 100644
--- a/lld/include/lld/Core/LinkingContext.h
+++ b/lld/include/lld/Core/LinkingContext.h
@@ -9,6 +9,7 @@
 #ifndef LLD_CORE_LINKING_CONTEXT_H
 #define LLD_CORE_LINKING_CONTEXT_H
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Core/Node.h"
 #include "lld/Core/Reader.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -34,7 +35,7 @@ class SharedLibraryFile;
 /// The base class LinkingContext contains the options needed by core linking.
 /// Subclasses of LinkingContext have additional options needed by specific
 /// Writers.
-class LinkingContext {
+class LinkingContext : public CommonLinkerContext {
 public:
   virtual ~LinkingContext();
 
diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp
index cad97f2153c28..0a6439fff2a27 100644
--- a/lld/tools/lld/lld.cpp
+++ b/lld/tools/lld/lld.cpp
@@ -87,6 +87,8 @@ static bool isPETarget(std::vector &v) {
   // Expand response files (arguments in the form of @)
   // to allow detecting the -m argument from arguments in them.
   SmallVector expandedArgs(v.data(), v.data() + v.size());
+  BumpPtrAllocator a;
+  StringSaver saver(a);
   cl::ExpandResponseFiles(saver, getDefaultQuotingStyle(), expandedArgs);
   for (auto it = expandedArgs.begin(); it + 1 != expandedArgs.end(); ++it) {
     if (StringRef(*it) != "-m")
@@ -134,27 +136,42 @@ static Flavor parseFlavor(std::vector &v) {
   return parseProgname(arg0);
 }
 
+bool inTestOutputDisabled = false;
+
 /// Universal linker main(). This linker emulates the gnu, darwin, or
 /// windows linker based on the argv[0] or -flavor option.
 static int lldMain(int argc, const char **argv, llvm::raw_ostream &stdoutOS,
                    llvm::raw_ostream &stderrOS, bool exitEarly = true) {
   std::vector args(argv, argv + argc);
-  switch (parseFlavor(args)) {
-  case Gnu:
-    if (isPETarget(args))
-      return !mingw::link(args, exitEarly, stdoutOS, stderrOS);
-    return !elf::link(args, exitEarly, stdoutOS, stderrOS);
-  case WinLink:
-    return !coff::link(args, exitEarly, stdoutOS, stderrOS);
-  case Darwin:
-    return !macho::link(args, exitEarly, stdoutOS, stderrOS);
-  case Wasm:
-    return !lld::wasm::link(args, exitEarly, stdoutOS, stderrOS);
-  default:
-    die("lld is a generic driver.\n"
-        "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
-        " (WebAssembly) instead");
-  }
+  auto link = [&args]() {
+    Flavor f = parseFlavor(args);
+    if (f == Gnu && isPETarget(args))
+      return mingw::link;
+    else if (f == Gnu)
+      return elf::link;
+    else if (f == WinLink)
+      return coff::link;
+    else if (f == Darwin)
+      return macho::link;
+    else if (f == Wasm)
+      return lld::wasm::link;
+    else
+      die("lld is a generic driver.\n"
+          "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
+          " (WebAssembly) instead");
+  };
+  // Run the driver. If an error occurs, false will be returned.
+  bool r = link()(args, stdoutOS, stderrOS, exitEarly, inTestOutputDisabled);
+
+  // Call exit() if we can to avoid calling destructors.
+  if (exitEarly)
+    exitLld(!r ? 1 : 0);
+
+  // Delete the global context and clear the global context pointer, so that it
+  // cannot be accessed anymore.
+  CommonLinkerContext::destroy();
+
+  return !r ? 1 : 0;
 }
 
 // Similar to lldMain except that exceptions are caught.
@@ -176,7 +193,7 @@ SafeReturn lld::safeLldMain(int argc, const char **argv,
   // Cleanup memory and reset everything back in pristine condition. This path
   // is only taken when LLD is in test, or when it is used as a library.
   llvm::CrashRecoveryContext crc;
-  if (!crc.RunSafely([&]() { errorHandler().reset(); })) {
+  if (!crc.RunSafely([&]() { CommonLinkerContext::destroy(); })) {
     // The memory is corrupted beyond any possible recovery.
     return {r, /*canRunAgain=*/false};
   }
@@ -207,8 +224,7 @@ int main(int argc, const char **argv) {
 
   for (unsigned i = inTestVerbosity(); i > 0; --i) {
     // Disable stdout/stderr for all iterations but the last one.
-    if (i != 1)
-      errorHandler().disableOutput = true;
+    inTestOutputDisabled = (i != 1);
 
     // Execute one iteration.
     auto r = safeLldMain(argc, argv, llvm::outs(), llvm::errs());
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 59abfaadf3989..7523755806ab9 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -81,18 +81,15 @@ class LinkerDriver {
 };
 } // anonymous namespace
 
-bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS,
-          raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
+bool link(ArrayRef args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
 
-  errorHandler().cleanupCallback = []() { freeArena(); };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now (use "
-      "-error-limit=0 to see all errors)";
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
+                                 "-error-limit=0 to see all errors)";
 
   config = make();
   symtab = make();
@@ -100,13 +97,7 @@ bool link(ArrayRef args, bool canExitEarly, raw_ostream &stdoutOS,
   initLLVM();
   LinkerDriver().linkerMain(args);
 
-  // Exit immediately if we don't need to return to the caller.
-  // This saves time because the overhead of calling destructors
-  // for all globally-allocated objects is not negligible.
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  return !errorCount();
+  return errorCount() == 0;
 }
 
 // Create prefix string literals used in Options.td
@@ -189,7 +180,7 @@ opt::InputArgList WasmOptTable::parse(ArrayRef argv) {
 
   // Expand response files (arguments in the form of @)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), vec);
   args = this->ParseArgs(vec, missingIndex, missingCount);
 
   handleColorDiagnostics(args);
@@ -760,8 +751,8 @@ static std::vector addWrappedSymbols(opt::InputArgList &args) {
     if (!sym)
       continue;
 
-    Symbol *real = addUndefined(saver.save("__real_" + name));
-    Symbol *wrap = addUndefined(saver.save("__wrap_" + name));
+    Symbol *real = addUndefined(saver().save("__real_" + name));
+    Symbol *wrap = addUndefined(saver().save("__wrap_" + name));
     v.push_back({sym, real, wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index e3a7f56ab884a..3e3b31ab1ee5e 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -12,8 +12,7 @@
 #include "InputElement.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Wasm.h"
@@ -721,7 +720,7 @@ static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
 static Symbol *createBitcodeSymbol(const std::vector &keptComdats,
                                    const lto::InputFile::Symbol &objSym,
                                    BitcodeFile &f) {
-  StringRef name = saver.save(objSym.getName());
+  StringRef name = saver().save(objSym.getName());
 
   uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
   flags |= mapVisibility(objSym.getVisibility());
@@ -756,9 +755,9 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
   StringRef name = archiveName.empty()
-                       ? saver.save(path)
-                       : saver.save(archiveName + "(" + path::filename(path) +
-                                    " at " + utostr(offsetInArchive) + ")");
+                       ? saver().save(path)
+                       : saver().save(archiveName + "(" + path::filename(path) +
+                                      " at " + utostr(offsetInArchive) + ")");
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = check(lto::InputFile::create(mbref));
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 63e66c145747e..ef1402248eec8 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -11,8 +11,7 @@
 #include "InputChunks.h"
 #include "InputElement.h"
 #include "WriterUtils.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/SetVector.h"
 
 #define DEBUG_TYPE "lld"
@@ -843,7 +842,7 @@ InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
 void SymbolTable::replaceWithUndefined(Symbol *sym) {
   // Add a synthetic dummy for weak undefined functions.  These dummies will
   // be GC'd if not used as the target of any "call" instructions.
-  StringRef debugName = saver.save("undefined_weak:" + toString(*sym));
+  StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
   replaceWithUnreachable(sym, *sym->getSignature(), debugName);
   // Hide our dummy to prevent export.
   sym->setHidden(true);
@@ -941,7 +940,8 @@ void SymbolTable::handleSymbolVariants() {
       if (symbol != defined) {
         auto *f = cast(symbol);
         reportFunctionSignatureMismatch(symName, f, defined, false);
-        StringRef debugName = saver.save("signature_mismatch:" + toString(*f));
+        StringRef debugName =
+            saver().save("signature_mismatch:" + toString(*f));
         replaceWithUnreachable(f, *f->signature, debugName);
       }
     }
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index f7589b2cd684d..95f6483e9e591 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -17,8 +17,7 @@
 #include "SymbolTable.h"
 #include "SyntheticSections.h"
 #include "WriterUtils.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -190,7 +189,7 @@ void Writer::createRelocSections() {
     else if (sec->type == WASM_SEC_CODE)
       name = "reloc.CODE";
     else if (sec->type == WASM_SEC_CUSTOM)
-      name = saver.save("reloc." + sec->name);
+      name = saver().save("reloc." + sec->name);
     else
       llvm_unreachable(
           "relocations only supported for code, data, or custom sections");
@@ -389,8 +388,8 @@ static void addStartStopSymbols(const OutputSegment *seg) {
   LLVM_DEBUG(dbgs() << "addStartStopSymbols: " << name << "\n");
   uint64_t start = seg->startVA;
   uint64_t stop = start + seg->size;
-  symtab->addOptionalDataSymbol(saver.save("__start_" + name), start);
-  symtab->addOptionalDataSymbol(saver.save("__stop_" + name), stop);
+  symtab->addOptionalDataSymbol(saver().save("__start_" + name), start);
+  symtab->addOptionalDataSymbol(saver().save("__stop_" + name), stop);
 }
 
 void Writer::addSections() {
@@ -958,7 +957,7 @@ static void createFunction(DefinedFunction *func, StringRef bodyContent) {
     writeUleb128(os, bodyContent.size(), "function size");
     os << bodyContent;
   }
-  ArrayRef body = arrayRefFromStringRef(saver.save(functionBody));
+  ArrayRef body = arrayRefFromStringRef(saver().save(functionBody));
   cast(func->function)->setBody(body);
 }
 
diff --git a/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h b/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
index 1a7c2f3aeeaba..570b40c70578d 100644
--- a/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
+++ b/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
@@ -27,7 +27,14 @@
 
 // DIA headers must come after windows headers.
 #include 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
 #include 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 #include 
 
 #endif // LLVM_DEBUGINFO_PDB_DIA_DIASUPPORT_H

From d4baf3b1322b84816aa623d8e8cb45a49cb68b84 Mon Sep 17 00:00:00 2001
From: Tue Ly 
Date: Thu, 20 Jan 2022 14:43:09 -0500
Subject: [PATCH 066/946] [libc] Use get_round() instead of floating point
 tricks in generic hypot implementation.

The floating-point tricks used to detect the rounding mode require the -frounding-math flag, which behaves differently on aarch64. Revert to using get_round() instead.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117824
---
 libc/src/__support/FPUtil/Hypot.h    | 8 ++------
 libc/src/math/generic/CMakeLists.txt | 4 ----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 4b2987138d1bd..15b26798ccb54 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -144,9 +144,7 @@ static inline T hypot(T x, T y) {
   if ((x_bits.get_unbiased_exponent() >=
        y_bits.get_unbiased_exponent() + MantissaWidth::VALUE + 2) ||
       (y == 0)) {
-    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
-    // that the compiler does not optimize it away.
-    if ((y != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+    if ((y != 0) && (get_round() == FE_UPWARD)) {
       UIntType out_bits = FPBits_t(abs(x)).uintval();
       return T(FPBits_t(++out_bits));
     }
@@ -154,9 +152,7 @@ static inline T hypot(T x, T y) {
   } else if ((y_bits.get_unbiased_exponent() >=
               x_bits.get_unbiased_exponent() + MantissaWidth::VALUE + 2) ||
              (x == 0)) {
-    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
-    // that the compiler does not optimize it away.
-    if ((x != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+    if ((x != 0) && (get_round() == FE_UPWARD)) {
       UIntType out_bits = FPBits_t(abs(y)).uintval();
       return T(FPBits_t(++out_bits));
     }
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 59bca76fc5f84..c3914b8c45af3 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -955,8 +955,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
     -O3
-    -frounding-math
-    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(
@@ -1005,8 +1003,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
     -O3
-    -frounding-math
-    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(

From 8b4fa2c98e07997469f53bee30c0d24a61dc7c8c Mon Sep 17 00:00:00 2001
From: Nico Weber 
Date: Thu, 20 Jan 2022 14:59:30 -0500
Subject: [PATCH 067/946] clang: Auto-cleanup left-over file from before
 3da69fb5a26c7b on bots

---
 clang/test/Sema/test-wunaligned-access.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c
index 909cda45f489b..74945a8539eb8 100644
--- a/clang/test/Sema/test-wunaligned-access.c
+++ b/clang/test/Sema/test-wunaligned-access.c
@@ -1,3 +1,6 @@
+// FIXME: Remove rm after a few days.
+// RUN: rm -f %S/test-wunaligned-access.ll
+
 // RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t
 // REQUIRES: arm-registered-target
 //

From 9122b5072aa77e98804b2bf8ca2a60d152198b2e Mon Sep 17 00:00:00 2001
From: Nico Weber 
Date: Thu, 20 Jan 2022 15:02:35 -0500
Subject: [PATCH 068/946] [llvm] Remove an old bot cleanup command

---
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 660595a380db9..d3ae8bb876224 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -1,7 +1,5 @@
 ; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each
 ; pass. Ignore it with 'grep -v'.
-; fixme: the following line is added to cleanup bots, will be removed in weeks.
-; RUN: rm -f %S/llc-pipeline.s
 ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \
 ; RUN:   | grep -v 'Verify generated machine code' | FileCheck -match-full-lines -strict-whitespace -check-prefix=GCN-O0 %s
 ; RUN: llc -O1 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \

From 608a9c0e7909e25ed8f121c356cd9c5897cdf22c Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot 
Date: Thu, 20 Jan 2022 20:02:49 +0000
Subject: [PATCH 069/946] [gn build] Port 63a991d03589

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 43703f603d794..3e28727f082ed 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -242,7 +242,6 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter_integral.h",
       "__format/formatter_string.h",
       "__format/parser_std_format_spec.h",
-      "__function_like.h",
       "__functional/binary_function.h",
       "__functional/binary_negate.h",
       "__functional/bind.h",

From 14a2964698647cb46ee7602e2581177585c03170 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot 
Date: Thu, 20 Jan 2022 20:02:50 +0000
Subject: [PATCH 070/946] [gn build] Port 83d59e05b201

---
 llvm/utils/gn/secondary/lld/Common/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
index ed87426362d6f..a50d5e4eda964 100644
--- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
@@ -32,6 +32,7 @@ static_library("Common") {
   ]
   sources = [
     "Args.cpp",
+    "CommonLinkerContext.cpp",
     "DWARF.cpp",
     "ErrorHandler.cpp",
     "Filesystem.cpp",

From 860038e0d775632776abb5373539baa53693d749 Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Thu, 20 Jan 2022 12:13:22 -0800
Subject: [PATCH 071/946] [SLP] Rename a couple lambdas to be more clearly
 separate from method names

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 43119b9c80cb3..d9d02cb56c447 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7246,7 +7246,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP,
   bool ReSchedule = false;
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");
 
-  auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule,
+  auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
                                                          ScheduleData *Bundle) {
     // The scheduling region got new instructions at the lower end (or it is a
     // new region for the first bundle). This makes it necessary to
@@ -7290,7 +7290,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP,
       // Otherwise the compiler may crash trying to incorrectly calculate
       // dependencies and emit instruction in the wrong order at the actual
       // scheduling.
-      TryScheduleBundle(/*ReSchedule=*/false, nullptr);
+      TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
       return None;
     }
   }
@@ -7322,7 +7322,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP,
     PrevInBundle = BundleMember;
   }
   assert(Bundle && "Failed to find schedule bundle");
-  TryScheduleBundle(ReSchedule, Bundle);
+  TryScheduleBundleImpl(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
     cancelScheduling(VL, S.OpValue);
     return None;
@@ -9618,7 +9618,7 @@ tryToVectorizeSequence(SmallVectorImpl &Incoming,
                        function_ref Limit,
                        function_ref Comparator,
                        function_ref AreCompatible,
-                       function_ref, bool)> TryToVectorize,
+                       function_ref, bool)> TryToVectorizeHelper,
                        bool LimitForRegisterSize) {
   bool Changed = false;
   // Sort by type, parent, operands.
@@ -9647,7 +9647,7 @@ tryToVectorizeSequence(SmallVectorImpl &Incoming,
     // same/alternate ops only, this may result in some extra final
     // vectorization.
     if (NumElts > 1 &&
-        TryToVectorize(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
+        TryToVectorizeHelper(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
       // Success start over because instructions might have been changed.
       Changed = true;
     } else if (NumElts < Limit(*IncIt) &&
@@ -9658,7 +9658,7 @@ tryToVectorizeSequence(SmallVectorImpl &Incoming,
     // Final attempt to vectorize instructions with the same types.
     if (Candidates.size() > 1 &&
         (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
-      if (TryToVectorize(Candidates, /*LimitForRegisterSize=*/false)) {
+      if (TryToVectorizeHelper(Candidates, /*LimitForRegisterSize=*/false)) {
         // Success start over because instructions might have been changed.
         Changed = true;
       } else if (LimitForRegisterSize) {
@@ -9669,7 +9669,7 @@ tryToVectorizeSequence(SmallVectorImpl &Incoming,
           while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
             ++SameTypeIt;
           unsigned NumElts = (SameTypeIt - It);
-          if (NumElts > 1 && TryToVectorize(makeArrayRef(It, NumElts),
+          if (NumElts > 1 && TryToVectorizeHelper(makeArrayRef(It, NumElts),
                                             /*LimitForRegisterSize=*/false))
             Changed = true;
           It = SameTypeIt;

From 82452be5cbd7fb48e36dd4f7b2b7eaf598d34bd6 Mon Sep 17 00:00:00 2001
From: Marek Kurdej 
Date: Thu, 20 Jan 2022 21:05:54 +0100
Subject: [PATCH 072/946] [clang-format] Refactor: add
 FormatToken::hasWhitespaceBefore(). NFC.

This factors out a pattern that comes up from time to time.

Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan

Differential Revision: https://reviews.llvm.org/D117769
---
 clang/lib/Format/Format.cpp              |  8 +++-----
 clang/lib/Format/FormatToken.h           |  6 ++++++
 clang/lib/Format/FormatTokenLexer.cpp    |  6 ++----
 clang/lib/Format/TokenAnnotator.cpp      | 15 ++++++---------
 clang/lib/Format/UnwrappedLineParser.cpp |  3 +--
 5 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 12020c945ea99..04e2915e3af69 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1928,7 +1928,7 @@ class Formatter : public TokenAnalyzer {
       if (hasCpp03IncompatibleFormat(Line->Children))
         return true;
       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
-        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
+        if (!Tok->hasWhitespaceBefore()) {
           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
             return true;
           if (Tok->is(TT_TemplateCloser) &&
@@ -1947,10 +1947,8 @@ class Formatter : public TokenAnalyzer {
       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
         if (!Tok->is(TT_PointerOrReference))
           continue;
-        bool SpaceBefore =
-            Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
-        bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
-                          Tok->Next->WhitespaceRange.getEnd();
+        bool SpaceBefore = Tok->hasWhitespaceBefore();
+        bool SpaceAfter = Tok->Next->hasWhitespaceBefore();
         if (SpaceBefore && !SpaceAfter)
           ++AlignmentDiff;
         if (!SpaceBefore && SpaceAfter)
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 8377273263737..b087f9f120411 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -637,6 +637,12 @@ struct FormatToken {
     return WhitespaceRange.getEnd();
   }
 
+  /// Returns \c true if the range of whitespace immediately preceding the \c
+  /// Token is not empty.
+  bool hasWhitespaceBefore() const {
+    return WhitespaceRange.getBegin() != WhitespaceRange.getEnd();
+  }
+
   prec::Level getPrecedence() const {
     return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
                               /*CPlusPlus11=*/true);
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 04629fec1bcaf..e8b9b3d61c888 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -446,8 +446,7 @@ bool FormatTokenLexer::tryMergeLessLess() {
     return false;
 
   // Only merge if there currently is no whitespace between the two "<".
-  if (First[1]->WhitespaceRange.getBegin() !=
-      First[1]->WhitespaceRange.getEnd())
+  if (First[1]->hasWhitespaceBefore())
     return false;
 
   First[0]->Tok.setKind(tok::lessless);
@@ -468,8 +467,7 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef Kinds,
     return false;
   unsigned AddLength = 0;
   for (unsigned i = 1; i < Kinds.size(); ++i) {
-    if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
-                                       First[i]->WhitespaceRange.getEnd())
+    if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore())
       return false;
     AddLength += First[i]->TokenText.size();
   }
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 71f29e8c010e5..7fe0d319e5703 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3304,14 +3304,11 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
                                          const FormatToken &Right) {
   const FormatToken &Left = *Right.Previous;
-  auto HasExistingWhitespace = [&Right]() {
-    return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
-  };
 
   // If the token is finalized don't touch it (as it could be in a
   // clang-format-off section).
   if (Left.Finalized)
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
 
   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
     return true; // Never ever merge two identifiers.
@@ -3373,7 +3370,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     // Preserve the existence of a space before a percent for cases like 0x%04x
     // and "%d %d"
     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
-      return HasExistingWhitespace();
+      return Right.hasWhitespaceBefore();
   } else if (Style.isJson()) {
     if (Right.is(tok::colon))
       return false;
@@ -3554,7 +3551,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
       return true;
   }
   if (Left.is(TT_ImplicitStringLiteral))
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
   if (Line.Type == LT_ObjCMethodDecl) {
     if (Left.is(TT_ObjCMethodSpecifier))
       return true;
@@ -3639,11 +3636,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     return Style.SpaceAfterCStyleCast ||
            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
 
-  auto ShouldAddSpacesInAngles = [this, &HasExistingWhitespace]() {
+  auto ShouldAddSpacesInAngles = [this, &Right]() {
     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
       return true;
     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
-      return HasExistingWhitespace();
+      return Right.hasWhitespaceBefore();
     return false;
   };
 
@@ -3669,7 +3666,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     // Generally don't remove existing spaces between an identifier and "::".
     // The identifier might actually be a macro name such as ALWAYS_INLINE. If
     // this turns out to be too lenient, add analysis of the identifier itself.
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
   if (Right.is(tok::coloncolon) &&
       !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren))
     // Put a space between < and :: in vector< ::std::string >
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 69fe21cd87f01..f466111260962 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1042,8 +1042,7 @@ void UnwrappedLineParser::parsePPDefine() {
 
   nextToken();
   if (FormatTok->Tok.getKind() == tok::l_paren &&
-      FormatTok->WhitespaceRange.getBegin() ==
-          FormatTok->WhitespaceRange.getEnd()) {
+      !FormatTok->hasWhitespaceBefore()) {
     parseParens();
   }
   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)

From f84023a812b6499c506358912691b22a6424f897 Mon Sep 17 00:00:00 2001
From: Roger Kim 
Date: Thu, 20 Jan 2022 12:13:04 -0800
Subject: [PATCH 073/946] [lld][macho] Stop grouping symbols by sections in
 mapfile.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As per [Bug 50689](https://bugs.llvm.org/show_bug.cgi?id=50689),

```
2. getSectionSyms() puts all the symbols into a map of section -> symbols, but this seems unnecessary. This was likely copied from the ELF port, which prints a section header before the list of symbols it contains. But the Mach-O map file doesn't print these headers.
```

This diff removes `getSectionSyms()` and keeps all symbols in a flat vector.

What does ld64's mapfile look like?
```
$ llvm-mc -filetype=obj -triple=x86_64-apple-darwin test.s -o test.o
$ llvm-mc -filetype=obj -triple=x86_64-apple-darwin foo.s -o foo.o
$ ld -map map test.o foo.o -o out -L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lSystem
```

```
[  0] linker synthesized
[  1] test.o
[  2] foo.o
0x100003FB7     0x00000001      __TEXT  __text
0x100003FB8     0x00000000      __TEXT  obj
0x100003FB8     0x00000048      __TEXT  __unwind_info
0x100004000     0x00000001      __DATA  __common
0x100003FB7     0x00000001      [  1] _main
0x100003FB8     0x00000000      [  2] _foo
0x100003FB8     0x00000048      [  0] compact unwind info
0x100004000     0x00000001      [  1] _number
```

Perf numbers when linking chromium framework on a 16-Core Intel Xeon W Mac Pro:
```
           base           diff           difference (95% CI)
sys_time   1.406 ± 0.020  1.388 ± 0.019  [  -1.9% ..   -0.6%]
user_time  5.557 ± 0.023  5.914 ± 0.020  [  +6.2% ..   +6.6%]
wall_time  4.455 ± 0.041  4.436 ± 0.035  [  -0.8% ..   -0.0%]
samples    35             35
```

Reviewed By: #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D114735
---
 lld/MachO/MapFile.cpp     | 37 +++++++------------------------------
 lld/test/MachO/map-file.s |  2 +-
 2 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 79471eecbd528..93abea2ed08b2 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -40,26 +40,6 @@ using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
-using SymbolMapTy = DenseMap>;
-
-// Returns a map from sections to their symbols.
-static SymbolMapTy getSectionSyms(ArrayRef syms) {
-  SymbolMapTy ret;
-  for (Defined *dr : syms)
-    ret[dr->isec].push_back(dr);
-
-  // Sort symbols by address. We want to print out symbols in the order they
-  // appear in the output file rather than the order they appeared in the input
-  // files.
-  for (auto &it : ret)
-    parallelSort(
-        it.second.begin(), it.second.end(), [](Defined *a, Defined *b) {
-          return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
-                                          : a->getName() < b->getName();
-        });
-  return ret;
-}
-
 // Returns a list of all symbols that we want to print out.
 static std::vector getSymbols() {
   std::vector v;
@@ -126,7 +106,10 @@ void macho::writeMapFile() {
 
   // Collect symbol info that we want to print out.
   std::vector syms = getSymbols();
-  SymbolMapTy sectionSyms = getSectionSyms(syms);
+  parallelSort(syms.begin(), syms.end(), [](Defined *a, Defined *b) {
+    return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
+                                    : a->getName() < b->getName();
+  });
   DenseMap symStr = getSymbolStrings(syms);
 
   // Dump table of sections
@@ -144,15 +127,9 @@ void macho::writeMapFile() {
   // Dump table of symbols
   os << "# Symbols:\n";
   os << "# Address\t    File  Name\n";
-  for (InputSection *isec : inputSections) {
-    auto symsIt = sectionSyms.find(isec);
-    assert(!shouldOmitFromOutput(isec) || (symsIt == sectionSyms.end()));
-    if (symsIt == sectionSyms.end())
-      continue;
-    for (Symbol *sym : symsIt->second) {
-      os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
-                   readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
-    }
+  for (Symbol *sym : syms) {
+    os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
+                 readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
   }
 
   // TODO: when we implement -dead_strip, we should dump dead stripped symbols
diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
index 67a44eb2dcd32..85c23e763e9ef 100644
--- a/lld/test/MachO/map-file.s
+++ b/lld/test/MachO/map-file.s
@@ -47,8 +47,8 @@ _main:
 
 # CHECK-NEXT: # Symbols:
 # CHECK-NEXT: # Address        File  Name
-# CHECK-NEXT: 0x[[#NUMBER]]    [  1]  _number
 # CHECK-NEXT: 0x[[#MAIN]]      [  1]  _main
 # CHECK-NEXT: 0x[[#FOO]]       [  2]  _foo
+# CHECK-NEXT: 0x[[#NUMBER]]    [  1]  _number
 
 # MAPFILE: "name":"Total Write map file"

From b8d38e8b4fcab071c5c4cb698e154023d06de69e Mon Sep 17 00:00:00 2001
From: Casey Carter 
Date: Wed, 29 Dec 2021 15:58:25 -0800
Subject: [PATCH 074/946] [libcxx][test] view_interface need not derive from
 view_base

... after LWG-3549.

Differential Revision: https://reviews.llvm.org/D117608
---
 .../ranges/range.utility/view.interface/view.interface.pass.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
index bc2bc95c1c2d2..ebc443ebe08d3 100644
--- a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
+++ b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
@@ -32,8 +32,6 @@ static_assert(!ValidViewInterfaceType);
 static_assert(!ValidViewInterfaceType);
 static_assert( ValidViewInterfaceType);
 
-static_assert(std::derived_from, std::ranges::view_base>);
-
 using InputIter = cpp20_input_iterator;
 
 struct InputRange : std::ranges::view_interface {

From d0cace5087145b6bd8c833cc25d3e6d08442326c Mon Sep 17 00:00:00 2001
From: Mogball 
Date: Thu, 20 Jan 2022 20:17:14 +0000
Subject: [PATCH 075/946] [mlir][pdl] Some ops are missing `NoSideEffect`

Querying or building constraints on types, operands, results, and attributes is side-effect free in both the matcher and the rewriter, so these ops should be marked as such.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117826
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 87918c3e4cb35..4a97c17da4aca 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -123,7 +123,7 @@ def PDL_ApplyNativeRewriteOp
 // pdl::AttributeOp
 //===----------------------------------------------------------------------===//
 
-def PDL_AttributeOp : PDL_Op<"attribute"> {
+def PDL_AttributeOp : PDL_Op<"attribute", [NoSideEffect]> {
   let summary = "Define an input attribute in a pattern";
   let description = [{
     `pdl.attribute` operations capture named attribute edges into an operation.
@@ -191,7 +191,8 @@ def PDL_EraseOp : PDL_Op<"erase", [HasParent<"pdl::RewriteOp">]> {
 // pdl::OperandOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperandOp : PDL_Op<"operand", [HasParent<"pdl::PatternOp">]> {
+def PDL_OperandOp
+    : PDL_Op<"operand", [HasParent<"pdl::PatternOp">, NoSideEffect]> {
   let summary = "Define an external input operand in a pattern";
   let description = [{
     `pdl.operand` operations capture external operand edges into an operation
@@ -228,7 +229,8 @@ def PDL_OperandOp : PDL_Op<"operand", [HasParent<"pdl::PatternOp">]> {
 // pdl::OperandsOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperandsOp : PDL_Op<"operands", [HasParent<"pdl::PatternOp">]> {
+def PDL_OperandsOp
+    : PDL_Op<"operands", [HasParent<"pdl::PatternOp">, NoSideEffect]> {
   let summary = "Define a range of input operands in a pattern";
   let description = [{
     `pdl.operands` operations capture external operand range edges into an
@@ -495,7 +497,7 @@ def PDL_ReplaceOp : PDL_Op<"replace", [
 // pdl::ResultOp
 //===----------------------------------------------------------------------===//
 
-def PDL_ResultOp : PDL_Op<"result"> {
+def PDL_ResultOp : PDL_Op<"result", [NoSideEffect]> {
   let summary = "Extract a result from an operation";
   let description = [{
     `pdl.result` operations extract result edges from an operation node within
@@ -528,7 +530,7 @@ def PDL_ResultOp : PDL_Op<"result"> {
 // pdl::ResultsOp
 //===----------------------------------------------------------------------===//
 
-def PDL_ResultsOp : PDL_Op<"results"> {
+def PDL_ResultsOp : PDL_Op<"results", [NoSideEffect]> {
   let summary = "Extract a result group from an operation";
   let description = [{
     `pdl.results` operations extract a result group from an operation within a
@@ -631,7 +633,7 @@ def PDL_RewriteOp : PDL_Op<"rewrite", [
 // pdl::TypeOp
 //===----------------------------------------------------------------------===//
 
-def PDL_TypeOp : PDL_Op<"type"> {
+def PDL_TypeOp : PDL_Op<"type", [NoSideEffect]> {
   let summary = "Define a type handle within a pattern";
   let description = [{
     `pdl.type` operations capture result type constraints of `Attributes`,
@@ -659,7 +661,7 @@ def PDL_TypeOp : PDL_Op<"type"> {
 // pdl::TypesOp
 //===----------------------------------------------------------------------===//
 
-def PDL_TypesOp : PDL_Op<"types"> {
+def PDL_TypesOp : PDL_Op<"types", [NoSideEffect]> {
   let summary = "Define a range of type handles within a pattern";
   let description = [{
     `pdl.types` operations capture result type constraints of `Value`s, and

From 7c471b56f2c22b984847100b318b01e31bf5f9cb Mon Sep 17 00:00:00 2001
From: Mogball 
Date: Thu, 20 Jan 2022 20:17:26 +0000
Subject: [PATCH 076/946] [mlir][pdl] OperationOp should not be side-effect
 free

An unbound OperationOp in the matcher (i.e. one with no uses) is already disallowed by the verifier. However, an OperationOp in the rewriter is not side-effect free -- it creates an op!

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117825
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td     |  3 +--
 .../pdl-to-pdl-interp-rewriter.mlir            | 18 ++++++++++++++++++
 mlir/test/Dialect/PDL/canonicalize.mlir        | 10 ++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/Dialect/PDL/canonicalize.mlir

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 4a97c17da4aca..258ad41292cc1 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -268,8 +268,7 @@ def PDL_OperandsOp
 // pdl::OperationOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperationOp
-    : PDL_Op<"operation", [AttrSizedOperandSegments, NoSideEffect]> {
+def PDL_OperationOp : PDL_Op<"operation", [AttrSizedOperandSegments]> {
   let summary = "Define an operation within a pattern";
   let description = [{
     `pdl.operation` operations define operation nodes within a pattern. Within
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
index d2f135864071f..f9415b3c45802 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
@@ -202,3 +202,21 @@ module @apply_native_rewrite {
     }
   }
 }
+
+// -----
+
+// CHECK-LABEL: module @unbound_rewrite_op
+module @unbound_rewrite_op {
+  // CHECK: module @rewriters
+  // CHECK:   func @pdl_generated_rewriter()
+  // CHECK:     %[[UNUSED:.*]] = pdl_interp.create_operation "bar.op"
+  // CHECK:     pdl_interp.finalize
+  pdl.pattern : benefit(1) {
+    %root = pdl.operation "foo.op"
+    pdl.rewrite %root {
+      %unused = pdl.operation "bar.op"
+    }
+  }
+}
+
+// -----
diff --git a/mlir/test/Dialect/PDL/canonicalize.mlir b/mlir/test/Dialect/PDL/canonicalize.mlir
new file mode 100644
index 0000000000000..5cb08acd884e3
--- /dev/null
+++ b/mlir/test/Dialect/PDL/canonicalize.mlir
@@ -0,0 +1,10 @@
+// RUN: mlir-opt -canonicalize %s | FileCheck %s
+
+pdl.pattern @operation_op : benefit(1) {
+  %root = pdl.operation "foo.op"
+  pdl.rewrite %root {
+    // CHECK: pdl.operation "bar.unused"
+    %unused_rewrite = pdl.operation "bar.unused"
+    pdl.erase %root
+  }
+}

From e99835ffedc23cc132c8a9c769c85ac20b66cb3a Mon Sep 17 00:00:00 2001
From: Mogball 
Date: Thu, 20 Jan 2022 20:17:40 +0000
Subject: [PATCH 077/946] [mlir][pdl] Make `pdl` the default dialect when
 parsing/printing

PDLDialect is a somewhat user-facing dialect whose ops contain exclusively other PDL ops in their regions, so it can take advantage of `OpAsmOpInterface` to provide nicer IR.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117828
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td    |   7 +-
 mlir/lib/Dialect/PDL/IR/PDL.cpp               |  10 +
 .../pdl-to-pdl-interp-matcher.mlir            | 298 +++++++++---------
 .../pdl-to-pdl-interp-rewriter.mlir           | 122 +++----
 mlir/test/Dialect/PDL/canonicalize.mlir       |  10 +-
 mlir/test/Dialect/PDL/invalid.mlir            | 136 ++++----
 mlir/test/Dialect/PDL/ops.mlir                | 140 ++++----
 mlir/test/python/dialects/pdl_ops.py          | 152 ++++-----
 8 files changed, 444 insertions(+), 431 deletions(-)

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 258ad41292cc1..a094381e81632 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -15,6 +15,7 @@
 
 include "mlir/Dialect/PDL/IR/PDLTypes.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/SymbolInterfaces.td"
 
 //===----------------------------------------------------------------------===//
@@ -402,7 +403,8 @@ def PDL_OperationOp : PDL_Op<"operation", [AttrSizedOperandSegments]> {
 //===----------------------------------------------------------------------===//
 
 def PDL_PatternOp : PDL_Op<"pattern", [
-    IsolatedFromAbove, SingleBlock, Symbol
+    IsolatedFromAbove, SingleBlock, Symbol,
+    DeclareOpInterfaceMethods
   ]> {
   let summary = "Define a rewrite pattern";
   let description = [{
@@ -573,7 +575,8 @@ def PDL_ResultsOp : PDL_Op<"results", [NoSideEffect]> {
 
 def PDL_RewriteOp : PDL_Op<"rewrite", [
      Terminator, HasParent<"pdl::PatternOp">, NoTerminator, NoRegionArguments,
-     SingleBlock, AttrSizedOperandSegments
+     SingleBlock, AttrSizedOperandSegments,
+     DeclareOpInterfaceMethods
   ]> {
   let summary = "Specify the rewrite of a matched pattern";
   let description = [{
diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp
index 95a3fb742fa11..e0f6753743e13 100644
--- a/mlir/lib/Dialect/PDL/IR/PDL.cpp
+++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp
@@ -355,6 +355,11 @@ RewriteOp PatternOp::getRewriter() {
   return cast(body().front().getTerminator());
 }
 
+/// The default dialect is `pdl`.
+StringRef PatternOp::getDefaultDialect() {
+  return PDLDialect::getDialectNamespace();
+}
+
 //===----------------------------------------------------------------------===//
 // pdl::ReplaceOp
 //===----------------------------------------------------------------------===//
@@ -431,6 +436,11 @@ static LogicalResult verify(RewriteOp op) {
   return success();
 }
 
+/// The default dialect is `pdl`.
+StringRef RewriteOp::getDefaultDialect() {
+  return PDLDialect::getDialectNamespace();
+}
+
 //===----------------------------------------------------------------------===//
 // pdl::TypeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
index fd6cfe5fa7c5f..d9a8706471fe4 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
@@ -27,8 +27,8 @@ module @simple {
   // CHECK:     pdl_interp.apply_rewrite "rewriter"(%[[REWRITE_ROOT]]
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root with "rewriter"
+    %root = operation "foo.op"
+    rewrite %root with "rewriter"
   }
 }
 
@@ -48,11 +48,11 @@ module @attributes {
   // CHECK-DAG:   %[[ATTR1_TYPE:.*]] = pdl_interp.get_attribute_type of %[[ATTR1]]
   // CHECK-DAG:   pdl_interp.check_type %[[ATTR1_TYPE]] is i64
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %attr = pdl.attribute 10 : i64
-    %attr1 = pdl.attribute : %type
-    %root = pdl.operation {"attr" = %attr, "attr1" = %attr1}
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %attr = attribute 10 : i64
+    %attr1 = attribute : %type
+    %root = operation {"attr" = %attr, "attr1" = %attr1}
+    rewrite %root with "rewriter"
   }
 }
 
@@ -67,13 +67,13 @@ module @constraints {
   // CHECK:       pdl_interp.apply_constraint "multi_constraint" [true](%[[INPUT]], %[[INPUT1]], %[[RESULT]]
 
   pdl.pattern : benefit(1) {
-    %input0 = pdl.operand
-    %input1 = pdl.operand
-    %root = pdl.operation(%input0, %input1 : !pdl.value, !pdl.value)
-    %result0 = pdl.result 0 of %root
+    %input0 = operand
+    %input1 = operand
+    %root = operation(%input0, %input1 : !pdl.value, !pdl.value)
+    %result0 = result 0 of %root
 
     pdl.apply_native_constraint "multi_constraint"[true](%input0, %input1, %result0 : !pdl.value, !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    rewrite %root with "rewriter"
   }
 }
 
@@ -94,10 +94,10 @@ module @inputs {
   // CHECK-DAG:  %[[INPUT1:.*]] = pdl_interp.get_operand 1 of %[[ROOT]]
   // CHECK-DAG:  pdl_interp.are_equal %[[INPUT]], %[[INPUT1]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %input = pdl.operand : %type
-    %root = pdl.operation(%input, %input : !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %input = operand : %type
+    %root = operation(%input, %input : !pdl.value, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -124,11 +124,11 @@ module @variadic_inputs {
   // CHECK-DAG:  %[[INPUT2:.*]] = pdl_interp.get_operands 2 of %[[ROOT]] : !pdl.value
   // CHECK-DAG:  pdl_interp.are_equal %[[INPUT]], %[[INPUT2]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %inputs = pdl.operands : %types
-    %input = pdl.operand
-    %root = pdl.operation(%input, %inputs, %input : !pdl.value, !pdl.range, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %inputs = operands : %types
+    %input = operand
+    %root = operation(%input, %inputs, %input : !pdl.value, !pdl.range, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -147,10 +147,10 @@ module @single_operand_range {
   // The operand count is unknown, so there is no need to check for it.
   // CHECK-NOT: pdl_interp.check_operand_count
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %operands = pdl.operands : %types
-    %root = pdl.operation(%operands : !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %operands = operands : %types
+    %root = operation(%operands : !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -171,10 +171,10 @@ module @results {
   // access for it.
   // CHECK-NOT:   pdl_interp.get_result 1 of %[[ROOT]]
   pdl.pattern : benefit(1) {
-    %type1 = pdl.type : i32
-    %type2 = pdl.type
-    %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type1 = type : i32
+    %type2 = type
+    %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -201,10 +201,10 @@ module @variadic_results {
   // CHECK-DAG:  %[[RESULT2:.*]] = pdl_interp.get_results 2 of %[[ROOT]] : !pdl.value
   // CHECK-DAG:   pdl_interp.is_not_null %[[RESULT2]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %type = pdl.type
-    %root = pdl.operation -> (%type, %types, %type : !pdl.type, !pdl.range, !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %type = type
+    %root = operation -> (%type, %types, %type : !pdl.type, !pdl.range, !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -223,9 +223,9 @@ module @single_result_range {
   // The result count is unknown, so there is no need to check for it.
   // CHECK-NOT: pdl_interp.check_result_count
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %root = pdl.operation -> (%types : !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %root = operation -> (%types : !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -251,14 +251,14 @@ module @results_as_operands {
   // CHECK-DAG:   pdl_interp.are_equal %[[DEF_OP_0]], %[[DEF_OP_1]]
 
   pdl.pattern : benefit(1) {
-    %type1 = pdl.type : i32
-    %type2 = pdl.type
-    %inputOp = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-    %result1 = pdl.result 0 of %inputOp
-    %result2 = pdl.result 1 of %inputOp
-
-    %root = pdl.operation(%result1, %result2 : !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %type1 = type : i32
+    %type2 = type
+    %inputOp = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+    %result1 = result 0 of %inputOp
+    %result2 = result 1 of %inputOp
+
+    %root = operation(%result1, %result2 : !pdl.value, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -274,12 +274,12 @@ module @single_result_range_as_operands {
   // CHECK-DAG:  pdl_interp.are_equal %[[RESULTS]], %[[OPERANDS]] : !pdl.range
 
   pdl.pattern : benefit(1) {
-    %types = pdl.types
-    %inputOp = pdl.operation -> (%types : !pdl.range)
-    %results = pdl.results of %inputOp
+    %types = types
+    %inputOp = operation -> (%types : !pdl.range)
+    %results = results of %inputOp
 
-    %root = pdl.operation(%results : !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %root = operation(%results : !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -292,14 +292,14 @@ module @switch_single_result_type {
   // CHECK:   %[[RESULT_TYPE:.*]] = pdl_interp.get_value_type of %[[RESULT]]
   // CHECK:   pdl_interp.switch_type %[[RESULT_TYPE]] to [i32, i64]
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %root = pdl.operation -> (%type : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i32
+    %root = operation -> (%type : !pdl.type)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %root = pdl.operation -> (%type : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %root = operation -> (%type : !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -312,14 +312,14 @@ module @switch_result_types {
   // CHECK:   %[[RESULT_TYPES:.*]] = pdl_interp.get_value_type of %[[RESULTS]]
   // CHECK:   pdl_interp.switch_types %[[RESULT_TYPES]] to {{\[\[}}i32], [i64, i32]]
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i32]
-    %root = pdl.operation -> (%types : !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i32]
+    %root = operation -> (%types : !pdl.range)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64, i32]
-    %root = pdl.operation -> (%types : !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64, i32]
+    %root = operation -> (%types : !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -337,17 +337,17 @@ module @switch_operand_count_at_least {
   // CHECK: ^[[PATTERN_1_NEXT_BLOCK]]:
   // CHECK-NEXT: {{.*}} -> ^{{.*}}, ^bb2
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %operands = pdl.operands
-    %root = pdl.operation(%operand, %operands : !pdl.value, !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %operand = operand
+    %operands = operands
+    %root = operation(%operand, %operands : !pdl.value, !pdl.range)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %operand2 = pdl.operand
-    %operands = pdl.operands
-    %root = pdl.operation(%operand, %operand2, %operands : !pdl.value, !pdl.value, !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %operand = operand
+    %operand2 = operand
+    %operands = operands
+    %root = operation(%operand, %operand2, %operands : !pdl.value, !pdl.value, !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -366,17 +366,17 @@ module @switch_result_count_at_least {
   // CHECK-NEXT: pdl_interp.get_result
   // CHECK-NEXT: pdl_interp.is_not_null {{.*}} -> ^{{.*}}, ^[[PATTERN_2_BLOCK]]
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %types = pdl.types
-    %root = pdl.operation -> (%type, %types : !pdl.type, !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %type = type
+    %types = types
+    %root = operation -> (%type, %types : !pdl.type, !pdl.range)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %type2 = pdl.type
-    %types = pdl.types
-    %root = pdl.operation -> (%type, %type2, %types : !pdl.type, !pdl.type, !pdl.range)
-    pdl.rewrite %root with "rewriter"
+    %type = type
+    %type2 = type
+    %types = types
+    %root = operation -> (%type, %type2, %types : !pdl.type, !pdl.type, !pdl.range)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -396,16 +396,16 @@ module @predicate_ordering {
   // CHECK: pdl_interp.apply_constraint "typeConstraint" [](%[[RESULT_TYPE]]
 
   pdl.pattern : benefit(1) {
-    %resultType = pdl.type
+    %resultType = type
     pdl.apply_native_constraint "typeConstraint"[](%resultType : !pdl.type)
-    %root = pdl.operation -> (%resultType : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %root = operation -> (%resultType : !pdl.type)
+    rewrite %root with "rewriter"
   }
 
   pdl.pattern : benefit(1) {
-    %resultType = pdl.type
-    %apply = pdl.operation -> (%resultType : !pdl.type)
-    pdl.rewrite %apply with "rewriter"
+    %resultType = type
+    %apply = operation -> (%resultType : !pdl.type)
+    rewrite %apply with "rewriter"
   }
 }
 
@@ -435,16 +435,16 @@ module @multi_root {
   // CHECK-DAG:   pdl_interp.is_not_null %[[ROOT2]] : !pdl.operation
 
   pdl.pattern @rewrite_multi_root : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-    %val1 = pdl.result 0 of %op1
-    %root1 = pdl.operation(%val1 : !pdl.value)
-    %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-    %val2 = pdl.result 0 of %op2
-    %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-    pdl.rewrite %root1 with "rewriter"(%root2 : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+    %val1 = result 0 of %op1
+    %root1 = operation(%val1 : !pdl.value)
+    %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+    %val2 = result 0 of %op2
+    %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+    rewrite %root1 with "rewriter"(%root2 : !pdl.operation)
   }
 }
 
@@ -467,13 +467,13 @@ module @overlapping_roots {
   // CHECK-DAG: pdl_interp.is_not_null %[[INPUT2]] : !pdl.value
 
   pdl.pattern @rewrite_overlapping_roots : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %op = pdl.operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
-    %val = pdl.result 0 of %op
-    %root = pdl.operation(%val : !pdl.value)
-    pdl.rewrite with "rewriter"(%root : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %op = operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
+    %val = result 0 of %op
+    %root = operation(%val : !pdl.value)
+    rewrite with "rewriter"(%root : !pdl.operation)
   }
 }
 
@@ -499,13 +499,13 @@ module @force_overlapped_root {
   // CHECK-DAG:   pdl_interp.check_operand_count of %[[OP]] is 1
 
   pdl.pattern @rewrite_forced_overlapped_root : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %root = pdl.operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
-    %val = pdl.result 0 of %root
-    %op = pdl.operation(%val : !pdl.value)
-    pdl.rewrite %root with "rewriter"(%op : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %root = operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
+    %val = result 0 of %root
+    %op = operation(%val : !pdl.value)
+    rewrite %root with "rewriter"(%op : !pdl.operation)
   }
 }
 
@@ -527,11 +527,11 @@ module @variadic_results_all {
   // CHECK-DAG:   pdl_interp.is_not_null %[[OP]]
   // CHECK-DAG:   pdl_interp.check_result_count of %[[OP]] is 0
   pdl.pattern @variadic_results_all : benefit(1) {
-    %types = pdl.types
-    %root = pdl.operation -> (%types : !pdl.range)
-    %vals = pdl.results of %root
-    %op = pdl.operation(%vals : !pdl.range)
-    pdl.rewrite %root with "rewriter"(%op : !pdl.operation)
+    %types = types
+    %root = operation -> (%types : !pdl.range)
+    %vals = results of %root
+    %op = operation(%vals : !pdl.range)
+    rewrite %root with "rewriter"(%op : !pdl.operation)
   }
 }
 
@@ -562,14 +562,14 @@ module @variadic_results_at {
   // CHECK-DAG:   pdl_interp.check_operand_count of %[[OP]] is 0
   // CHECK-DAG:   pdl_interp.check_result_count of %[[OP]] is at_least 1
   pdl.pattern @variadic_results_at : benefit(1) {
-    %type = pdl.type
-    %types = pdl.types
-    %val = pdl.operand
-    %op = pdl.operation -> (%types, %type : !pdl.range, !pdl.type)
-    %vals = pdl.results 0 of %op -> !pdl.range
-    %root1 = pdl.operation(%vals, %val : !pdl.range, !pdl.value)
-    %root2 = pdl.operation(%val, %vals : !pdl.value, !pdl.range)
-    pdl.rewrite with "rewriter"(%root1, %root2 : !pdl.operation, !pdl.operation)
+    %type = type
+    %types = types
+    %val = operand
+    %op = operation -> (%types, %type : !pdl.range, !pdl.type)
+    %vals = results 0 of %op -> !pdl.range
+    %root1 = operation(%vals, %val : !pdl.range, !pdl.value)
+    %root2 = operation(%val, %vals : !pdl.value, !pdl.range)
+    rewrite with "rewriter"(%root1, %root2 : !pdl.operation, !pdl.operation)
   }
 }
 
@@ -583,11 +583,11 @@ module @attribute_literal {
 
   // Check the correct lowering of an attribute that hasn't been bound.
   pdl.pattern : benefit(1) {
-    %attr = pdl.attribute 10
+    %attr = attribute 10
     pdl.apply_native_constraint "constraint"(%attr: !pdl.attribute)
 
-    %root = pdl.operation
-    pdl.rewrite %root with "rewriter"
+    %root = operation
+    rewrite %root with "rewriter"
   }
 }
 
@@ -602,12 +602,12 @@ module @type_literal {
 
   // Check the correct lowering of a type that hasn't been bound.
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %types = pdl.types : [i32, i64]
+    %type = type : i32
+    %types = types : [i32, i64]
     pdl.apply_native_constraint "constraint"(%type, %types: !pdl.type, !pdl.range)
 
-    %root = pdl.operation
-    pdl.rewrite %root with "rewriter"
+    %root = operation
+    rewrite %root with "rewriter"
   }
 }
 
@@ -638,16 +638,16 @@ module @common_connector {
   // CHECK-DAG:     pdl_interp.are_equal %[[ROOTB_OP]], %[[VAL0]] : !pdl.value
   // CHECK-DAG    } -> ^[[CONTA:.*]]
   pdl.pattern @common_connector : benefit(1) {
-      %type = pdl.type
-      %op = pdl.operation -> (%type, %type : !pdl.type, !pdl.type)
-      %val0 = pdl.result 0 of %op
-      %val1 = pdl.result 1 of %op
-      %rootA = pdl.operation (%val0 : !pdl.value)
-      %rootB = pdl.operation (%val0 : !pdl.value)
-      %inter = pdl.operation (%val1 : !pdl.value) -> (%type : !pdl.type)
-      %val2 = pdl.result 0 of %inter
-      %rootC = pdl.operation (%val2 : !pdl.value)
-      pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
+      %type = type
+      %op = operation -> (%type, %type : !pdl.type, !pdl.type)
+      %val0 = result 0 of %op
+      %val1 = result 1 of %op
+      %rootA = operation (%val0 : !pdl.value)
+      %rootB = operation (%val0 : !pdl.value)
+      %inter = operation (%val1 : !pdl.value) -> (%type : !pdl.type)
+      %val2 = result 0 of %inter
+      %rootC = operation (%val2 : !pdl.value)
+      rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
   }
 }
 
@@ -679,15 +679,15 @@ module @common_connector_range {
   // CHECK-DAG:     pdl_interp.are_equal %[[ROOTB_OPS]], %[[VALS0]] : !pdl.range
   // CHECK-DAG    } -> ^[[CONTA:.*]]
   pdl.pattern @common_connector_range : benefit(1) {
-    %types = pdl.types
-    %op = pdl.operation -> (%types, %types : !pdl.range, !pdl.range)
-    %vals0 = pdl.results 0 of %op -> !pdl.range
-    %vals1 = pdl.results 1 of %op -> !pdl.range
-    %rootA = pdl.operation (%vals0 : !pdl.range)
-    %rootB = pdl.operation (%vals0 : !pdl.range)
-    %inter = pdl.operation (%vals1 : !pdl.range) -> (%types : !pdl.range)
-    %vals2 = pdl.results of %inter
-    %rootC = pdl.operation (%vals2 : !pdl.range)
-    pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
+    %types = types
+    %op = operation -> (%types, %types : !pdl.range, !pdl.range)
+    %vals0 = results 0 of %op -> !pdl.range
+    %vals1 = results 1 of %op -> !pdl.range
+    %rootA = operation (%vals0 : !pdl.range)
+    %rootB = operation (%vals0 : !pdl.range)
+    %inter = operation (%vals1 : !pdl.range) -> (%types : !pdl.range)
+    %vals2 = results of %inter
+    %rootC = operation (%vals2 : !pdl.range)
+    rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
   }
 }
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
index f9415b3c45802..8ca771f87f657 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
@@ -8,9 +8,9 @@ module @external {
   // CHECK:   func @pdl_generated_rewriter(%[[ROOT:.*]]: !pdl.operation, %[[INPUT:.*]]: !pdl.value)
   // CHECK:     pdl_interp.apply_rewrite "rewriter" [true](%[[ROOT]], %[[INPUT]] : !pdl.operation, !pdl.value)
   pdl.pattern : benefit(1) {
-    %input = pdl.operand
-    %root = pdl.operation "foo.op"(%input : !pdl.value)
-    pdl.rewrite %root with "rewriter"[true](%input : !pdl.value)
+    %input = operand
+    %root = operation "foo.op"(%input : !pdl.value)
+    rewrite %root with "rewriter"[true](%input : !pdl.value)
   }
 }
 
@@ -23,9 +23,9 @@ module @erase {
   // CHECK:     pdl_interp.erase %[[ROOT]]
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      pdl.erase %root
+    %root = operation "foo.op"
+    rewrite %root {
+      erase %root
     }
   }
 }
@@ -39,12 +39,12 @@ module @operation_attributes {
   // CHECK:     %[[ATTR1:.*]] = pdl_interp.create_attribute true
   // CHECK:     pdl_interp.create_operation "foo.op" {"attr" = %[[ATTR]], "attr1" = %[[ATTR1]]}
   pdl.pattern : benefit(1) {
-    %attr = pdl.attribute
-    %root = pdl.operation "foo.op" {"attr" = %attr}
-    pdl.rewrite %root {
-      %attr1 = pdl.attribute true
-      %newOp = pdl.operation "foo.op" {"attr" = %attr, "attr1" = %attr1}
-      pdl.erase %root
+    %attr = attribute
+    %root = operation "foo.op" {"attr" = %attr}
+    rewrite %root {
+      %attr1 = attribute true
+      %newOp = operation "foo.op" {"attr" = %attr, "attr1" = %attr1}
+      erase %root
     }
   }
 }
@@ -59,14 +59,14 @@ module @operation_operands {
   // CHECK:     %[[OPERAND1:.*]] = pdl_interp.get_result 0 of %[[NEWOP]]
   // CHECK:     pdl_interp.create_operation "foo.op2"(%[[OPERAND1]] : !pdl.value)
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %root = pdl.operation "foo.op"(%operand : !pdl.value)
-    pdl.rewrite %root {
-      %type = pdl.type : i32
-      %newOp = pdl.operation "foo.op"(%operand : !pdl.value) -> (%type : !pdl.type)
-      %result = pdl.result 0 of %newOp
-      %newOp1 = pdl.operation "foo.op2"(%result : !pdl.value)
-      pdl.erase %root
+    %operand = operand
+    %root = operation "foo.op"(%operand : !pdl.value)
+    rewrite %root {
+      %type = type : i32
+      %newOp = operation "foo.op"(%operand : !pdl.value) -> (%type : !pdl.type)
+      %result = result 0 of %newOp
+      %newOp1 = operation "foo.op2"(%result : !pdl.value)
+      erase %root
     }
   }
 }
@@ -81,13 +81,13 @@ module @operation_infer_types_from_replaceop {
   // CHECK:     %[[RESULT_TYPES:.*]] = pdl_interp.get_value_type of %[[RESULTS]]
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[RESULT_TYPES]] : !pdl.range)
   pdl.pattern : benefit(1) {
-    %rootType = pdl.type
-    %rootType1 = pdl.type
-    %root = pdl.operation "foo.op" -> (%rootType, %rootType1 : !pdl.type, !pdl.type)
-    pdl.rewrite %root {
-      %newType1 = pdl.type
-      %newOp = pdl.operation "foo.op" -> (%rootType, %newType1 : !pdl.type, !pdl.type)
-      pdl.replace %root with %newOp
+    %rootType = type
+    %rootType1 = type
+    %root = operation "foo.op" -> (%rootType, %rootType1 : !pdl.type, !pdl.type)
+    rewrite %root {
+      %newType1 = type
+      %newOp = operation "foo.op" -> (%rootType, %newType1 : !pdl.type, !pdl.type)
+      replace %root with %newOp
     }
   }
 }
@@ -100,11 +100,11 @@ module @operation_infer_types_from_otherop_individual_results {
   // CHECK:   func @pdl_generated_rewriter(%[[TYPE:.*]]: !pdl.type, %[[TYPES:.*]]: !pdl.range
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPE]], %[[TYPES]] : !pdl.type, !pdl.range)
   pdl.pattern : benefit(1) {
-    %rootType = pdl.type
-    %rootTypes = pdl.types
-    %root = pdl.operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range)
+    %rootType = type
+    %rootTypes = types
+    %root = operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range)
     }
   }
 }
@@ -117,10 +117,10 @@ module @operation_infer_types_from_otherop_results {
   // CHECK:   func @pdl_generated_rewriter(%[[TYPES:.*]]: !pdl.range
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPES]] : !pdl.range)
   pdl.pattern : benefit(1) {
-    %rootTypes = pdl.types
-    %root = pdl.operation "foo.op" -> (%rootTypes : !pdl.range)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%rootTypes : !pdl.range)
+    %rootTypes = types
+    %root = operation "foo.op" -> (%rootTypes : !pdl.range)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%rootTypes : !pdl.range)
     }
   }
 }
@@ -135,11 +135,11 @@ module @replace_with_op {
   // CHECK:     %[[RESULTS:.*]] = pdl_interp.get_results of %[[NEWOP]]
   // CHECK:     pdl_interp.replace %[[ROOT]] with (%[[RESULTS]] : !pdl.range)
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
-      pdl.replace %root with %newOp
+    %type = type : i32
+    %root = operation "foo.op" -> (%type : !pdl.type)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%type : !pdl.type)
+      replace %root with %newOp
     }
   }
 }
@@ -156,14 +156,14 @@ module @replace_with_values {
   // CHECK:     %[[RESULTS_2:.*]] = pdl_interp.get_results 2 of %[[NEWOP]] : !pdl.value
   // CHECK:     pdl_interp.replace %[[ROOT]] with (%[[RESULT]], %[[RESULTS]], %[[RESULTS_2]] : !pdl.value, !pdl.range, !pdl.value)
   pdl.pattern : benefit(1) {
-    %types = pdl.types
-    %root = pdl.operation "foo.op" -> (%types : !pdl.range)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%types : !pdl.range)
-      %newResult = pdl.result 0 of %newOp
-      %newResults = pdl.results 1 of %newOp -> !pdl.range
-      %newResults2 = pdl.results 2 of %newOp -> !pdl.value
-      pdl.replace %root with (%newResult, %newResults, %newResults2 : !pdl.value, !pdl.range, !pdl.value)
+    %types = types
+    %root = operation "foo.op" -> (%types : !pdl.range)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%types : !pdl.range)
+      %newResult = result 0 of %newOp
+      %newResults = results 1 of %newOp -> !pdl.range
+      %newResults2 = results 2 of %newOp -> !pdl.value
+      replace %root with (%newResult, %newResults, %newResults2 : !pdl.value, !pdl.range, !pdl.value)
     }
   }
 }
@@ -177,10 +177,10 @@ module @replace_with_no_results {
   // CHECK:     pdl_interp.create_operation "foo.op"
   // CHECK:     pdl_interp.erase %[[ROOT]]
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op"
-      pdl.replace %root with %newOp
+    %root = operation "foo.op"
+    rewrite %root {
+      %newOp = operation "foo.op"
+      replace %root with %newOp
     }
   }
 }
@@ -194,11 +194,11 @@ module @apply_native_rewrite {
   // CHECK:     %[[TYPE:.*]] = pdl_interp.apply_rewrite "functor" [true](%[[ROOT]] : !pdl.operation) : !pdl.type
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPE]] : !pdl.type)
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-    pdl.rewrite %root {
-      %newType = pdl.apply_native_rewrite "functor"[true](%root : !pdl.operation) : !pdl.type
-      %newOp = pdl.operation "foo.op" -> (%newType : !pdl.type)
+    %type = type
+    %root = operation "foo.op" -> (%type : !pdl.type)
+    rewrite %root {
+      %newType = apply_native_rewrite "functor"[true](%root : !pdl.operation) : !pdl.type
+      %newOp = operation "foo.op" -> (%newType : !pdl.type)
     }
   }
 }
@@ -212,9 +212,9 @@ module @unbound_rewrite_op {
   // CHECK:     %[[UNUSED:.*]] = pdl_interp.create_operation "bar.op"
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      %unused = pdl.operation "bar.op"
+    %root = operation "foo.op"
+    rewrite %root {
+      %unused = operation "bar.op"
     }
   }
 }
diff --git a/mlir/test/Dialect/PDL/canonicalize.mlir b/mlir/test/Dialect/PDL/canonicalize.mlir
index 5cb08acd884e3..94688a281625b 100644
--- a/mlir/test/Dialect/PDL/canonicalize.mlir
+++ b/mlir/test/Dialect/PDL/canonicalize.mlir
@@ -1,10 +1,10 @@
 // RUN: mlir-opt -canonicalize %s | FileCheck %s
 
 pdl.pattern @operation_op : benefit(1) {
-  %root = pdl.operation "foo.op"
-  pdl.rewrite %root {
-    // CHECK: pdl.operation "bar.unused"
-    %unused_rewrite = pdl.operation "bar.unused"
-    pdl.erase %root
+  %root = operation "foo.op"
+  rewrite %root {
+    // CHECK: operation "bar.unused"
+    %unused_rewrite = operation "bar.unused"
+    erase %root
   }
 }
diff --git a/mlir/test/Dialect/PDL/invalid.mlir b/mlir/test/Dialect/PDL/invalid.mlir
index 17b7370292b0c..2c630269f80d4 100644
--- a/mlir/test/Dialect/PDL/invalid.mlir
+++ b/mlir/test/Dialect/PDL/invalid.mlir
@@ -5,11 +5,11 @@
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected at least one argument}}
   "pdl.apply_native_constraint"() {name = "foo", params = []} : () -> ()
-  pdl.rewrite %op with "rewriter"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -19,8 +19,8 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{expected at least one argument}}
     "pdl.apply_native_rewrite"() {name = "foo", params = []} : () -> ()
   }
@@ -33,34 +33,34 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %type = pdl.type
+  %type = type
 
   // expected-error@below {{expected only one of [`type`, `value`] to be set}}
-  %attr = pdl.attribute : %type 10
+  %attr = attribute : %type 10
 
-  %op = pdl.operation "foo.op" {"attr" = %attr} -> (%type : !pdl.type)
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op" {"attr" = %attr} -> (%type : !pdl.type)
+  rewrite %op with "rewriter"
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
-    %type = pdl.type
+  %op = operation "foo.op"
+  rewrite %op {
+    %type = type
 
     // expected-error@below {{expected constant value when specified within a `pdl.rewrite`}}
-    %attr = pdl.attribute : %type
+    %attr = attribute : %type
   }
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{expected constant value when specified within a `pdl.rewrite`}}
-    %attr = pdl.attribute
+    %attr = attribute
   }
 }
 
@@ -68,10 +68,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.attribute
+  %unused = attribute
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -82,10 +82,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operand
+  %unused = operand
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -96,10 +96,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operands
+  %unused = operands
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -109,10 +109,10 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{must have an operation name when nested within a `pdl.rewrite`}}
-    %newOp = pdl.operation
+    %newOp = operation
   }
 }
 
@@ -124,19 +124,19 @@ pdl.pattern : benefit(1) {
     attributeNames = ["attr"],
     operand_segment_sizes = dense<0> : vector<3xi32>
   } : () -> (!pdl.operation)
-  pdl.rewrite %op with "rewriter"
+  rewrite %op with "rewriter"
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
-    %type = pdl.type
+  %op = operation "foo.op"
+  rewrite %op {
+    %type = type
 
     // expected-error@below {{op must have inferable or constrained result types when nested within `pdl.rewrite`}}
     // expected-note@below {{result type #0 was not constrained}}
-    %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
+    %newOp = operation "foo.op" -> (%type : !pdl.type)
   }
 }
 
@@ -144,10 +144,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operation "foo.op"
+  %unused = operation "foo.op"
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -166,7 +166,7 @@ pdl.pattern : benefit(1) {
 
 // expected-error@below {{the pattern must contain at least one `pdl.operation`}}
 pdl.pattern : benefit(1) {
-  pdl.rewrite with "foo"
+  rewrite with "foo"
 }
 
 // -----
@@ -175,44 +175,44 @@ pdl.pattern : benefit(1) {
   // expected-note@below {{see non-`pdl` operation defined here}}
   "test.foo.other_op"() : () -> ()
 
-  %root = pdl.operation "foo.op"
-  pdl.rewrite %root with "foo"
+  %root = operation "foo.op"
+  rewrite %root with "foo"
 }
 
 // -----
 // expected-error@below {{the operations must form a connected component}}
 pdl.pattern : benefit(1) {
-  %op1 = pdl.operation "foo.op"
-  %op2 = pdl.operation "bar.op"
+  %op1 = operation "foo.op"
+  %op2 = operation "bar.op"
   // expected-note@below {{see a disconnected value / operation here}}
-  %val = pdl.result 0 of %op2
-  pdl.rewrite %op1 with "foo"(%val : !pdl.value)
+  %val = result 0 of %op2
+  rewrite %op1 with "foo"(%val : !pdl.value)
 }
 
 // -----
 // expected-error@below {{the operations must form a connected component}}
 pdl.pattern : benefit(1) {
-  %type = pdl.type
-  %op1 = pdl.operation "foo.op" -> (%type : !pdl.type)
-  %val = pdl.result 0 of %op1
-  %op2 = pdl.operation "bar.op"(%val : !pdl.value)
+  %type = type
+  %op1 = operation "foo.op" -> (%type : !pdl.type)
+  %val = result 0 of %op1
+  %op2 = operation "bar.op"(%val : !pdl.value)
   // expected-note@below {{see a disconnected value / operation here}}
-  %op3 = pdl.operation "baz.op"
-  pdl.rewrite {
-    pdl.erase %op1
-    pdl.erase %op2
-    pdl.erase %op3
+  %op3 = operation "baz.op"
+  rewrite {
+    erase %op1
+    erase %op2
+    erase %op3
   }
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %type = pdl.type : i32
-  %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
-    %newResult = pdl.result 0 of %newOp
+  %type = type : i32
+  %root = operation "foo.op" -> (%type : !pdl.type)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type : !pdl.type)
+    %newResult = result 0 of %newOp
 
     // expected-error@below {{expected no replacement values to be provided when the replacement operation is present}}
     "pdl.replace"(%root, %newOp, %newResult) {
@@ -228,10 +228,10 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %root = pdl.operation "foo.op"
+  %root = operation "foo.op"
   // expected-error@below {{expected `pdl.range` result type when no index is specified, but got: '!pdl.value'}}
   %results = "pdl.results"(%root) : (!pdl.operation) -> !pdl.value
-  pdl.rewrite %root with "rewriter"
+  rewrite %root with "rewriter"
 }
 
 // -----
@@ -241,7 +241,7 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected rewrite region to be non-empty if external name is not specified}}
   "pdl.rewrite"(%op) ({}) {
@@ -252,7 +252,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected no external arguments when the rewrite is specified inline}}
   "pdl.rewrite"(%op, %op) ({
@@ -265,7 +265,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected no external constant parameters when the rewrite is specified inline}}
   "pdl.rewrite"(%op) ({
@@ -278,7 +278,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected rewrite region to be empty when rewrite is external}}
   "pdl.rewrite"(%op) ({
@@ -297,10 +297,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.type
+  %unused = type
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -311,8 +311,8 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.types
+  %unused = types
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
diff --git a/mlir/test/Dialect/PDL/ops.mlir b/mlir/test/Dialect/PDL/ops.mlir
index 9c7daf46a0907..1e2261a3c2a44 100644
--- a/mlir/test/Dialect/PDL/ops.mlir
+++ b/mlir/test/Dialect/PDL/ops.mlir
@@ -6,68 +6,68 @@
 
 pdl.pattern @operations : benefit(1) {
   // Operation with attributes and results.
-  %attribute = pdl.attribute
-  %type = pdl.type
-  %op0 = pdl.operation {"attr" = %attribute} -> (%type : !pdl.type)
+  %attribute = attribute
+  %type = type
+  %op0 = operation {"attr" = %attribute} -> (%type : !pdl.type)
   %op0_result = pdl.result 0 of %op0
 
   // Operation with input.
-  %input = pdl.operand
-  %root = pdl.operation(%op0_result, %input : !pdl.value, !pdl.value)
-  pdl.rewrite %root with "rewriter"
+  %input = operand
+  %root = operation(%op0_result, %input : !pdl.value, !pdl.value)
+  rewrite %root with "rewriter"
 }
 
 // -----
 
 pdl.pattern @rewrite_with_args : benefit(1) {
-  %input = pdl.operand
-  %root = pdl.operation(%input : !pdl.value)
-  pdl.rewrite %root with "rewriter"(%input : !pdl.value)
+  %input = operand
+  %root = operation(%input : !pdl.value)
+  rewrite %root with "rewriter"(%input : !pdl.value)
 }
 
 // -----
 
 pdl.pattern @rewrite_with_params : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root with "rewriter"["I am param"]
+  %root = operation
+  rewrite %root with "rewriter"["I am param"]
 }
 
 // -----
 
 pdl.pattern @rewrite_with_args_and_params : benefit(1) {
-  %input = pdl.operand
-  %root = pdl.operation(%input : !pdl.value)
-  pdl.rewrite %root with "rewriter"["I am param"](%input : !pdl.value)
+  %input = operand
+  %root = operation(%input : !pdl.value)
+  rewrite %root with "rewriter"["I am param"](%input : !pdl.value)
 }
 
 // -----
 
 pdl.pattern @rewrite_multi_root_optimal : benefit(2) {
-  %input1 = pdl.operand
-  %input2 = pdl.operand
-  %type = pdl.type
-  %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-  %val1 = pdl.result 0 of %op1
-  %root1 = pdl.operation(%val1 : !pdl.value)
-  %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-  %val2 = pdl.result 0 of %op2
-  %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-  pdl.rewrite with "rewriter"["I am param"](%root1, %root2 : !pdl.operation, !pdl.operation)
+  %input1 = operand
+  %input2 = operand
+  %type = type
+  %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+  %val1 = result 0 of %op1
+  %root1 = operation(%val1 : !pdl.value)
+  %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+  %val2 = result 0 of %op2
+  %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+  rewrite with "rewriter"["I am param"](%root1, %root2 : !pdl.operation, !pdl.operation)
 }
 
 // -----
 
 pdl.pattern @rewrite_multi_root_forced : benefit(2) {
-  %input1 = pdl.operand
-  %input2 = pdl.operand
-  %type = pdl.type
-  %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-  %val1 = pdl.result 0 of %op1
-  %root1 = pdl.operation(%val1 : !pdl.value)
-  %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-  %val2 = pdl.result 0 of %op2
-  %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-  pdl.rewrite %root1 with "rewriter"["I am param"](%root2 : !pdl.operation)
+  %input1 = operand
+  %input2 = operand
+  %type = type
+  %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+  %val1 = result 0 of %op1
+  %root1 = operation(%val1 : !pdl.value)
+  %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+  %val2 = result 0 of %op2
+  %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+  rewrite %root1 with "rewriter"["I am param"](%root2 : !pdl.operation)
 }
 
 // -----
@@ -75,13 +75,13 @@ pdl.pattern @rewrite_multi_root_forced : benefit(2) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from a pdl.replace.
 pdl.pattern @infer_type_from_operation_replace : benefit(1) {
-  %type1 = pdl.type : i32
-  %type2 = pdl.type
-  %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-  pdl.rewrite %root {
-    %type3 = pdl.type
-    %newOp = pdl.operation "foo.op" -> (%type1, %type3 : !pdl.type, !pdl.type)
-    pdl.replace %root with %newOp
+  %type1 = type : i32
+  %type2 = type
+  %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+  rewrite %root {
+    %type3 = type
+    %newOp = operation "foo.op" -> (%type1, %type3 : !pdl.type, !pdl.type)
+    replace %root with %newOp
   }
 }
 
@@ -90,11 +90,11 @@ pdl.pattern @infer_type_from_operation_replace : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the result types of an operation within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %type1 = pdl.type : i32
-  %type2 = pdl.type
-  %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
+  %type1 = type : i32
+  %type2 = type
+  %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
   }
 }
 
@@ -103,11 +103,11 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the result types of an operation within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %types = pdl.types
-  %root = pdl.operation -> (%types : !pdl.range)
-  pdl.rewrite %root {
-    %otherTypes = pdl.types : [i32, i64]
-    %newOp = pdl.operation "foo.op" -> (%types, %otherTypes : !pdl.range, !pdl.range)
+  %types = types
+  %root = operation -> (%types : !pdl.range)
+  rewrite %root {
+    %otherTypes = types : [i32, i64]
+    %newOp = operation "foo.op" -> (%types, %otherTypes : !pdl.range, !pdl.range)
   }
 }
 
@@ -116,13 +116,13 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the type of an operand within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %type1 = pdl.type
-  %type2 = pdl.type
-  %operand1 = pdl.operand : %type1
-  %operand2 = pdl.operand : %type2
-  %root = pdl.operation (%operand1, %operand2 : !pdl.value, !pdl.value)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
+  %type1 = type
+  %type2 = type
+  %operand1 = operand : %type1
+  %operand2 = operand : %type2
+  %root = operation (%operand1, %operand2 : !pdl.value, !pdl.value)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
   }
 }
 
@@ -131,29 +131,29 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the types of operands within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %types = pdl.types
-  %operands = pdl.operands : %types
-  %root = pdl.operation (%operands : !pdl.range)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%types : !pdl.range)
+  %types = types
+  %operands = operands : %types
+  %root = operation (%operands : !pdl.range)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%types : !pdl.range)
   }
 }
 
 // -----
 
 pdl.pattern @apply_rewrite_with_no_results : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root {
-    pdl.apply_native_rewrite "NativeRewrite"(%root : !pdl.operation)
+  %root = operation
+  rewrite %root {
+    apply_native_rewrite "NativeRewrite"(%root : !pdl.operation)
   }
 }
 
 // -----
 
 pdl.pattern @attribute_with_dict : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root {
-    %attr = pdl.attribute {some_unit_attr} attributes {pdl.special_attribute}
-    pdl.apply_native_rewrite "NativeRewrite"(%attr : !pdl.attribute)
+  %root = operation
+  rewrite %root {
+    %attr = attribute {some_unit_attr} attributes {pdl.special_attribute}
+    apply_native_rewrite "NativeRewrite"(%attr : !pdl.attribute)
   }
 }
diff --git a/mlir/test/python/dialects/pdl_ops.py b/mlir/test/python/dialects/pdl_ops.py
index 9b5ce4c533b8e..2388ccadab2e3 100644
--- a/mlir/test/python/dialects/pdl_ops.py
+++ b/mlir/test/python/dialects/pdl_ops.py
@@ -16,13 +16,13 @@ def constructAndPrintInModule(f):
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @operations : benefit(1)  {
-# CHECK:     %0 = pdl.attribute
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  {"attr" = %0} -> (%1 : !pdl.type)
-# CHECK:     %3 = pdl.result 0 of %2
-# CHECK:     %4 = pdl.operand
-# CHECK:     %5 = pdl.operation(%3, %4 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite %5 with "rewriter"
+# CHECK:     %0 = attribute
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  {"attr" = %0} -> (%1 : !pdl.type)
+# CHECK:     %3 = result 0 of %2
+# CHECK:     %4 = operand
+# CHECK:     %5 = operation(%3, %4 : !pdl.value, !pdl.value)
+# CHECK:     rewrite %5 with "rewriter"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -40,9 +40,9 @@ def test_operations():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_args : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operation(%0 : !pdl.value)
-# CHECK:     pdl.rewrite %1 with "rewriter"(%0 : !pdl.value)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operation(%0 : !pdl.value)
+# CHECK:     rewrite %1 with "rewriter"(%0 : !pdl.value)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -55,8 +55,8 @@ def test_rewrite_with_args():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_params : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0 with "rewriter" ["I am param"]
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0 with "rewriter" ["I am param"]
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -68,9 +68,9 @@ def test_rewrite_with_params():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_args_and_params : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operation(%0 : !pdl.value)
-# CHECK:     pdl.rewrite %1 with "rewriter" ["I am param"](%0 : !pdl.value)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operation(%0 : !pdl.value)
+# CHECK:     rewrite %1 with "rewriter" ["I am param"](%0 : !pdl.value)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -83,16 +83,16 @@ def test_rewrite_with_args_and_params():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_multi_root_optimal : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operand
-# CHECK:     %2 = pdl.type
-# CHECK:     %3 = pdl.operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %4 = pdl.result 0 of %3
-# CHECK:     %5 = pdl.operation(%4 : !pdl.value)
-# CHECK:     %6 = pdl.operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %7 = pdl.result 0 of %6
-# CHECK:     %8 = pdl.operation(%4, %7 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite with "rewriter" ["I am param"](%5, %8 : !pdl.operation, !pdl.operation)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operand
+# CHECK:     %2 = type
+# CHECK:     %3 = operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %4 = result 0 of %3
+# CHECK:     %5 = operation(%4 : !pdl.value)
+# CHECK:     %6 = operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %7 = result 0 of %6
+# CHECK:     %8 = operation(%4, %7 : !pdl.value, !pdl.value)
+# CHECK:     rewrite with "rewriter" ["I am param"](%5, %8 : !pdl.operation, !pdl.operation)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -112,16 +112,16 @@ def test_rewrite_multi_root_optimal():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_multi_root_forced : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operand
-# CHECK:     %2 = pdl.type
-# CHECK:     %3 = pdl.operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %4 = pdl.result 0 of %3
-# CHECK:     %5 = pdl.operation(%4 : !pdl.value)
-# CHECK:     %6 = pdl.operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %7 = pdl.result 0 of %6
-# CHECK:     %8 = pdl.operation(%4, %7 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite %5 with "rewriter" ["I am param"](%8 : !pdl.operation)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operand
+# CHECK:     %2 = type
+# CHECK:     %3 = operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %4 = result 0 of %3
+# CHECK:     %5 = operation(%4 : !pdl.value)
+# CHECK:     %6 = operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %7 = result 0 of %6
+# CHECK:     %8 = operation(%4, %7 : !pdl.value, !pdl.value)
+# CHECK:     rewrite %5 with "rewriter" ["I am param"](%8 : !pdl.operation)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -141,13 +141,13 @@ def test_rewrite_multi_root_forced():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_add_body : benefit(1)  {
-# CHECK:     %0 = pdl.type : i32
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  -> (%0, %1 : !pdl.type, !pdl.type)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.type
-# CHECK:       %4 = pdl.operation "foo.op"  -> (%0, %3 : !pdl.type, !pdl.type)
-# CHECK:       pdl.replace %2 with %4
+# CHECK:     %0 = type : i32
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = type
+# CHECK:       %4 = operation "foo.op"  -> (%0, %3 : !pdl.type, !pdl.type)
+# CHECK:       replace %2 with %4
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -166,11 +166,11 @@ def test_rewrite_add_body():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_type : benefit(1)  {
-# CHECK:     %0 = pdl.type : i32
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  -> (%0, %1 : !pdl.type, !pdl.type)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     %0 = type : i32
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = operation "foo.op"  -> (%0, %1 : !pdl.type, !pdl.type)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -187,11 +187,11 @@ def test_rewrite_type():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_types : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.range)
-# CHECK:     pdl.rewrite %1  {
-# CHECK:       %2 = pdl.types : [i32, i64]
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0, %2 : !pdl.range, !pdl.range)
+# CHECK:     %0 = types
+# CHECK:     %1 = operation  -> (%0 : !pdl.range)
+# CHECK:     rewrite %1  {
+# CHECK:       %2 = types : [i32, i64]
+# CHECK:       %3 = operation "foo.op"  -> (%0, %2 : !pdl.range, !pdl.range)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -208,11 +208,11 @@ def test_rewrite_types():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_operands : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operands : %0
-# CHECK:     %2 = pdl.operation(%1 : !pdl.range)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0 : !pdl.range)
+# CHECK:     %0 = types
+# CHECK:     %1 = operands : %0
+# CHECK:     %2 = operation(%1 : !pdl.range)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = operation "foo.op"  -> (%0 : !pdl.range)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -229,9 +229,9 @@ def test_rewrite_operands():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @native_rewrite : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       pdl.apply_native_rewrite "NativeRewrite"(%0 : !pdl.operation)
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       apply_native_rewrite "NativeRewrite"(%0 : !pdl.operation)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -246,10 +246,10 @@ def test_native_rewrite():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @attribute_with_value : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       %1 = pdl.attribute "value"
-# CHECK:       pdl.apply_native_rewrite "NativeRewrite"(%1 : !pdl.attribute)
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       %1 = attribute "value"
+# CHECK:       apply_native_rewrite "NativeRewrite"(%1 : !pdl.attribute)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -265,9 +265,9 @@ def test_attribute_with_value():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @erase : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       pdl.erase %0
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       erase %0
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -282,11 +282,11 @@ def test_erase():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @operation_results : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.range)
-# CHECK:     %2 = pdl.results of %1
-# CHECK:     %3 = pdl.operation(%2 : !pdl.range)
-# CHECK:     pdl.rewrite %3 with "rewriter"
+# CHECK:     %0 = types
+# CHECK:     %1 = operation  -> (%0 : !pdl.range)
+# CHECK:     %2 = results of %1
+# CHECK:     %3 = operation(%2 : !pdl.range)
+# CHECK:     rewrite %3 with "rewriter"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -302,10 +302,10 @@ def test_operation_results():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern : benefit(1)  {
-# CHECK:     %0 = pdl.type
-# CHECK:     pdl.apply_native_constraint "typeConstraint" [](%0 : !pdl.type)
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.type)
-# CHECK:     pdl.rewrite %1 with "rewrite"
+# CHECK:     %0 = type
+# CHECK:     apply_native_constraint "typeConstraint" [](%0 : !pdl.type)
+# CHECK:     %1 = operation  -> (%0 : !pdl.type)
+# CHECK:     rewrite %1 with "rewrite"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule

From 7a275dc35411b8c3f510166f40c225cd10dc5eec Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 11:49:35 -0800
Subject: [PATCH 078/946] [RISCV] Remove Zvlsseg extension.

The name "Zvlsseg" no longer appears in the Vector Extension specification.
The segment load/store instructions are just part of the vector
instruction set.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D117724
---
 clang/include/clang/Basic/RISCVVTypes.def     |    4 +-
 clang/include/clang/Basic/riscv_vector.td     |    6 +-
 .../RISCV/rvv-intrinsics-overloaded/vloxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vlseg.c   |    4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsegff.c |    4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsseg.c  |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vluxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsoxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsseg.c   |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssseg.c  |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsuxseg.c |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vloxseg.c    |    2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vlseg.c |    4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsegff.c    |    4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsseg.c     |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vluxseg.c    |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsoxseg.c    |    2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsseg.c |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vssseg.c     |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsuxseg.c    |    2 +-
 clang/test/Driver/riscv-arch.c                |   19 -
 .../test/Preprocessor/riscv-target-features.c |    2 -
 clang/utils/TableGen/RISCVVEmitter.cpp        |   11 +-
 llvm/lib/Support/RISCVISAInfo.cpp             |   13 +-
 llvm/lib/Target/RISCV/RISCV.td                |   10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td      |   12 +-
 llvm/lib/Target/RISCV/RISCVSchedRocket.td     |    2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |    2 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |    2 -
 llvm/test/CodeGen/RISCV/attributes.ll         |   12 +-
 llvm/test/MC/RISCV/attribute-arch-invalid.s   |    4 +-
 llvm/test/MC/RISCV/attribute-arch.s           |   40 +-
 llvm/test/MC/RISCV/rvv/zvlsseg.s              | 1018 ++++++++---------
 33 files changed, 576 insertions(+), 625 deletions(-)

diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
index f6ef62a646363..1d4024dfb20d3 100644
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -30,8 +30,8 @@
 //
 // - ElBits is the size of one element in bits (SEW).
 //
-// - NF is the number of fields (NFIELDS) used in the Zvlsseg instructions
-//   (TODO).
+// - NF is the number of fields (NFIELDS) used in the Load/Store Segment
+//   instructions (TODO).
 //
 // - IsSigned is true for vectors of signed integer elements and
 //   for vectors of floating-point elements.
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 03e16be96abee..28c57cc6afeeb 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -215,10 +215,10 @@ class RVVBuiltin RequiredExtensions = [];
 
-  // Number of fields for Zvlsseg.
+  // Number of fields for Load/Store Segment instructions.
   int NF = 1;
 }
 
@@ -1567,7 +1567,6 @@ defm vle32ff: RVVVLEFFBuiltin<["i", "f"]>;
 defm vle64ff: RVVVLEFFBuiltin<["l", "d"]>;
 
 // 7.8 Vector Load/Store Segment Instructions
-let RequiredExtensions = ["Zvlsseg"] in {
 defm : RVVUnitStridedSegLoad<"vlseg">;
 defm : RVVUnitStridedSegLoadFF<"vlseg">;
 defm : RVVStridedSegLoad<"vlsseg">;
@@ -1577,7 +1576,6 @@ defm : RVVUnitStridedSegStore<"vsseg">;
 defm : RVVStridedSegStore<"vssseg">;
 defm : RVVIndexedSegStore<"vsuxseg">;
 defm : RVVIndexedSegStore<"vsoxseg">;
-}
 
 // 12. Vector Integer Arithmetic Instructions
 // 12.1. Vector Single-Width Integer Add and Subtract
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
index 9cb3325218561..0df229dc803da 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
index a0f04dc4375b4..ac49608830570 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
index f2b9e445b605a..22df2b948634b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
index 82c4c57982acb..4f79022bcfd92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
index a2dbaaf48084f..8db5e8039cafe 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
index 5f41c6bcd28ec..d3454be64ca57 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
index 662ee6b7dbaed..b72721984afa8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
index 8952c2d02ba03..2a95d46f3ae92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
index f6bfd7844f5a0..3a338ad821e27 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
index 41f4f6997dffc..f278bb76ae630 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
index 11cd459f552ff..deaaf0ed5ff93 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
index 20a79659f72dc..fba2c179a4446 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
index 5584d841789dc..cfdf8275483f8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
index bd499de5a1683..b83ff9833e7b9 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
index 03b8667883d71..ac8c4a412e7d0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
index 6f3927c22ae01..0829100f58ca4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
index 488a93c5f3a52..b91c023aeede7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
index aa559161eade1..a6bdc1ad883f3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index c81a87c8182ef..656abde4f75e4 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -416,25 +416,6 @@
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
 // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG %s
-// RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG: error: invalid arch name 'rv32iv0p10_zvlsseg'
-// RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG: requires '-menable-experimental-extensions'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-NOVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-NOVERS: error: invalid arch name 'rv32iv0p10_zvlsseg'
-// RV32-EXPERIMENTAL-ZVLSSEG-NOVERS: experimental extension requires explicit version number
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-BADVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid arch name 'rv32iv0p10_zvlsseg0p1'
-// RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for experimental extension 'zvlsseg'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" "+experimental-zvlsseg"
-
 // RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s
 // RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv0p10_zvl32b0p10'
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index ba310229f14e5..c69285f6e3996 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -31,7 +31,6 @@
 // CHECK-NOT: __riscv_zfh
 // CHECK-NOT: __riscv_v
 // CHECK-NOT: __riscv_vector
-// CHECK-NOT: __riscv_zvlsseg
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32im -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s
@@ -220,7 +219,6 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
 // CHECK-V-EXT: __riscv_v 10000{{$}}
 // CHECK-V-EXT: __riscv_vector 1
-// CHECK-V-EXT: __riscv_zvlsseg 10000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfhmin1p0 -x c -E -dM %s \
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index d3f1d63185f4a..84da6a5901a43 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -140,8 +140,7 @@ enum RISCVExtension : uint8_t {
   F = 1 << 1,
   D = 1 << 2,
   Zfh = 1 << 3,
-  Zvlsseg = 1 << 4,
-  RV64 = 1 << 5,
+  RV64 = 1 << 4,
 };
 
 // TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -445,8 +444,8 @@ void RVVType::initBuiltinStr() {
     return;
   }
   BuiltinStr = "q" + utostr(Scale.getValue()) + BuiltinStr;
-  // Pointer to vector types. Defined for Zvlsseg load intrinsics.
-  // Zvlsseg load intrinsics have pointer type arguments to store the loaded
+  // Pointer to vector types. Defined for segment load intrinsics.
+  // segment load intrinsics have pointer type arguments to store the loaded
   // vector values.
   if (IsPointer)
     BuiltinStr += "*";
@@ -797,8 +796,6 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
       RISCVExtensions |= RISCVExtension::D;
   }
   for (auto Extension : RequiredExtensions) {
-    if (Extension == "Zvlsseg")
-      RISCVExtensions |= RISCVExtension::Zvlsseg;
     if (Extension == "RV64")
       RISCVExtensions |= RISCVExtension::RV64;
   }
@@ -1311,8 +1308,6 @@ bool RVVEmitter::emitExtDefStr(uint8_t Extents, raw_ostream &OS) {
     OS << LS << "defined(__riscv_d)";
   if (Extents & RISCVExtension::Zfh)
     OS << LS << "defined(__riscv_zfh)";
-  if (Extents & RISCVExtension::Zvlsseg)
-    OS << LS << "defined(__riscv_zvlsseg)";
   if (Extents & RISCVExtension::RV64)
     OS << LS << "(__riscv_xlen == 64)";
   OS << "\n";
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index d1d222d3c0eb0..fc52fc6803439 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -66,7 +66,6 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
     {"zbr", RISCVExtensionVersion{0, 93}},
     {"zbt", RISCVExtensionVersion{0, 93}},
 
-    {"zvlsseg", RISCVExtensionVersion{0, 10}},
     {"zvl32b", RISCVExtensionVersion{0, 10}},
     {"zvl64b", RISCVExtensionVersion{0, 10}},
     {"zvl128b", RISCVExtensionVersion{0, 10}},
@@ -301,9 +300,7 @@ void RISCVISAInfo::toFeatures(
     if (ExtName == "i")
       continue;
 
-    if (ExtName == "zvlsseg") {
-      Features.push_back("+experimental-zvlsseg");
-    } else if (isExperimentalExtension(ExtName)) {
+    if (isExperimentalExtension(ExtName)) {
       Features.push_back(StrAlloc("+experimental-" + ExtName));
     } else {
       Features.push_back(StrAlloc("+" + ExtName));
@@ -691,7 +688,6 @@ Error RISCVISAInfo::checkDependency() {
   bool HasE = Exts.count("e") == 1;
   bool HasD = Exts.count("d") == 1;
   bool HasF = Exts.count("f") == 1;
-  bool HasZvlsseg = Exts.count("zvlsseg") == 1;
   bool HasVector = Exts.count("zve32x") == 1;
   bool HasZve32f = Exts.count("zve32f") == 1;
   bool HasZve64d = Exts.count("zve64d") == 1;
@@ -710,11 +706,6 @@ Error RISCVISAInfo::checkDependency() {
     return createStringError(errc::invalid_argument,
                              "d requires f extension to also be specified");
 
-  if (HasZvlsseg && !HasVector)
-    return createStringError(
-        errc::invalid_argument,
-        "zvlsseg requires v or zve* extension to also be specified");
-
   // FIXME: Consider Zfinx in the future
   if (HasZve32f && !HasF)
     return createStringError(
@@ -745,7 +736,7 @@ static const char *ImpliedExtsZve64d[] = {"zve64f"};
 static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
 static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
 static const char *ImpliedExtsZve32f[] = {"zve32x"};
-static const char *ImpliedExtsZve32x[] = {"zvlsseg", "zvl32b"};
+static const char *ImpliedExtsZve32x[] = {"zvl32b"};
 static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"};
 static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"};
 static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"};
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index fda400e490a02..36c7263235ab5 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -161,19 +161,11 @@ foreach i = { 6-15 } in {
                         [!cast("FeatureStdExtZvl"#!srl(I, 1)#"b")]>;
 }
 
-def FeatureStdExtZvlsseg
-    : SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
-                       "'Zvlsseg' (Vector segment load/store instructions)",
-                       []>;
-def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
-                                 AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
-                                 "'Zvlsseg' (Vector segment load/store instructions)">;
-
 def FeatureStdExtZve32x
     : SubtargetFeature<"experimental-zve32x", "HasStdExtZve32x", "true",
                        "'Zve32x' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW)",
-                       [FeatureStdExtZvlsseg, FeatureStdExtZvl32b]>;
+                       [FeatureStdExtZvl32b]>;
 def HasStdExtZve32x : Predicate<"SubTarget->hasStdExtZve32x()">,
                                  AssemblerPredicate<(all_of FeatureStdExtZve32x),
                                  "'Zve32x' (Vector Extensions for Embedded Processors "
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index afdd7c4e8b3a8..306024a3e4fd4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1461,7 +1461,7 @@ foreach n = [2, 4, 8] in {
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 } // Predicates = [HasVInstructions]
 
-let Predicates = [HasStdExtZvlsseg] in {
+let Predicates = [HasVInstructions] in {
   foreach nf=2-8 in {
     foreach eew = [8, 16, 32] in {
       defvar w = !cast("LSWidth"#eew);
@@ -1494,9 +1494,9 @@ let Predicates = [HasStdExtZvlsseg] in {
                              "vsoxseg"#nf#"ei"#eew#".v">;
     }
   }
-} // Predicates = [HasStdExtZvlsseg]
+} // Predicates = [HasVInstructions]
 
-let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64] in {
+let Predicates = [HasVInstructionsI64] in {
   foreach nf=2-8 in {
     // Vector Unit-strided Segment Instructions
     def VLSEG#nf#E64_V :
@@ -1512,8 +1512,8 @@ let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64] in {
     def VSSSEG#nf#E64_V :
       VStridedSegmentStore;
   }
-} // Predicates = [HasStdExtZvlsseg, HasVInstructionsI64]
-let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64] in {
+} // Predicates = [HasVInstructionsI64]
+let Predicates = [HasVInstructionsI64, IsRV64] in {
   foreach nf=2-8 in {
     // Vector Indexed Segment Instructions
     def VLUXSEG#nf#EI64_V :
@@ -1529,6 +1529,6 @@ let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64] in {
       VIndexedSegmentStore;
   }
-} // Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64]
+} // Predicates = [HasVInstructionsI64, IsRV64]
 
 include "RISCVInstrInfoVPseudos.td"
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 2d80d9d670403..4655015a9d1ec 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64, HasStdExtZvlsseg];
+  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index b39082f153543..3b3e2699d6b60 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
+  let UnsupportedFeatures = [HasStdExtV];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index f8eb8e000a6a3..d55affd0539be 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -81,7 +81,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZve64x = false;
   bool HasStdExtZve64f = false;
   bool HasStdExtZve64d = false;
-  bool HasStdExtZvlsseg = false;
   bool HasStdExtZfhmin = false;
   bool HasStdExtZfh = false;
   bool HasRV64 = false;
@@ -160,7 +159,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZve64x() const { return HasStdExtZve64x; }
   bool hasStdExtZve64f() const { return HasStdExtZve64f; }
   bool hasStdExtZve64d() const { return HasStdExtZve64d; }
-  bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
   bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index f04eb26282ca5..59790c9967111 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -18,7 +18,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV32ZBS %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s
-; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV32COMBINED %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -37,7 +37,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV64ZBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s
-; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV64COMBINED %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
@@ -57,8 +57,8 @@
 ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93"
 ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0"
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
-; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
-; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -77,8 +77,8 @@
 ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93"
 ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0"
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
-; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
-; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/MC/RISCV/attribute-arch-invalid.s b/llvm/test/MC/RISCV/attribute-arch-invalid.s
index 933de2963dbbc..1dd5621d128ba 100644
--- a/llvm/test/MC/RISCV/attribute-arch-invalid.s
+++ b/llvm/test/MC/RISCV/attribute-arch-invalid.s
@@ -26,5 +26,5 @@
 .attribute arch, "rv32izbt"
 # CHECK: error: invalid arch name 'rv32izbt', experimental extension requires explicit version number `zbt`
 
-.attribute arch, "rv32ivzvlsseg"
-# CHECK: error: invalid arch name 'rv32ivzvlsseg', experimental extension requires explicit version number `v`
+.attribute arch, "rv32iv"
+# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v`
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 54e496c61f4ad..3d7be42caeb56 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -36,7 +36,7 @@
 ## Experimental extensions require version string to be explicitly specified
 
 .attribute arch, "rv32iv0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32izba1p0"
 # CHECK: attribute      5, "rv32i2p0_zba1p0"
@@ -74,56 +74,56 @@
 .attribute arch, "rv32ifzfh1p0"
 # CHECK: attribute      5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0"
 
-.attribute arch, "rv32iv0p10zvlsseg0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+.attribute arch, "rv32iv0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl32b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl64b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl128b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl256b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl512b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl1024b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl2048b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl4096b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl8192b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl16384b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl32768b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl65536b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32i_zve32x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zvl32b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zvl32b0p10"
 
 .attribute arch, "rv32if_zve32f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10"
 
 .attribute arch, "rv32i_zve64x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32if_zve64f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32ifd_zve64d0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
diff --git a/llvm/test/MC/RISCV/rvv/zvlsseg.s b/llvm/test/MC/RISCV/rvv/zvlsseg.s
index 99f5e157131fe..6845839fcaa33 100644
--- a/llvm/test/MC/RISCV/rvv/zvlsseg.s
+++ b/llvm/test/MC/RISCV/rvv/zvlsseg.s
@@ -1,3036 +1,3034 @@
 # RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
-# RUN:   --mattr=+experimental-zvlsseg --riscv-no-aliases \
+# RUN:   --riscv-no-aliases \
 # RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v \
-# RUN:   --mattr=+experimental-zvlsseg %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v --mattr=+experimental-zvlsseg -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v \
-# RUN:   --mattr=+experimental-zvlsseg %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vlseg2e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 20 
 
 vlseg2e8.v v8, (a0)
 # CHECK-INST: vlseg2e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 22 
 
 vlseg2e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 20 
 
 vlseg2e16.v v8, (a0)
 # CHECK-INST: vlseg2e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 22 
 
 vlseg2e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 20 
 
 vlseg2e32.v v8, (a0)
 # CHECK-INST: vlseg2e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 22 
 
 vlseg2e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 20 
 
 vlseg2e64.v v8, (a0)
 # CHECK-INST: vlseg2e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 22 
 
 vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 21 
 
 vlseg2e8ff.v v8, (a0)
 # CHECK-INST: vlseg2e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 23 
 
 vlseg2e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 21 
 
 vlseg2e16ff.v v8, (a0)
 # CHECK-INST: vlseg2e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 23 
 
 vlseg2e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 21 
 
 vlseg2e32ff.v v8, (a0)
 # CHECK-INST: vlseg2e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 23 
 
 vlseg2e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 21 
 
 vlseg2e64ff.v v8, (a0)
 # CHECK-INST: vlseg2e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 23 
 
 vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 28 
 
 vlsseg2e8.v v8, (a0), a1
 # CHECK-INST: vlsseg2e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 2a 
 
 vlsseg2e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 28 
 
 vlsseg2e16.v v8, (a0), a1
 # CHECK-INST: vlsseg2e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 2a 
 
 vlsseg2e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 28 
 
 vlsseg2e32.v v8, (a0), a1
 # CHECK-INST: vlsseg2e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 2a 
 
 vlsseg2e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 28 
 
 vlsseg2e64.v v8, (a0), a1
 # CHECK-INST: vlsseg2e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 2a 
 
 vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 24 
 
 vluxseg2ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 26 
 
 vluxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 24 
 
 vluxseg2ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 26 
 
 vluxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 24 
 
 vluxseg2ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 26 
 
 vluxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 24 
 
 vluxseg2ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 26 
 
 vloxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 2c 
 
 vloxseg2ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 2e 
 
 vloxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 2c 
 
 vloxseg2ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 2e 
 
 vloxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 2c 
 
 vloxseg2ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 2e 
 
 vloxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 2c 
 
 vloxseg2ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 2e 
 
 vlseg3e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 40 
 
 vlseg3e8.v v8, (a0)
 # CHECK-INST: vlseg3e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 42 
 
 vlseg3e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 40 
 
 vlseg3e16.v v8, (a0)
 # CHECK-INST: vlseg3e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 42 
 
 vlseg3e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 40 
 
 vlseg3e32.v v8, (a0)
 # CHECK-INST: vlseg3e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 42 
 
 vlseg3e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 40 
 
 vlseg3e64.v v8, (a0)
 # CHECK-INST: vlseg3e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 42 
 
 vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 41 
 
 vlseg3e8ff.v v8, (a0)
 # CHECK-INST: vlseg3e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 43 
 
 vlseg3e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 41 
 
 vlseg3e16ff.v v8, (a0)
 # CHECK-INST: vlseg3e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 43 
 
 vlseg3e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 41 
 
 vlseg3e32ff.v v8, (a0)
 # CHECK-INST: vlseg3e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 43 
 
 vlseg3e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 41 
 
 vlseg3e64ff.v v8, (a0)
 # CHECK-INST: vlseg3e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 43 
 
 vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 48 
 
 vlsseg3e8.v v8, (a0), a1
 # CHECK-INST: vlsseg3e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 4a 
 
 vlsseg3e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 48 
 
 vlsseg3e16.v v8, (a0), a1
 # CHECK-INST: vlsseg3e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 4a 
 
 vlsseg3e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 48 
 
 vlsseg3e32.v v8, (a0), a1
 # CHECK-INST: vlsseg3e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 4a 
 
 vlsseg3e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 48 
 
 vlsseg3e64.v v8, (a0), a1
 # CHECK-INST: vlsseg3e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 4a 
 
 vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 44 
 
 vluxseg3ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 46 
 
 vluxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 44 
 
 vluxseg3ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 46 
 
 vluxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 44 
 
 vluxseg3ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 46 
 
 vluxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 44 
 
 vluxseg3ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 46 
 
 vloxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 4c 
 
 vloxseg3ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 4e 
 
 vloxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 4c 
 
 vloxseg3ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 4e 
 
 vloxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 4c 
 
 vloxseg3ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 4e 
 
 vloxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 4c 
 
 vloxseg3ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 4e 
 
 vlseg4e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 60 
 
 vlseg4e8.v v8, (a0)
 # CHECK-INST: vlseg4e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 62 
 
 vlseg4e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 60 
 
 vlseg4e16.v v8, (a0)
 # CHECK-INST: vlseg4e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 62 
 
 vlseg4e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 60 
 
 vlseg4e32.v v8, (a0)
 # CHECK-INST: vlseg4e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 62 
 
 vlseg4e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 60 
 
 vlseg4e64.v v8, (a0)
 # CHECK-INST: vlseg4e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 62 
 
 vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 61 
 
 vlseg4e8ff.v v8, (a0)
 # CHECK-INST: vlseg4e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 63 
 
 vlseg4e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 61 
 
 vlseg4e16ff.v v8, (a0)
 # CHECK-INST: vlseg4e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 63 
 
 vlseg4e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 61 
 
 vlseg4e32ff.v v8, (a0)
 # CHECK-INST: vlseg4e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 63 
 
 vlseg4e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 61 
 
 vlseg4e64ff.v v8, (a0)
 # CHECK-INST: vlseg4e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 63 
 
 vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 68 
 
 vlsseg4e8.v v8, (a0), a1
 # CHECK-INST: vlsseg4e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 6a 
 
 vlsseg4e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 68 
 
 vlsseg4e16.v v8, (a0), a1
 # CHECK-INST: vlsseg4e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 6a 
 
 vlsseg4e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 68 
 
 vlsseg4e32.v v8, (a0), a1
 # CHECK-INST: vlsseg4e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 6a 
 
 vlsseg4e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 68 
 
 vlsseg4e64.v v8, (a0), a1
 # CHECK-INST: vlsseg4e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 6a 
 
 vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 64 
 
 vluxseg4ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 66 
 
 vluxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 64 
 
 vluxseg4ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 66 
 
 vluxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 64 
 
 vluxseg4ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 66 
 
 vluxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 64 
 
 vluxseg4ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 66 
 
 vloxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 6c 
 
 vloxseg4ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 6e 
 
 vloxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 6c 
 
 vloxseg4ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 6e 
 
 vloxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 6c 
 
 vloxseg4ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 6e 
 
 vloxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 6c 
 
 vloxseg4ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 6e 
 
 vlseg5e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 80 
 
 vlseg5e8.v v8, (a0)
 # CHECK-INST: vlseg5e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 82 
 
 vlseg5e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 80 
 
 vlseg5e16.v v8, (a0)
 # CHECK-INST: vlseg5e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 82 
 
 vlseg5e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 80 
 
 vlseg5e32.v v8, (a0)
 # CHECK-INST: vlseg5e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 82 
 
 vlseg5e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 80 
 
 vlseg5e64.v v8, (a0)
 # CHECK-INST: vlseg5e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 82 
 
 vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 81 
 
 vlseg5e8ff.v v8, (a0)
 # CHECK-INST: vlseg5e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 83 
 
 vlseg5e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 81 
 
 vlseg5e16ff.v v8, (a0)
 # CHECK-INST: vlseg5e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 83 
 
 vlseg5e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 81 
 
 vlseg5e32ff.v v8, (a0)
 # CHECK-INST: vlseg5e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 83 
 
 vlseg5e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 81 
 
 vlseg5e64ff.v v8, (a0)
 # CHECK-INST: vlseg5e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 83 
 
 vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 88 
 
 vlsseg5e8.v v8, (a0), a1
 # CHECK-INST: vlsseg5e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 8a 
 
 vlsseg5e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 88 
 
 vlsseg5e16.v v8, (a0), a1
 # CHECK-INST: vlsseg5e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 8a 
 
 vlsseg5e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 88 
 
 vlsseg5e32.v v8, (a0), a1
 # CHECK-INST: vlsseg5e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 8a 
 
 vlsseg5e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 88 
 
 vlsseg5e64.v v8, (a0), a1
 # CHECK-INST: vlsseg5e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 8a 
 
 vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 84 
 
 vluxseg5ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 86 
 
 vluxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 84 
 
 vluxseg5ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 86 
 
 vluxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 84 
 
 vluxseg5ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 86 
 
 vluxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 84 
 
 vluxseg5ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 86 
 
 vloxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 8c 
 
 vloxseg5ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 8e 
 
 vloxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 8c 
 
 vloxseg5ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 8e 
 
 vloxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 8c 
 
 vloxseg5ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 8e 
 
 vloxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 8c 
 
 vloxseg5ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 8e 
 
 vlseg6e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a0 
 
 vlseg6e8.v v8, (a0)
 # CHECK-INST: vlseg6e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a2 
 
 vlseg6e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a0 
 
 vlseg6e16.v v8, (a0)
 # CHECK-INST: vlseg6e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a2 
 
 vlseg6e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a0 
 
 vlseg6e32.v v8, (a0)
 # CHECK-INST: vlseg6e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a2 
 
 vlseg6e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a0 
 
 vlseg6e64.v v8, (a0)
 # CHECK-INST: vlseg6e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a2 
 
 vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a1 
 
 vlseg6e8ff.v v8, (a0)
 # CHECK-INST: vlseg6e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a3 
 
 vlseg6e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a1 
 
 vlseg6e16ff.v v8, (a0)
 # CHECK-INST: vlseg6e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a3 
 
 vlseg6e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a1 
 
 vlseg6e32ff.v v8, (a0)
 # CHECK-INST: vlseg6e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a3 
 
 vlseg6e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a1 
 
 vlseg6e64ff.v v8, (a0)
 # CHECK-INST: vlseg6e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a3 
 
 vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 a8 
 
 vlsseg6e8.v v8, (a0), a1
 # CHECK-INST: vlsseg6e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 aa 
 
 vlsseg6e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 a8 
 
 vlsseg6e16.v v8, (a0), a1
 # CHECK-INST: vlsseg6e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 aa 
 
 vlsseg6e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 a8 
 
 vlsseg6e32.v v8, (a0), a1
 # CHECK-INST: vlsseg6e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 aa 
 
 vlsseg6e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 a8 
 
 vlsseg6e64.v v8, (a0), a1
 # CHECK-INST: vlsseg6e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 aa 
 
 vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 a4 
 
 vluxseg6ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 a6 
 
 vluxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 a4 
 
 vluxseg6ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 a6 
 
 vluxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 a4 
 
 vluxseg6ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 a6 
 
 vluxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 a4 
 
 vluxseg6ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 a6 
 
 vloxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ac 
 
 vloxseg6ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ae 
 
 vloxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ac 
 
 vloxseg6ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ae 
 
 vloxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ac 
 
 vloxseg6ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ae 
 
 vloxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ac 
 
 vloxseg6ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ae 
 
 vlseg7e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c0 
 
 vlseg7e8.v v8, (a0)
 # CHECK-INST: vlseg7e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c2 
 
 vlseg7e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c0 
 
 vlseg7e16.v v8, (a0)
 # CHECK-INST: vlseg7e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c2 
 
 vlseg7e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c0 
 
 vlseg7e32.v v8, (a0)
 # CHECK-INST: vlseg7e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c2 
 
 vlseg7e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c0 
 
 vlseg7e64.v v8, (a0)
 # CHECK-INST: vlseg7e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c2 
 
 vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c1 
 
 vlseg7e8ff.v v8, (a0)
 # CHECK-INST: vlseg7e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c3 
 
 vlseg7e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c1 
 
 vlseg7e16ff.v v8, (a0)
 # CHECK-INST: vlseg7e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c3 
 
 vlseg7e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c1 
 
 vlseg7e32ff.v v8, (a0)
 # CHECK-INST: vlseg7e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c3 
 
 vlseg7e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c1 
 
 vlseg7e64ff.v v8, (a0)
 # CHECK-INST: vlseg7e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c3 
 
 vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 c8 
 
 vlsseg7e8.v v8, (a0), a1
 # CHECK-INST: vlsseg7e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 ca 
 
 vlsseg7e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 c8 
 
 vlsseg7e16.v v8, (a0), a1
 # CHECK-INST: vlsseg7e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 ca 
 
 vlsseg7e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 c8 
 
 vlsseg7e32.v v8, (a0), a1
 # CHECK-INST: vlsseg7e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 ca 
 
 vlsseg7e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 c8 
 
 vlsseg7e64.v v8, (a0), a1
 # CHECK-INST: vlsseg7e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 ca 
 
 vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 c4 
 
 vluxseg7ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 c6 
 
 vluxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 c4 
 
 vluxseg7ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 c6 
 
 vluxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 c4 
 
 vluxseg7ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 c6 
 
 vluxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 c4 
 
 vluxseg7ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 c6 
 
 vloxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 cc 
 
 vloxseg7ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ce 
 
 vloxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 cc 
 
 vloxseg7ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ce 
 
 vloxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 cc 
 
 vloxseg7ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ce 
 
 vloxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 cc 
 
 vloxseg7ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ce 
 
 vlseg8e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e0 
 
 vlseg8e8.v v8, (a0)
 # CHECK-INST: vlseg8e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e2 
 
 vlseg8e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e0 
 
 vlseg8e16.v v8, (a0)
 # CHECK-INST: vlseg8e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e2 
 
 vlseg8e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e0 
 
 vlseg8e32.v v8, (a0)
 # CHECK-INST: vlseg8e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e2 
 
 vlseg8e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e0 
 
 vlseg8e64.v v8, (a0)
 # CHECK-INST: vlseg8e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e2 
 
 vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e1 
 
 vlseg8e8ff.v v8, (a0)
 # CHECK-INST: vlseg8e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e3 
 
 vlseg8e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e1 
 
 vlseg8e16ff.v v8, (a0)
 # CHECK-INST: vlseg8e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e3 
 
 vlseg8e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e1 
 
 vlseg8e32ff.v v8, (a0)
 # CHECK-INST: vlseg8e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e3 
 
 vlseg8e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e1 
 
 vlseg8e64ff.v v8, (a0)
 # CHECK-INST: vlseg8e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e3 
 
 vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 e8 
 
 vlsseg8e8.v v8, (a0), a1
 # CHECK-INST: vlsseg8e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 ea 
 
 vlsseg8e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 e8 
 
 vlsseg8e16.v v8, (a0), a1
 # CHECK-INST: vlsseg8e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 ea 
 
 vlsseg8e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 e8 
 
 vlsseg8e32.v v8, (a0), a1
 # CHECK-INST: vlsseg8e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 ea 
 
 vlsseg8e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 e8 
 
 vlsseg8e64.v v8, (a0), a1
 # CHECK-INST: vlsseg8e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 ea 
 
 vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 e4 
 
 vluxseg8ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 e6 
 
 vluxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 e4 
 
 vluxseg8ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 e6 
 
 vluxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 e4 
 
 vluxseg8ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 e6 
 
 vluxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 e4 
 
 vluxseg8ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 e6 
 
 vloxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ec 
 
 vloxseg8ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ee 
 
 vloxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ec 
 
 vloxseg8ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ee 
 
 vloxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ec 
 
 vloxseg8ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ee 
 
 vloxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ec 
 
 vloxseg8ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ee 
 
 vsseg2e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 20 
 
 vsseg2e8.v v24, (a0)
 # CHECK-INST: vsseg2e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 22 
 
 vsseg2e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 20 
 
 vsseg2e16.v v24, (a0)
 # CHECK-INST: vsseg2e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 22 
 
 vsseg2e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 20 
 
 vsseg2e32.v v24, (a0)
 # CHECK-INST: vsseg2e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 22 
 
 vsseg2e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 20 
 
 vsseg2e64.v v24, (a0)
 # CHECK-INST: vsseg2e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 22 
 
 vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 28 
 
 vssseg2e8.v v24, (a0), a1
 # CHECK-INST: vssseg2e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 2a 
 
 vssseg2e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 28 
 
 vssseg2e16.v v24, (a0), a1
 # CHECK-INST: vssseg2e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 2a 
 
 vssseg2e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 28 
 
 vssseg2e32.v v24, (a0), a1
 # CHECK-INST: vssseg2e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 2a 
 
 vssseg2e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 28 
 
 vssseg2e64.v v24, (a0), a1
 # CHECK-INST: vssseg2e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 2a 
 
 vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 24 
 
 vsuxseg2ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 26 
 
 vsuxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 24 
 
 vsuxseg2ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 26 
 
 vsuxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 24 
 
 vsuxseg2ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 26 
 
 vsuxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 24 
 
 vsuxseg2ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 26 
 
 vsoxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 2c 
 
 vsoxseg2ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 2e 
 
 vsoxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 2c 
 
 vsoxseg2ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 2e 
 
 vsoxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 2c 
 
 vsoxseg2ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 2e 
 
 vsoxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 2c 
 
 vsoxseg2ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 2e 
 
 vsseg3e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 40 
 
 vsseg3e8.v v24, (a0)
 # CHECK-INST: vsseg3e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 42 
 
 vsseg3e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 40 
 
 vsseg3e16.v v24, (a0)
 # CHECK-INST: vsseg3e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 42 
 
 vsseg3e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 40 
 
 vsseg3e32.v v24, (a0)
 # CHECK-INST: vsseg3e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 42 
 
 vsseg3e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 40 
 
 vsseg3e64.v v24, (a0)
 # CHECK-INST: vsseg3e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 42 
 
 vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 48 
 
 vssseg3e8.v v24, (a0), a1
 # CHECK-INST: vssseg3e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 4a 
 
 vssseg3e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 48 
 
 vssseg3e16.v v24, (a0), a1
 # CHECK-INST: vssseg3e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 4a 
 
 vssseg3e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 48 
 
 vssseg3e32.v v24, (a0), a1
 # CHECK-INST: vssseg3e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 4a 
 
 vssseg3e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 48 
 
 vssseg3e64.v v24, (a0), a1
 # CHECK-INST: vssseg3e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 4a 
 
 vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 44 
 
 vsuxseg3ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 46 
 
 vsuxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 44 
 
 vsuxseg3ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 46 
 
 vsuxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 44 
 
 vsuxseg3ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 46 
 
 vsuxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 44 
 
 vsuxseg3ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 46 
 
 vsoxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 4c 
 
 vsoxseg3ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 4e 
 
 vsoxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 4c 
 
 vsoxseg3ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 4e 
 
 vsoxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 4c 
 
 vsoxseg3ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 4e 
 
 vsoxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 4c 
 
 vsoxseg3ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 4e 
 
 vsseg4e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 60 
 
 vsseg4e8.v v24, (a0)
 # CHECK-INST: vsseg4e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 62 
 
 vsseg4e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 60 
 
 vsseg4e16.v v24, (a0)
 # CHECK-INST: vsseg4e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 62 
 
 vsseg4e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 60 
 
 vsseg4e32.v v24, (a0)
 # CHECK-INST: vsseg4e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 62 
 
 vsseg4e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 60 
 
 vsseg4e64.v v24, (a0)
 # CHECK-INST: vsseg4e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 62 
 
 vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 68 
 
 vssseg4e8.v v24, (a0), a1
 # CHECK-INST: vssseg4e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 6a 
 
 vssseg4e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 68 
 
 vssseg4e16.v v24, (a0), a1
 # CHECK-INST: vssseg4e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 6a 
 
 vssseg4e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 68 
 
 vssseg4e32.v v24, (a0), a1
 # CHECK-INST: vssseg4e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 6a 
 
 vssseg4e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 68 
 
 vssseg4e64.v v24, (a0), a1
 # CHECK-INST: vssseg4e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 6a 
 
 vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 64 
 
 vsuxseg4ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 66 
 
 vsuxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 64 
 
 vsuxseg4ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 66 
 
 vsuxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 64 
 
 vsuxseg4ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 66 
 
 vsuxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 64 
 
 vsuxseg4ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 66 
 
 vsoxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 6c 
 
 vsoxseg4ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 6e 
 
 vsoxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 6c 
 
 vsoxseg4ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 6e 
 
 vsoxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 6c 
 
 vsoxseg4ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 6e 
 
 vsoxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 6c 
 
 vsoxseg4ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 6e 
 
 vsseg5e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 80 
 
 vsseg5e8.v v24, (a0)
 # CHECK-INST: vsseg5e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 82 
 
 vsseg5e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 80 
 
 vsseg5e16.v v24, (a0)
 # CHECK-INST: vsseg5e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 82 
 
 vsseg5e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 80 
 
 vsseg5e32.v v24, (a0)
 # CHECK-INST: vsseg5e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 82 
 
 vsseg5e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 80 
 
 vsseg5e64.v v24, (a0)
 # CHECK-INST: vsseg5e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 82 
 
 vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 88 
 
 vssseg5e8.v v24, (a0), a1
 # CHECK-INST: vssseg5e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 8a 
 
 vssseg5e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 88 
 
 vssseg5e16.v v24, (a0), a1
 # CHECK-INST: vssseg5e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 8a 
 
 vssseg5e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 88 
 
 vssseg5e32.v v24, (a0), a1
 # CHECK-INST: vssseg5e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 8a 
 
 vssseg5e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 88 
 
 vssseg5e64.v v24, (a0), a1
 # CHECK-INST: vssseg5e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 8a 
 
 vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 84 
 
 vsuxseg5ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 86 
 
 vsuxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 84 
 
 vsuxseg5ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 86 
 
 vsuxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 84 
 
 vsuxseg5ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 86 
 
 vsuxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 84 
 
 vsuxseg5ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 86 
 
 vsoxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 8c 
 
 vsoxseg5ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 8e 
 
 vsoxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 8c 
 
 vsoxseg5ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 8e 
 
 vsoxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 8c 
 
 vsoxseg5ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 8e 
 
 vsoxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 8c 
 
 vsoxseg5ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 8e 
 
 vsseg6e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 a0 
 
 vsseg6e8.v v24, (a0)
 # CHECK-INST: vsseg6e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 a2 
 
 vsseg6e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 a0 
 
 vsseg6e16.v v24, (a0)
 # CHECK-INST: vsseg6e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 a2 
 
 vsseg6e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 a0 
 
 vsseg6e32.v v24, (a0)
 # CHECK-INST: vsseg6e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 a2 
 
 vsseg6e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 a0 
 
 vsseg6e64.v v24, (a0)
 # CHECK-INST: vsseg6e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 a2 
 
 vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 a8 
 
 vssseg6e8.v v24, (a0), a1
 # CHECK-INST: vssseg6e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 aa 
 
 vssseg6e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 a8 
 
 vssseg6e16.v v24, (a0), a1
 # CHECK-INST: vssseg6e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 aa 
 
 vssseg6e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 a8 
 
 vssseg6e32.v v24, (a0), a1
 # CHECK-INST: vssseg6e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 aa 
 
 vssseg6e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 a8 
 
 vssseg6e64.v v24, (a0), a1
 # CHECK-INST: vssseg6e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 aa 
 
 vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 a4 
 
 vsuxseg6ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 a6 
 
 vsuxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 a4 
 
 vsuxseg6ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 a6 
 
 vsuxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 a4 
 
 vsuxseg6ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 a6 
 
 vsuxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 a4 
 
 vsuxseg6ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 a6 
 
 vsoxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ac 
 
 vsoxseg6ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ae 
 
 vsoxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ac 
 
 vsoxseg6ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ae 
 
 vsoxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ac 
 
 vsoxseg6ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ae 
 
 vsoxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ac 
 
 vsoxseg6ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ae 
 
 vsseg7e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 c0 
 
 vsseg7e8.v v24, (a0)
 # CHECK-INST: vsseg7e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 c2 
 
 vsseg7e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 c0 
 
 vsseg7e16.v v24, (a0)
 # CHECK-INST: vsseg7e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 c2 
 
 vsseg7e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 c0 
 
 vsseg7e32.v v24, (a0)
 # CHECK-INST: vsseg7e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 c2 
 
 vsseg7e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 c0 
 
 vsseg7e64.v v24, (a0)
 # CHECK-INST: vsseg7e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 c2 
 
 vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 c8 
 
 vssseg7e8.v v24, (a0), a1
 # CHECK-INST: vssseg7e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 ca 
 
 vssseg7e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 c8 
 
 vssseg7e16.v v24, (a0), a1
 # CHECK-INST: vssseg7e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 ca 
 
 vssseg7e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 c8 
 
 vssseg7e32.v v24, (a0), a1
 # CHECK-INST: vssseg7e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 ca 
 
 vssseg7e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 c8 
 
 vssseg7e64.v v24, (a0), a1
 # CHECK-INST: vssseg7e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 ca 
 
 vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 c4 
 
 vsuxseg7ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 c6 
 
 vsuxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 c4 
 
 vsuxseg7ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 c6 
 
 vsuxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 c4 
 
 vsuxseg7ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 c6 
 
 vsuxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 c4 
 
 vsuxseg7ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 c6 
 
 vsoxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 cc 
 
 vsoxseg7ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ce 
 
 vsoxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 cc 
 
 vsoxseg7ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ce 
 
 vsoxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 cc 
 
 vsoxseg7ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ce 
 
 vsoxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 cc 
 
 vsoxseg7ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ce 
 
 vsseg8e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 e0 
 
 vsseg8e8.v v24, (a0)
 # CHECK-INST: vsseg8e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 e2 
 
 vsseg8e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 e0 
 
 vsseg8e16.v v24, (a0)
 # CHECK-INST: vsseg8e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 e2 
 
 vsseg8e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 e0 
 
 vsseg8e32.v v24, (a0)
 # CHECK-INST: vsseg8e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 e2 
 
 vsseg8e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 e0 
 
 vsseg8e64.v v24, (a0)
 # CHECK-INST: vsseg8e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 e2 
 
 vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 e8 
 
 vssseg8e8.v v24, (a0), a1
 # CHECK-INST: vssseg8e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 ea 
 
 vssseg8e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 e8 
 
 vssseg8e16.v v24, (a0), a1
 # CHECK-INST: vssseg8e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 ea 
 
 vssseg8e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 e8 
 
 vssseg8e32.v v24, (a0), a1
 # CHECK-INST: vssseg8e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 ea 
 
 vssseg8e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 e8 
 
 vssseg8e64.v v24, (a0), a1
 # CHECK-INST: vssseg8e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 ea 
 
 vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 e4 
 
 vsuxseg8ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 e6 
 
 vsuxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 e4 
 
 vsuxseg8ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 e6 
 
 vsuxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 e4 
 
 vsuxseg8ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 e6 
 
 vsuxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 e4 
 
 vsuxseg8ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 e6 
 
 vsoxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ec 
 
 vsoxseg8ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ee 
 
 vsoxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ec 
 
 vsoxseg8ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ee 
 
 vsoxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ec 
 
 vsoxseg8ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ee 
 
 vsoxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ec 
 
 vsoxseg8ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ee 

From 118babe67adfe3eed1a6d89e3c2d5a70ba8322a4 Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Thu, 20 Jan 2022 12:44:20 -0800
Subject: [PATCH 079/946] [SLP] Use for loops for walking bundle elements

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d9d02cb56c447..dfda6a97303c2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2521,12 +2521,11 @@ class BoUpSLP {
       SD->IsScheduled = true;
       LLVM_DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");
 
-      ScheduleData *BundleMember = SD;
-      while (BundleMember) {
-        if (BundleMember->Inst != BundleMember->OpValue) {
-          BundleMember = BundleMember->NextInBundle;
+      for (ScheduleData *BundleMember = SD; BundleMember;
+           BundleMember = BundleMember->NextInBundle) {
+        if (BundleMember->Inst != BundleMember->OpValue)
           continue;
-        }
+        
         // Handle the def-use chain dependencies.
 
         // Decrement the unscheduled counter and insert to ready list if ready.
@@ -2591,7 +2590,6 @@ class BoUpSLP {
                        << "SLP:    gets ready (mem): " << *DepBundle << "\n");
           }
         }
-        BundleMember = BundleMember->NextInBundle;
       }
     }
 
@@ -7659,8 +7657,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
 
     // Move the scheduled instruction(s) to their dedicated places, if not
     // there yet.
-    ScheduleData *BundleMember = picked;
-    while (BundleMember) {
+    for (ScheduleData *BundleMember = picked; BundleMember;
+         BundleMember = BundleMember->NextInBundle) {
       Instruction *pickedInst = BundleMember->Inst;
       if (pickedInst->getNextNode() != LastScheduledInst) {
         BS->BB->getInstList().remove(pickedInst);
@@ -7668,7 +7666,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
                                      pickedInst);
       }
       LastScheduledInst = pickedInst;
-      BundleMember = BundleMember->NextInBundle;
     }
 
     BS->schedule(picked, ReadyInsts);

From 40aef79db0b02b171a65b3a13053ae963a3e8753 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak 
Date: Mon, 10 Jan 2022 23:53:58 +0000
Subject: [PATCH 080/946] [MLIR][GPU] Add debug output to enable dumping GPU
 assembly

- Set the DEBUG_TYPE of SerializeToBlob to serialize-to-blob
- Add debug output to print the assembly or PTX for GPU modules before
  they are assembled and linked

Note that, as SerializeToBlob is a superclass of SerializeToCubin and
SerializeToHsaco, --debug-only=serialize-to-blob will dump the
intermediate compiler result for both of these passes.

In addition, if LLVM options such as --stop-after are used to control
the GPU kernel compilation process, the debug output will contain the
appropriate intermediate IR.

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D117519
---
 mlir/include/mlir/Dialect/GPU/Passes.h               |  7 +++++++
 mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp  | 10 ++++++++++
 mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp | 11 +++++++++++
 3 files changed, 28 insertions(+)

diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index c9b396edfdeb0..c9c6f8668b4d3 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -102,6 +102,13 @@ void registerGpuSerializeToCubinPass();
 /// annotation.
 void registerGpuSerializeToHsacoPass();
 
+/// Create an instance of the GPU kernel function to HSAco binary serialization
+/// pass.
+std::unique_ptr createGpuSerializeToHsacoPass(StringRef triple,
+                                                    StringRef arch,
+                                                    StringRef features,
+                                                    int optLevel);
+
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION
 #include "mlir/Dialect/GPU/Passes.h.inc"
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
index 6ae4662006089..8dadb630f4a94 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
@@ -21,6 +21,10 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 
+#include 
+
+#define DEBUG_TYPE "serialize-to-blob"
+
 using namespace mlir;
 
 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
@@ -76,6 +80,12 @@ void gpu::SerializeToBlobPass::runOnOperation() {
 
   std::string targetISA = std::move(maybeTargetISA.getValue());
 
+  LLVM_DEBUG({
+    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
+    llvm::dbgs() << targetISA << "\n";
+    llvm::dbgs().flush();
+  });
+
   // Serialize the target ISA.
   std::unique_ptr> blob = serializeISA(targetISA);
   if (!blob)
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index 75d14d2bb93ea..d9209b9012dd5 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -479,6 +479,17 @@ void mlir::registerGpuSerializeToHsacoPass() {
                                                       "", 2);
       });
 }
+
+/// Create an instance of the GPU kernel function to HSAco binary serialization
+/// pass.
+std::unique_ptr mlir::createGpuSerializeToHsacoPass(StringRef triple,
+                                                          StringRef arch,
+                                                          StringRef features,
+                                                          int optLevel) {
+  return std::make_unique(triple, arch, features,
+                                                optLevel);
+}
+
 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
 void mlir::registerGpuSerializeToHsacoPass() {}
 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE

From dd7b69a61fa382737f06ec36a133d6db645f4cb0 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 12:53:12 -0800
Subject: [PATCH 081/946] [RISCV] Remove HasStdExtV and HasStdExtZve* Predicates
 from tablegen.

No instructions should be using these. Everything should use
HasVInstructions* Predicates. Remove them so that they can't be
used by accident.
---
 llvm/lib/Target/RISCV/RISCV.td             | 9 ---------
 llvm/lib/Target/RISCV/RISCVSchedRocket.td  | 2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 2 +-
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 36c7263235ab5..7972ced08edd8 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -166,43 +166,34 @@ def FeatureStdExtZve32x
                        "'Zve32x' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW)",
                        [FeatureStdExtZvl32b]>;
-def HasStdExtZve32x : Predicate<"SubTarget->hasStdExtZve32x()">,
-                                 AssemblerPredicate<(all_of FeatureStdExtZve32x),
-                                 "'Zve32x' (Vector Extensions for Embedded Processors "
-                                 "with maximal 32 EEW)">;
 
 def FeatureStdExtZve32f
     : SubtargetFeature<"experimental-zve32f", "HasStdExtZve32f", "true",
                        "'Zve32f' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW and F extension)",
                        [FeatureStdExtZve32x]>;
-def HasStdExtZve32f : Predicate<"SubTarget->hasStdExtZve32f()">;
 
 def FeatureStdExtZve64x
     : SubtargetFeature<"experimental-zve64x", "HasStdExtZve64x", "true",
                        "'Zve64x' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW)", [FeatureStdExtZve32x, FeatureStdExtZvl64b]>;
-def HasStdExtZve64x : Predicate<"SubTarget->hasStdExtZve64x()">;
 
 def FeatureStdExtZve64f
     : SubtargetFeature<"experimental-zve64f", "HasStdExtZve64f", "true",
                        "'Zve64f' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW and F extension)",
                        [FeatureStdExtZve32f, FeatureStdExtZve64x]>;
-def HasStdExtZve64f : Predicate<"SubTarget->hasStdExtZve64f()">;
 
 def FeatureStdExtZve64d
     : SubtargetFeature<"experimental-zve64d", "HasStdExtZve64d", "true",
                        "'Zve64d' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW, F and D extension)",
                        [FeatureStdExtZve64f]>;
-def HasStdExtZve64d : Predicate<"SubTarget->hasStdExtZve64d()">;
 
 def FeatureStdExtV
     : SubtargetFeature<"experimental-v", "HasStdExtV", "true",
                        "'V' (Vector Extension for Application Processors)",
                        [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>;
-def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">;
 
 def HasVInstructions    : Predicate<"Subtarget->hasVInstructions()">,
       AssemblerPredicate<
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 4655015a9d1ec..b907ada3a1d5a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64];
+  let UnsupportedFeatures = [HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3b3e2699d6b60..5672637a40cc2 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtV];
+  let UnsupportedFeatures = [HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.

From 4c1dc65015ae8ba4b7e0ac56f4d88d29e712ce25 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Thu, 20 Jan 2022 14:41:01 -0500
Subject: [PATCH 082/946] [InstCombine] add/adjust tests for multiply with
 extended bool; NFC

---
 llvm/test/Transforms/InstCombine/mul.ll | 51 +++++++++++++++++++++----
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 76dc39598dc7f..e99d1e9cfd181 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -103,32 +103,69 @@ define i32 @mul_bool(i32 %x, i1 %y) {
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %z = zext i1 %y to i32
-  %m = mul i32 %x, %z
+  %m = mul i32 %z, %x
   ret i32 %m
 }
 
-; Commute and test vector type.
-
 define <2 x i32> @mul_bool_vec(<2 x i32> %x, <2 x i1> %y) {
 ; CHECK-LABEL: @mul_bool_vec(
 ; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %z = zext <2 x i1> %y to <2 x i32>
-  %m = mul <2 x i32> %x, %z
+  %m = mul <2 x i32> %z, %x
   ret <2 x i32> %m
 }
 
-define <2 x i32> @mul_bool_vec_commute(<2 x i32> %x, <2 x i1> %y) {
+define <2 x i32> @mul_bool_vec_commute(<2 x i32> %px, <2 x i1> %y) {
 ; CHECK-LABEL: @mul_bool_vec_commute(
-; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[PX:%.*]], [[PX]]
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
+  %x = mul <2 x i32> %px, %px  ; thwart complexity-based canonicalization
   %z = zext <2 x i1> %y to <2 x i32>
-  %m = mul <2 x i32> %z, %x
+  %m = mul <2 x i32> %x, %z
   ret <2 x i32> %m
 }
 
+; X * C (when X is a sext boolean) --> X ? -C : 0
+
+define i32 @mul_sext_bool(i1 %x) {
+; CHECK-LABEL: @mul_sext_bool(
+; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %s = sext i1 %x to i32
+  %m = mul i32 %s, 42
+  ret i32 %m
+}
+
+define i32 @mul_sext_bool_use(i1 %x) {
+; CHECK-LABEL: @mul_sext_bool_use(
+; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT:    call void @use32(i32 [[S]])
+; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %s = sext i1 %x to i32
+  call void @use32(i32 %s)
+  %m = mul i32 %s, 42
+  ret i32 %m
+}
+
+define <2 x i8> @mul_sext_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @mul_sext_bool_vec(
+; CHECK-NEXT:    [[S:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT:    [[M:%.*]] = mul <2 x i8> [[S]], 
+; CHECK-NEXT:    ret <2 x i8> [[M]]
+;
+  %s = sext <2 x i1> %x to <2 x i8>
+  %m = mul <2 x i8> %s, 
+  ret <2 x i8> %m
+}
+
 define <3 x i7> @mul_bools(<3 x i1> %x, <3 x i1> %y) {
 ; CHECK-LABEL: @mul_bools(
 ; CHECK-NEXT:    [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]]

From a7a2860d0eee37d9e0fd0b6a8e3d884f8ee4ec16 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Thu, 20 Jan 2022 14:51:45 -0500
Subject: [PATCH 083/946] [InstCombine] convert mul with sexted bool and
 constant to select

We already have the related folds for zext-of-bool, so it
should make things more consistent to have this transform
to select for sext-of-bool too:
https://alive2.llvm.org/ce/z/YikdfA

Fixes #53319
---
 .../Transforms/InstCombine/InstCombineMulDivRem.cpp  | 12 ++++++++++--
 llvm/test/Transforms/InstCombine/mul.ll              |  8 +++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index aca7ec8d7325a..076c3134d0782 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -348,13 +348,21 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     return CastInst::Create(Instruction::SExt, And, I.getType());
   }
 
-  // (bool X) * Y --> X ? Y : 0
-  // Y * (bool X) --> X ? Y : 0
+  // (zext bool X) * Y --> X ? Y : 0
+  // Y * (zext bool X) --> X ? Y : 0
   if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(X, Op1, ConstantInt::get(I.getType(), 0));
   if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(X, Op0, ConstantInt::get(I.getType(), 0));
 
+  // (sext bool X) * C --> X ? -C : 0
+  Constant *ImmC;
+  if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) &&
+      match(Op1, m_ImmConstant(ImmC))) {
+    Constant *NegC = ConstantExpr::getNeg(ImmC);
+    return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType()));
+  }
+
   // (lshr X, 31) * Y --> (ashr X, 31) & Y
   // Y * (lshr X, 31) --> (ashr X, 31) & Y
   // TODO: We are not checking one-use because the elimination of the multiply
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index e99d1e9cfd181..56947040f78a9 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -133,8 +133,7 @@ define <2 x i32> @mul_bool_vec_commute(<2 x i32> %px, <2 x i1> %y) {
 
 define i32 @mul_sext_bool(i1 %x) {
 ; CHECK-LABEL: @mul_sext_bool(
-; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[X:%.*]], i32 -42, i32 0
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %s = sext i1 %x to i32
@@ -146,7 +145,7 @@ define i32 @mul_sext_bool_use(i1 %x) {
 ; CHECK-LABEL: @mul_sext_bool_use(
 ; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[S]])
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[X]], i32 -42, i32 0
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %s = sext i1 %x to i32
@@ -157,8 +156,7 @@ define i32 @mul_sext_bool_use(i1 %x) {
 
 define <2 x i8> @mul_sext_bool_vec(<2 x i1> %x) {
 ; CHECK-LABEL: @mul_sext_bool_vec(
-; CHECK-NEXT:    [[S:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[M:%.*]] = mul <2 x i8> [[S]], 
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> , <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %s = sext <2 x i1> %x to <2 x i8>

From 60f61918795b91f3fc9d310bc0957b75783f826b Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Thu, 20 Jan 2022 13:06:55 -0800
Subject: [PATCH 084/946] [SLP] Extract formBundle helper for readability [NFC]

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 67 ++++++++++++-------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dfda6a97303c2..1b4e90e78d955 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2618,6 +2618,10 @@ class BoUpSLP {
       }
     }
 
+    /// Build a bundle from the ScheduleData nodes corresponding to the
+    /// scalar instruction for each lane.
+    ScheduleData *buildBundle(ArrayRef VL);
+
     /// Checks if a bundle of instructions can be scheduled, i.e. has no
     /// cyclic dependencies. This is only a dry-run, no instructions are
     /// actually moved at this stage.
@@ -7227,6 +7231,33 @@ void BoUpSLP::optimizeGatherSequence() {
   GatherShuffleSeq.clear();
 }
 
+BoUpSLP::ScheduleData *
+BoUpSLP::BlockScheduling::buildBundle(ArrayRef VL) {
+  ScheduleData *Bundle = nullptr;  
+  ScheduleData *PrevInBundle = nullptr;
+  for (Value *V : VL) {
+    ScheduleData *BundleMember = getScheduleData(V);
+    assert(BundleMember &&
+           "no ScheduleData for bundle member "
+           "(maybe not in same basic block)");
+    assert(BundleMember->isSchedulingEntity() &&
+           "bundle member already part of other bundle");
+    if (PrevInBundle) {
+      PrevInBundle->NextInBundle = BundleMember;
+    } else {
+      Bundle = BundleMember;
+    }
+    BundleMember->UnscheduledDepsInBundle = 0;
+    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
+
+    // Group the instructions to a bundle.
+    BundleMember->FirstInBundle = Bundle;
+    PrevInBundle = BundleMember;
+  }
+  assert(Bundle && "Failed to find schedule bundle");
+  return Bundle;
+};
+
 // Groups the instructions to a bundle (which is then a single scheduling entity)
 // and schedules instructions until the bundle gets ready.
 Optional
@@ -7239,9 +7270,6 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP,
 
   // Initialize the instruction bundle.
   Instruction *OldScheduleEnd = ScheduleEnd;
-  ScheduleData *PrevInBundle = nullptr;
-  ScheduleData *Bundle = nullptr;
-  bool ReSchedule = false;
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");
 
   auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
@@ -7293,33 +7321,22 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP,
     }
   }
 
+  bool ReSchedule = false;
   for (Value *V : VL) {
     ScheduleData *BundleMember = getScheduleData(V);
     assert(BundleMember &&
            "no ScheduleData for bundle member (maybe not in same basic block)");
-    if (BundleMember->IsScheduled) {
-      // A bundle member was scheduled as single instruction before and now
-      // needs to be scheduled as part of the bundle. We just get rid of the
-      // existing schedule.
-      LLVM_DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
-                        << " was already scheduled\n");
-      ReSchedule = true;
-    }
-    assert(BundleMember->isSchedulingEntity() &&
-           "bundle member already part of other bundle");
-    if (PrevInBundle) {
-      PrevInBundle->NextInBundle = BundleMember;
-    } else {
-      Bundle = BundleMember;
-    }
-    BundleMember->UnscheduledDepsInBundle = 0;
-    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
-
-    // Group the instructions to a bundle.
-    BundleMember->FirstInBundle = Bundle;
-    PrevInBundle = BundleMember;
+    if (!BundleMember->IsScheduled)
+      continue;
+    // A bundle member was scheduled as single instruction before and now
+    // needs to be scheduled as part of the bundle. We just get rid of the
+    // existing schedule.
+    LLVM_DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
+                      << " was already scheduled\n");
+    ReSchedule = true;
   }
-  assert(Bundle && "Failed to find schedule bundle");
+
+  auto *Bundle = buildBundle(VL);
   TryScheduleBundleImpl(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
     cancelScheduling(VL, S.OpValue);

From 5ef7abbc6f99b46c25c95ad462844990861e8927 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Thu, 20 Jan 2022 12:54:03 -0800
Subject: [PATCH 085/946] [mlir:TilingInterface] Remove unnecessary include of
 Tensor.h

Interfaces in Interfaces/ should not depend on any dialects, and this include
is unnecessary anyways.
---
 mlir/lib/Interfaces/TilingInterface.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Interfaces/TilingInterface.cpp b/mlir/lib/Interfaces/TilingInterface.cpp
index 6e252ca5418fc..67ddb5b1c1c50 100644
--- a/mlir/lib/Interfaces/TilingInterface.cpp
+++ b/mlir/lib/Interfaces/TilingInterface.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Interfaces/TilingInterface.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
 
 using namespace mlir;
 

From 1f3f90ab8869887561acadb2540836e7959419f5 Mon Sep 17 00:00:00 2001
From: Tue Ly 
Date: Thu, 20 Jan 2022 13:51:04 -0500
Subject: [PATCH 086/946] [libc] Make log2f correctly rounded for all rounding
 modes when FMA is not available.

Add two more exceptional cases to log2f, found when FMA is not used for polyeval.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117812
---
 libc/src/math/generic/log2f.cpp   | 15 ++++++++++++++-
 libc/test/src/math/log2f_test.cpp |  8 ++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp
index d2f40fe6763d0..dc5a6b670afd5 100644
--- a/libc/src/math/generic/log2f.cpp
+++ b/libc/src/math/generic/log2f.cpp
@@ -105,11 +105,24 @@ LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
   int m = 0;
 
   // Hard to round value(s).
-  if (FPBits(x).uintval() == 0x3f81d0b5U) {
+  switch (FPBits(x).uintval()) {
+  case 0x3f81d0b5U: {
     int rounding_mode = fputil::get_round();
     if (rounding_mode == FE_DOWNWARD || rounding_mode == FE_TOWARDZERO) {
       return 0x1.4cdc4cp-6f;
     }
+    break;
+  }
+  case 0x3f7e3274U:
+    if (fputil::get_round() == FE_TONEAREST) {
+      return -0x1.4e1d16p-7f;
+    }
+    break;
+  case 0x3f7d57f5U:
+    if (fputil::get_round() == FE_TOWARDZERO) {
+      return -0x1.ed1c32p-7f;
+    }
+    break;
   }
 
   // Exceptional inputs.
diff --git a/libc/test/src/math/log2f_test.cpp b/libc/test/src/math/log2f_test.cpp
index 1708552f0c291..ecedfb68cc7a7 100644
--- a/libc/test/src/math/log2f_test.cpp
+++ b/libc/test/src/math/log2f_test.cpp
@@ -31,10 +31,10 @@ TEST(LlvmLibcLog2fTest, SpecialNumbers) {
 }
 
 TEST(LlvmLibcLog2fTest, TrickyInputs) {
-  constexpr int N = 9;
-  constexpr uint32_t INPUTS[N] = {0x3f7d57f5U, 0x3f7ed848U, 0x3f7fd6ccU,
-                                  0x3f7fffffU, 0x3f80079bU, 0x3f81d0b5U,
-                                  0x3f82e602U, 0x3f83c98dU, 0x3f8cba39U};
+  constexpr int N = 10;
+  constexpr uint32_t INPUTS[N] = {
+      0x3f7d57f5U, 0x3f7e3274U, 0x3f7ed848U, 0x3f7fd6ccU, 0x3f7fffffU,
+      0x3f80079bU, 0x3f81d0b5U, 0x3f82e602U, 0x3f83c98dU, 0x3f8cba39U};
 
   for (int i = 0; i < N; ++i) {
     float x = float(FPBits(INPUTS[i]));

From 8c9f62ea90c70d538766a81ef5980c9223b8566b Mon Sep 17 00:00:00 2001
From: John Ericson 
Date: Thu, 20 Jan 2022 19:04:15 +0000
Subject: [PATCH 087/946] [compiler-rt][cmake] Use HandleOutOfTreeLLVM like
 libcxx and friends

This gives us the option of using CMake modules from LLVM, and other
things. We will use that to deduplicate code later.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D117815
---
 compiler-rt/CMakeLists.txt | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 12946d74c797b..974e2333c7abd 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -5,13 +5,6 @@
 
 cmake_minimum_required(VERSION 3.13.4)
 
-# Check if compiler-rt is built as a standalone project.
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
-  project(CompilerRT C CXX ASM)
-  set(COMPILER_RT_STANDALONE_BUILD TRUE)
-  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-endif()
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom compiler-rt modules.
@@ -22,6 +15,16 @@ list(INSERT CMAKE_MODULE_PATH 0
   "${LLVM_COMMON_CMAKE_UTILS}/Modules"
   )
 
+# Check if compiler-rt is built as a standalone project.
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
+  project(CompilerRT C CXX ASM)
+  set(COMPILER_RT_STANDALONE_BUILD TRUE)
+  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+  # Find the LLVM sources and simulate LLVM CMake options.
+  include(HandleOutOfTreeLLVM)
+endif()
+
 if(CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_CFG_RESOLVED_INTDIR "${CMAKE_CFG_INTDIR}/")
 else()

From 4af11272f57a4a6fed2932e9e0857b2c1a707c51 Mon Sep 17 00:00:00 2001
From: John Ericson 
Date: Tue, 18 Jan 2022 23:34:54 +0000
Subject: [PATCH 088/946] [cmake] Duplicate
 `{llvm,compiler_rt}_check_linker_flag` for runtime libs and llvm

We previously had a few varied definitions of this floating around. I made the one installed with LLVM handle all the cases, and then made the others use it.

This issue was reported to me in https://reviews.llvm.org/D116521#3248117 as
D116521 made clang and llvm use the common cmake utils.

Reviewed By: sebastian-ne, phosek, #libunwind, #libc, #libc_abi, ldionne

Differential Revision: https://reviews.llvm.org/D117537
---
 cmake/Modules/CheckLinkerFlag.cmake          | 17 -----------------
 compiler-rt/cmake/config-ix.cmake            | 20 +++++++-------------
 libcxx/cmake/config-ix.cmake                 |  4 ++--
 libunwind/cmake/config-ix.cmake              |  8 ++++----
 llvm/cmake/modules/LLVMCheckLinkerFlag.cmake | 12 ++++++++++--
 runtimes/CMakeLists.txt                      |  6 +++---
 6 files changed, 26 insertions(+), 41 deletions(-)
 delete mode 100644 cmake/Modules/CheckLinkerFlag.cmake

diff --git a/cmake/Modules/CheckLinkerFlag.cmake b/cmake/Modules/CheckLinkerFlag.cmake
deleted file mode 100644
index 722fe5b1b8ead..0000000000000
--- a/cmake/Modules/CheckLinkerFlag.cmake
+++ /dev/null
@@ -1,17 +0,0 @@
-include(CMakePushCheckState)
-include(CheckCCompilerFlag)
-
-function(llvm_check_linker_flag flag dest)
-  # If testing a flag with check_c_compiler_flag, it gets added to the compile
-  # command only, but not to the linker command in that test. If the flag
-  # is vital for linking to succeed, the test would fail even if it would
-  # have succeeded if it was included on both commands.
-  #
-  # Therefore, try adding the flag to CMAKE_REQUIRED_FLAGS, which gets
-  # added to both compiling and linking commands in the tests.
-
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
-  check_c_compiler_flag("" ${dest})
-  cmake_pop_check_state()
-endfunction()
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 33693ce60321d..596f61e8c82ec 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -1,4 +1,5 @@
 include(CMakePushCheckState)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckIncludeFiles)
@@ -6,13 +7,6 @@ include(CheckLibraryExists)
 include(CheckSymbolExists)
 include(TestBigEndian)
 
-function(compiler_rt_check_linker_flag flag out_var)
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
-  check_cxx_compiler_flag("" ${out_var})
-  cmake_pop_check_state()
-endfunction()
-
 check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
 if (COMPILER_RT_USE_BUILTINS_LIBRARY)
   include(HandleCompilerRT)
@@ -171,12 +165,12 @@ check_library_exists(c++ __cxa_throw "" COMPILER_RT_HAS_LIBCXX)
 check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX)
 
 # Linker flags.
-compiler_rt_check_linker_flag("-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
-compiler_rt_check_linker_flag("-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
+llvm_check_linker_flag(CXX "-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
+llvm_check_linker_flag(CXX "-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
 
 if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
   set(VERS_COMPAT_OPTION "-Wl,-z,gnu-version-script-compat")
-  compiler_rt_check_linker_flag("${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
+  llvm_check_linker_flag(CXX "${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
 endif()
 
 set(DUMMY_VERS ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/dummy.vers)
@@ -187,10 +181,10 @@ if(COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
   # -z gnu-version-script-compat.
   string(APPEND VERS_OPTION " ${VERS_COMPAT_OPTION}")
 endif()
-compiler_rt_check_linker_flag("${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
+llvm_check_linker_flag(CXX "${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
 
 if(ANDROID)
-  compiler_rt_check_linker_flag("-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
+  llvm_check_linker_flag(CXX "-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
   check_library_exists(log __android_log_write "" COMPILER_RT_HAS_LIBLOG)
 endif()
 
@@ -436,7 +430,7 @@ if(APPLE)
     -lc++
     -lc++abi)
 
-  compiler_rt_check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
+  llvm_check_linker_flag(CXX "-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
   if(COMPILER_RT_HAS_APP_EXTENSION)
     list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension")
   endif()
diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index 689a9d09c0179..e114337f081a3 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -1,6 +1,6 @@
 include(CMakePushCheckState)
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckCSourceCompiles)
@@ -12,7 +12,7 @@ include(CheckCSourceCompiles)
 # libunwind (and the compiler implicit -lunwind wouldn't succeed as the newly
 # built libunwind isn't installed yet). For those cases, it'd be good to
 # link with --uwnindlib=none. Check if that option works.
-llvm_check_linker_flag("--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag(C "--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index c814611d43785..c1e1b4631abfe 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -2,14 +2,14 @@ include(CMakePushCheckState)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckSymbolExists)
 include(CheckCSourceCompiles)
 
 # The compiler driver may be implicitly trying to link against libunwind, which
 # might not work if libunwind doesn't exist yet. Try to check if
 # --unwindlib=none is supported, and use that if possible.
-llvm_check_linker_flag("--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag(C "--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
@@ -34,11 +34,11 @@ endif()
 # required for the link to go through. We remove sanitizers from the
 # configuration checks to avoid spurious link errors.
 
-llvm_check_linker_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(C "-nostdlib++" LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
 if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 else()
-  llvm_check_linker_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
+  llvm_check_linker_flag(C "-nodefaultlibs" LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
   if (LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs")
   endif()
diff --git a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
index 253dd768654a2..79c4e2cb4c2cd 100644
--- a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
+++ b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
@@ -5,14 +5,22 @@ if (COMMAND check_linker_flag)
     check_linker_flag(${ARGN})
   endmacro()
 else()
+  # Until the minimum CMAKE version is 3.18
+
   include(CheckCXXCompilerFlag)
   include(CMakePushCheckState)
 
-  # cmake builtin compatible, except we assume lang is CXX
+  # cmake builtin compatible, except we assume lang is C or CXX
   function(llvm_check_linker_flag lang flag out_var)
     cmake_push_check_state()
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}")
-    check_cxx_compiler_flag("" ${out_var})
+    if("${lang}" STREQUAL "C")
+      check_c_compiler_flag("" ${out_var})
+    elseif("${lang}" STREQUAL "CXX")
+      check_cxx_compiler_flag("" ${out_var})
+    else()
+      message(FATAL_ERROR "\"${lang}\" is not C or CXX")
+    endif()
     cmake_pop_check_state()
   endfunction()
 endif()
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index cedce7b3541e5..1a50d9e8c98b3 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -88,7 +88,7 @@ set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules)
 set(LLVM_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../llvm)
 
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 
@@ -100,7 +100,7 @@ if (NOT LLVM_RUNTIMES_LINKING_WORKS)
   # --unwindlib=none is supported, and use that if possible.
   # Don't add this if not necessary to fix linking, as it can break using
   # e.g. ASAN/TSAN.
-  llvm_check_linker_flag("--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
+  llvm_check_linker_flag(C "--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
   if (LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
   endif()
@@ -110,7 +110,7 @@ endif()
 # Check for -nostdlib++ first; if there's no C++ standard library yet,
 # all check_cxx_compiler_flag commands will fail until we add -nostdlib++
 # (or -nodefaultlibs).
-llvm_check_linker_flag(-nostdlib++ LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(C "-nostdlib++" LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
 if (LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 endif()

From 36cb29cbbe1b22dcd298ad65e1fabe899b7d7249 Mon Sep 17 00:00:00 2001
From: Adrian Prantl 
Date: Thu, 20 Jan 2022 13:36:55 -0800
Subject: [PATCH 089/946] Work around a module build failure on the bots.

This patch works around what looks like a bug in Clang itself.

The error on the bot is:

https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/40466/consoleText

In module 'LLVM_Utils' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h:18:
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: error: 'llvm::Expected::(anonymous)' from module 'LLVM_Utils.Support.Error' is not present in definition of 'llvm::Expected' in module 'LLVM_Utils.Support.Error'
  union {
  ^
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: note: declaration of '' does not match
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: note: declaration of '' does not match
1 error generated.

The intention is to revert this as soon as a proper fix has been identified!

rdar://87845391
---
 lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h b/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
index 48f27b09b95c5..c99372fa110cd 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
@@ -9,6 +9,13 @@
 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_LLDB_PYTHON_H
 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_LLDB_PYTHON_H
 
+// BEGIN FIXME
+// This declaration works around a clang module build failure.
+// It should be deleted ASAP.
+#include "llvm/Support/Error.h"
+static llvm::Expected *g_fcxx_modules_workaround;
+// END
+
 #include "lldb/Host/Config.h"
 
 // Python.h needs to be included before any system headers in order to avoid

From 5a670f1378aef27e8fe21fdf4aa47eea0e97f7f5 Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Thu, 20 Jan 2022 13:58:13 -0800
Subject: [PATCH 090/946] [SLP] Kill an unused param and use a for-loop in
 calculateDependencies [NFC]

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1b4e90e78d955..f0a031168f48e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -581,7 +581,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
 }
 
 /// \returns the AA location that is being access by the instruction.
-static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
+static MemoryLocation getLocation(Instruction *I) {
   if (StoreInst *SI = dyn_cast(I))
     return MemoryLocation::get(SI);
   if (LoadInst *LI = dyn_cast(I))
@@ -7551,12 +7551,12 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
       Instruction *SrcInst = BundleMember->Inst;
       assert(SrcInst->mayReadOrWriteMemory() &&
              "NextLoadStore list for non memory effecting bundle?");
-      MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
+      MemoryLocation SrcLoc = getLocation(SrcInst);
       bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
       unsigned numAliased = 0;
       unsigned DistToSrc = 1;
 
-      while (DepDest) {
+      for ( ; DepDest; DepDest = DepDest->NextLoadStore) {
         assert(isInSchedulingRegion(DepDest));
 
         // We have two limits to reduce the complexity:
@@ -7586,7 +7586,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
             WorkList.push_back(DestBundle);
           }
         }
-        DepDest = DepDest->NextLoadStore;
 
         // Example, explaining the loop break condition: Let's assume our
         // starting instruction is i0 and MaxMemDepDistance = 3.

From 41ebd1968165d8f9646f3dc2bfbed834baaede8e Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Wed, 1 Dec 2021 13:44:42 -0800
Subject: [PATCH 091/946] [AMDGPU] Do not ignore exec use where exec is read as
 data

Compares, v_cndmask_b32, and v_readfirstlane_b32 use EXEC
in a way which modifies the result. This implicit EXEC use
shall not be ignored for the purposes of instruction moves.

Differential Revision: https://reviews.llvm.org/D117814
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  21 +-
 llvm/test/CodeGen/AMDGPU/licm-valu.mir        | 245 ++++++++++++++++++
 .../CodeGen/AMDGPU/mul24-pass-ordering.ll     |  20 +-
 3 files changed, 275 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/licm-valu.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 82776a77c58da..3ac04ee717dea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -130,10 +130,29 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   return false;
 }
 
+static bool readsExecAsData(const MachineInstr &MI) {
+  if (MI.isCompare())
+    return true;
+
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::V_READFIRSTLANE_B32:
+  case AMDGPU::V_CNDMASK_B64_PSEUDO:
+  case AMDGPU::V_CNDMASK_B32_dpp:
+  case AMDGPU::V_CNDMASK_B32_e32:
+  case AMDGPU::V_CNDMASK_B32_e64:
+  case AMDGPU::V_CNDMASK_B32_sdwa:
+    return true;
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
   // Any implicit use of exec by VALU is not a real register read.
   return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
-         isVALU(*MO.getParent());
+         isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
 }
 
 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
new file mode 100644
index 0000000000000..45a050cffde9c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -0,0 +1,245 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: hoist_move
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: hoist_move
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cmp
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cmp
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_readfirstlane
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_readfirstlane
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF]], implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %1:sgpr_32 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e64
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_e64
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF1]], implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %0, %1, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e32
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_e32
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 [[DEF]], [[DEF]], implicit undef $vcc, implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e32 %0, %0, implicit undef $vcc, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_dpp
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_dpp
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp [[DEF]], 0, [[DEF]], 0, [[DEF]], 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_dpp %0:vgpr_32, 0, %0:vgpr_32, 0, %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_sdwa
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_sdwa
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[DEF]], 0, [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, %0:vgpr_32, 0, %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index d1970a735a17a..2fac70d96bc31 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -54,17 +54,17 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-LABEL: lsr_order_mul24_1:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
-; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GFX9-NEXT:    v_and_b32_e32 v5, 1, v18
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v5
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v1
+; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GFX9-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX9-NEXT:  ; %bb.1: ; %bb19
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, v6
-; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v6
-; GFX9-NEXT:    v_and_b32_e32 v8, 1, v18
 ; GFX9-NEXT:    v_add_u32_e32 v4, v4, v0
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
-; GFX9-NEXT:    v_lshl_add_u32 v7, v4, 2, v3
+; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v6
+; GFX9-NEXT:    v_lshl_add_u32 v6, v4, 2, v3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v7
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v8, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v9, v17, v12
 ; GFX9-NEXT:    s_mov_b64 s[10:11], 0
@@ -76,7 +76,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-NEXT:    v_add_u32_e32 v12, v17, v0
 ; GFX9-NEXT:    v_add_u32_e32 v19, v9, v0
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
-; GFX9-NEXT:    v_madak_f32 v3, v3, v6, 0x3727c5ac
+; GFX9-NEXT:    v_madak_f32 v3, v3, v7, 0x3727c5ac
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX9-NEXT:    v_mul_u32_u24_e32 v18, v3, v5
 ; GFX9-NEXT:    v_add_u32_e32 v3, v3, v16
@@ -97,8 +97,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-NEXT:    s_or_b64 s[10:11], s[6:7], s[10:11]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[4:5]
-; GFX9-NEXT:    ds_write_b32 v7, v3
-; GFX9-NEXT:    v_add_u32_e32 v7, v7, v8
+; GFX9-NEXT:    ds_write_b32 v6, v3
+; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
 ; GFX9-NEXT:    s_andn2_b64 exec, exec, s[10:11]
 ; GFX9-NEXT:    s_cbranch_execnz .LBB1_2
 ; GFX9-NEXT:  .LBB1_3: ; %Flow3

From c0906f6b21a1f64f493c9180622d5978d05b17c2 Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Thu, 20 Jan 2022 14:07:46 -0800
Subject: [PATCH 092/946] [SLP] Remove stray semicolon to make bots happy

Certain bots (e.g. sanitizer-x86_64-linux-android) appear to be running with strict c++98 flags which disallow ; at global scope.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f0a031168f48e..475ab7e1f495e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7256,7 +7256,7 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef VL) {
   }
   assert(Bundle && "Failed to find schedule bundle");
   return Bundle;
-};
+}
 
 // Groups the instructions to a bundle (which is then a single scheduling entity)
 // and schedules instructions until the bundle gets ready.

From 1d4ca42b43805f7199b921f35c1257b2cbad72c9 Mon Sep 17 00:00:00 2001
From: Michael Kruse 
Date: Thu, 20 Jan 2022 10:42:17 -0600
Subject: [PATCH 093/946] [OpenMPIRBuilder] Detect ambiguous InsertPoints for
 apply*WorkshareLoop. NFC.

Follow-up on D117226 for applyStaticWorkshareLoop and
applyDynamicWorkshareLoop checking for conflicting InsertPoints via an
assert. There is no in-tree code that violates this assertion, hence
nothing changes.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 0bf0c832d5ddf..177ae9a47db75 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1516,6 +1516,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
+  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
+         "Require dedicated allocate IP");
 
   // Set up the source location value for OpenMP runtime.
   Builder.restoreIP(CLI->getPreheaderIP());
@@ -1646,6 +1648,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
     OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
+  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
+         "Require dedicated allocate IP");
 
   // Set up the source location value for OpenMP runtime.
   Builder.SetCurrentDebugLocation(DL);

From 39f779afb35420c51e2c7094b2643a53baed9f59 Mon Sep 17 00:00:00 2001
From: Nathan James 
Date: Thu, 20 Jan 2022 22:20:10 +0000
Subject: [PATCH 094/946] [clang-tidy][NFC] Remove redundant string creation
 for comparison

---
 .../clang-tidy/utils/IncludeSorter.cpp          | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
index fbc1dc6d52a0b..8d620ca3af681 100644
--- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
+++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
@@ -84,15 +84,14 @@ determineIncludeKind(StringRef CanonicalFile, StringRef IncludeFile,
   if ((Style == IncludeSorter::IS_Google) ||
       (Style == IncludeSorter::IS_Google_ObjC)) {
     std::pair Parts = CanonicalInclude.split("/public/");
-    std::string AltCanonicalInclude =
-        Parts.first.str() + "/internal/" + Parts.second.str();
-    std::string ProtoCanonicalInclude =
-        Parts.first.str() + "/proto/" + Parts.second.str();
-
-    // Determine the kind of this inclusion.
-    if (CanonicalFile.equals(AltCanonicalInclude) ||
-        CanonicalFile.equals(ProtoCanonicalInclude)) {
-      return IncludeSorter::IK_MainTUInclude;
+    StringRef FileCopy = CanonicalFile;
+    if (FileCopy.consume_front(Parts.first) &&
+        FileCopy.consume_back(Parts.second)) {
+      // Determine the kind of this inclusion.
+      if (FileCopy.equals("/internal/") ||
+          FileCopy.equals("/proto/")) {
+        return IncludeSorter::IK_MainTUInclude;
+      }
     }
   }
   if (Style == IncludeSorter::IS_Google_ObjC) {

From c95afac89e000b65edc51dc7d05610250a62c86f Mon Sep 17 00:00:00 2001
From: owenca 
Date: Thu, 20 Jan 2022 01:59:52 -0800
Subject: [PATCH 095/946] [clang-format][NFC] Clean up tryMergeLessLess()

Differential Revision: https://reviews.llvm.org/D117759
---
 clang/lib/Format/FormatTokenLexer.cpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index e8b9b3d61c888..c9166f4b17aab 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -430,25 +430,22 @@ bool FormatTokenLexer::tryMergeLessLess() {
     return false;
 
   auto First = Tokens.end() - 3;
-  bool FourthTokenIsLess = false;
-
-  if (Tokens.size() > 3) {
-    auto Fourth = (Tokens.end() - 4)[0];
-    FourthTokenIsLess = Fourth->is(tok::less);
-
-    // Do not remove a whitespace between the two "<" e.g. "operator< <>".
-    if (First[2]->is(tok::greater) && Fourth->is(tok::kw_operator))
-      return false;
-  }
-
-  if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
-      First[0]->isNot(tok::less) || FourthTokenIsLess)
+  if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))
     return false;
 
   // Only merge if there currently is no whitespace between the two "<".
   if (First[1]->hasWhitespaceBefore())
     return false;
 
+  auto X = Tokens.size() > 3 ? First[-1] : nullptr;
+  auto Y = First[2];
+  if ((X && X->is(tok::less)) || Y->is(tok::less))
+    return false;
+
+  // Do not remove a whitespace between the two "<" e.g. "operator< <>".
+  if (X && X->is(tok::kw_operator) && Y->is(tok::greater))
+    return false;
+
   First[0]->Tok.setKind(tok::lessless);
   First[0]->TokenText = "<<";
   First[0]->ColumnWidth += 1;

From cd2d7369639e70df17e7977ac3a4a5b7854043fa Mon Sep 17 00:00:00 2001
From: Rob Suderman 
Date: Thu, 20 Jan 2022 14:32:19 -0800
Subject: [PATCH 096/946] [mlir][tosa] Limit right-shift to 31 bits

The multiplier can be right-shifted by a full 32 bits, which is undefined behavior.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D117732
---
 mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp b/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
index ff82acec21e97..3fa471c37f312 100644
--- a/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
+++ b/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
@@ -46,8 +46,8 @@ static void computeMultiplierAndShiftTosaScale16(double scale,
 
   // Shifting tops out at 63 bits. Right shift to make 63 bits the max.
   if (shift > 63) {
-    // Shifting the multiplier by more than 32-bits is unnecessary.
-    multiplier = multiplier >> std::min(32, shift - 63);
+    // Shifting the multiplier by more than 31-bits is unnecessary.
+    multiplier = multiplier >> std::min(31, shift - 63);
     shift = 63;
   }
 }
@@ -82,7 +82,7 @@ static void computeMultiplierAndShiftTosaScale32(double scale,
   // Shifting tops out at 63 bits. Right shift to make 63 bits the max.
   if (shift > 63) {
     // Shifting the multiplier by more than 32-bits is unnecessary.
-    multiplier = multiplier >> std::min(32, shift - 63);
+    multiplier = multiplier >> std::min(31, shift - 63);
     shift = 63;
   }
 }

From fa8bb224661dfb38cb2a246f7d98dc61fd45602e Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 14:16:37 -0800
Subject: [PATCH 097/946] [RISCV] Optimize vector_shuffles that are
 interleaving the lowest elements of two vectors.

RISCV only has a unary shuffle that requires placing indices in a
register. For interleaving two vectors this means we need at least
two vrgathers and a vmerge to do a shuffle of two vectors.

This patch teaches shuffle lowering to use a widening addu followed
by a widening vmaccu to implement the interleave. First we extract
the low half of both V1 and V2. Then we implement
(zext(V1) + zext(V2)) + (zext(V2) * zext(2^eltbits - 1)) which
simplifies to (zext(V1) + zext(V2) * 2^eltbits). This further
simplifies to (zext(V1) + (zext(V2) << eltbits)). Then we bitcast the
result back to the original type splitting the wide elements in half.

We can only do this if we have a type with wider elements available.
Because we're using extends we also have to be careful with fractional
lmuls. Floating point types are supported by bitcasting to/from integer.

The tests test a varied combination of LMULs split across VLEN>=128 and
VLEN>=512 tests. There are a few tests with shuffle indices commuted as well
as tests for undef indices. There's one test for a vXi64/vXf64 vector which
we can't optimize, but verifies we don't crash.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D117743
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 122 ++++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |   1 +
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |  10 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll  | 378 ++++++++++++++
 .../RISCV/rvv/fixed-vectors-int-interleave.ll | 484 ++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   |  15 +-
 6 files changed, 995 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f942f395d5328..507a21b16e4eb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2328,6 +2328,48 @@ static int matchShuffleAsSlideDown(ArrayRef Mask) {
   return -1;
 }
 
+static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, bool &SwapSources,
+                                const RISCVSubtarget &Subtarget) {
+  // We need to be able to widen elements to the next larger integer type.
+  if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+    return false;
+
+  int Size = Mask.size();
+  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+  int Srcs[] = {-1, -1};
+  for (int i = 0; i != Size; ++i) {
+    // Ignore undef elements.
+    if (Mask[i] < 0)
+      continue;
+
+    // Is this an even or odd element.
+    int Pol = i % 2;
+
+    // Ensure we consistently use the same source for this element polarity.
+    int Src = Mask[i] / Size;
+    if (Srcs[Pol] < 0)
+      Srcs[Pol] = Src;
+    if (Srcs[Pol] != Src)
+      return false;
+
+    // Make sure the element within the source is appropriate for this element
+    // in the destination.
+    int Elt = Mask[i] % Size;
+    if (Elt != i / 2)
+      return false;
+  }
+
+  // We need to find a source for each polarity and they can't be the same.
+  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
+    return false;
+
+  // Swap the sources if the second source was in the even polarity.
+  SwapSources = Srcs[0] > Srcs[1];
+
+  return true;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
@@ -2413,8 +2455,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     }
   }
 
+  ArrayRef Mask = SVN->getMask();
+
   // Try to match as a slidedown.
-  int SlideAmt = matchShuffleAsSlideDown(SVN->getMask());
+  int SlideAmt = matchShuffleAsSlideDown(Mask);
   if (SlideAmt >= 0) {
     // TODO: Should we reduce the VL to account for the upper undef elements?
     // Requires additional vsetvlis, but might be faster to execute.
@@ -2427,10 +2471,81 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
   }
 
+  // Detect an interleave shuffle and lower to
+  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
+  bool SwapSources;
+  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
+    // Swap sources if needed.
+    if (SwapSources)
+      std::swap(V1, V2);
+
+    // Extract the lower half of the vectors.
+    MVT HalfVT = VT.getHalfNumVectorElementsVT();
+    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                     DAG.getConstant(0, DL, XLenVT));
+    V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
+                     DAG.getConstant(0, DL, XLenVT));
+
+    // Double the element width and halve the number of elements in an int type.
+    unsigned EltBits = VT.getScalarSizeInBits();
+    MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
+    MVT WideIntVT =
+        MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
+    // Convert this to a scalable vector. We need to base this on the
+    // destination size to ensure there's always a type with a smaller LMUL.
+    MVT WideIntContainerVT =
+        getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
+
+    // Convert sources to scalable vectors with the same element count as the
+    // larger type.
+    MVT HalfContainerVT = MVT::getVectorVT(
+        VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
+    V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
+    V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
+
+    // Cast sources to integer.
+    MVT IntEltVT = MVT::getIntegerVT(EltBits);
+    MVT IntHalfVT =
+        MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
+    V1 = DAG.getBitcast(IntHalfVT, V1);
+    V2 = DAG.getBitcast(IntHalfVT, V2);
+
+    // Freeze V2 since we use it twice and we need to be sure that the add and
+    // multiply see the same value.
+    V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+
+    // Recreate TrueMask using the widened type's element count.
+    MVT MaskVT =
+        MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
+    TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+
+    // Widen V1 and V2 with 0s and add one copy of V2 to V1.
+    SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
+                              V2, TrueMask, VL);
+    // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
+    SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+                                     DAG.getAllOnesConstant(DL, XLenVT));
+    SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
+                                   V2, Multiplier, TrueMask, VL);
+    // Add the new copies to our previous addition giving us 2^eltbits copies of
+    // V2. This is equivalent to shifting V2 left by eltbits. This should
+    // combine with the vwmulu.vv above to form vwmaccu.vv.
+    Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
+                      TrueMask, VL);
+    // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
+    // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
+    // vector VT.
+    ContainerVT =
+        MVT::getVectorVT(VT.getVectorElementType(),
+                         WideIntContainerVT.getVectorElementCount() * 2);
+    Add = DAG.getBitcast(ContainerVT, Add);
+    return convertFromScalableVector(VT, Add, DAG, Subtarget);
+  }
+
   // Detect shuffles which can be re-expressed as vector selects; these are
   // shuffles in which each element in the destination is taken from an element
   // at the corresponding index in either source vectors.
-  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
+  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
     int MaskIndex = MaskIdx.value();
     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
   });
@@ -2456,7 +2571,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   // Now construct the mask that will be used by the vselect or blended
   // vrgather operation. For vrgathers, construct the appropriate indices into
   // each vector.
-  for (int MaskIndex : SVN->getMask()) {
+  for (int MaskIndex : Mask) {
     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
     if (!IsSelect) {
@@ -9941,6 +10056,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FP_ROUND_VL)
   NODE_NAME_CASE(VWMUL_VL)
   NODE_NAME_CASE(VWMULU_VL)
+  NODE_NAME_CASE(VWADDU_VL)
   NODE_NAME_CASE(SETCC_VL)
   NODE_NAME_CASE(VSELECT_VL)
   NODE_NAME_CASE(VMAND_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 76b778831fae0..23857f93e0159 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ enum NodeType : unsigned {
   // Widening instructions
   VWMUL_VL,
   VWMULU_VL,
+  VWADDU_VL,
 
   // Vector compare producing a mask. Fourth operand is input mask. Fifth
   // operand is VL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f646a4605e339..9745c13863823 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -221,14 +221,15 @@ def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL",
                                                         SDTCVecEltisVT<2, i1>,
                                                         SDTCisVT<3, XLenVT>]>>;
 
-def SDT_RISCVVWMUL_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
+def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
                                              SDTCisSameNumEltsAs<0, 1>,
                                              SDTCisSameAs<1, 2>,
                                              SDTCisSameNumEltsAs<1, 3>,
                                              SDTCVecEltisVT<3, i1>,
                                              SDTCisVT<4, XLenVT>]>;
-def riscv_vwmul_vl  : SDNode<"RISCVISD::VWMUL_VL",  SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
-def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
+def riscv_vwmul_vl  : SDNode<"RISCVISD::VWMUL_VL",  SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
 
 def SDTRVVVecReduce : SDTypeProfile<1, 5, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
@@ -712,6 +713,9 @@ foreach vti = AllIntegerVectors in {
                  (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 
+// 12.2. Vector Widening Integer Add/Subtract
+defm : VPatBinaryWVL_VV_VX;
+
 // 12.3. Vector Integer Extension
 defm : VPatExtendSDNode_V_VL;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
new file mode 100644
index 0000000000000..3f57fcea6acc5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+
+; Test optimizing interleaves to widening arithmetic.
+
+define <4 x half> @interleave_v2f16(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: interleave_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x half> %x, <2 x half> %y, <4 x i32> 
+  ret <4 x half> %a
+}
+
+; Vector order switched for coverage.
+define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: interleave_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v9, v8
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x float> %x, <2 x float> %y, <4 x i32> 
+  ret <4 x float> %a
+}
+
+; One vXf64 test case to verify that we don't optimize it.
+; FIXME: Is there better codegen we can do here?
+define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
+; RV32-V128-LABEL: interleave_v2f64:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    vmv1r.v v12, v9
+; RV32-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV32-V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-V128-NEXT:    vid.v v10
+; RV32-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT:    li a0, 10
+; RV32-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v12, v14, v0.t
+; RV32-V128-NEXT:    vmv.v.v v8, v10
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v2f64:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    vmv1r.v v12, v9
+; RV64-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vid.v v10
+; RV64-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV64-V128-NEXT:    vrgather.vv v10, v8, v14
+; RV64-V128-NEXT:    li a0, 10
+; RV64-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vrgather.vv v10, v12, v14, v0.t
+; RV64-V128-NEXT:    vmv.v.v v8, v10
+; RV64-V128-NEXT:    ret
+;
+; RV32-V512-LABEL: interleave_v2f64:
+; RV32-V512:       # %bb.0:
+; RV32-V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; RV32-V512-NEXT:    vid.v v10
+; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    li a0, 10
+; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vmv.s.x v0, a0
+; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
+; RV32-V512-NEXT:    vmv.v.v v8, v10
+; RV32-V512-NEXT:    ret
+;
+; RV64-V512-LABEL: interleave_v2f64:
+; RV64-V512:       # %bb.0:
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vid.v v10
+; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    li a0, 10
+; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vmv.s.x v0, a0
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; RV64-V512-NEXT:    vmv.v.v v8, v10
+; RV64-V512-NEXT:    ret
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> 
+  ret <4 x double> %a
+}
+
+; Undef elements for coverage
+define <8 x half> @interleave_v4f16(<4 x half> %x, <4 x half> %y) {
+; V128-LABEL: interleave_v4f16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4f16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> 
+  ret <8 x half> %a
+}
+
+define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
+; V128-LABEL: interleave_v4f32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4f32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> 
+  ret <8 x float> %a
+}
+
+; Vector order switched for coverage.
+define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
+; V128-LABEL: interleave_v8f16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8f16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x half> %x, <8 x half> %y, <16 x i32> 
+  ret <16 x half> %a
+}
+
+define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
+; V128-LABEL: interleave_v8f32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8f32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <16 x i32> 
+  ret <16 x float> %a
+}
+
+define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
+; V128-LABEL: interleave_v16f16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16f16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x half> %x, <16 x half> %y, <32 x i32> 
+  ret <32 x half> %a
+}
+
+define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
+; V128-LABEL: interleave_v16f32:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16f32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x float> %x, <16 x float> %y, <32 x i32> 
+  ret <32 x float> %a
+}
+
+define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
+; V128-LABEL: interleave_v32f16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32f16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x half> %x, <32 x half> %y, <64 x i32> 
+  ret <64 x half> %a
+}
+
+define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
+; RV32-V128-LABEL: interleave_v32f32:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    addi sp, sp, -16
+; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    sub sp, sp, a0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-V128-NEXT:    li a1, 32
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    vle32.v v0, (a0)
+; RV32-V128-NEXT:    vmv8r.v v24, v8
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; RV32-V128-NEXT:    vle32.v v24, (a0)
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    lui a0, 699051
+; RV32-V128-NEXT:    addi a0, a0, -1366
+; RV32-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV32-V128-NEXT:    vmv.v.v v24, v8
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV32-V128-NEXT:    li a0, -1
+; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV32-V128-NEXT:    vmv8r.v v8, v0
+; RV32-V128-NEXT:    vmv8r.v v16, v24
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    add sp, sp, a0
+; RV32-V128-NEXT:    addi sp, sp, 16
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v32f32:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    addi sp, sp, -16
+; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    sub sp, sp, a0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV64-V128-NEXT:    li a1, 32
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    vle32.v v0, (a0)
+; RV64-V128-NEXT:    vmv8r.v v24, v8
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; RV64-V128-NEXT:    vle32.v v24, (a0)
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    lui a0, 699051
+; RV64-V128-NEXT:    addiw a0, a0, -1366
+; RV64-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV64-V128-NEXT:    vmv.v.v v24, v8
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV64-V128-NEXT:    li a0, -1
+; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV64-V128-NEXT:    vmv8r.v v8, v0
+; RV64-V128-NEXT:    vmv8r.v v16, v24
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    add sp, sp, a0
+; RV64-V128-NEXT:    addi sp, sp, 16
+; RV64-V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32f32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; V512-NEXT:    vwaddu.vv v12, v8, v10
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v12, a0, v10
+; V512-NEXT:    vmv4r.v v8, v12
+; V512-NEXT:    ret
+  %a = shufflevector <32 x float> %x, <32 x float> %y, <64 x i32> 
+  ret <64 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
new file mode 100644
index 0000000000000..a17a83169373b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -0,0 +1,484 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+
+; Test optimizing interleaves to widening arithmetic.
+
+define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: interleave_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> 
+  ret <4 x i8> %a
+}
+
+define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: interleave_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> 
+  ret <4 x i16> %a
+}
+
+; Vector order switched for coverage.
+define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: interleave_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v9, v8
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> 
+  ret <4 x i32> %a
+}
+
+; One vXi64 test case to verify that we don't optimize it.
+; FIXME: Is there better codegen we can do here?
+define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; RV32-V128-LABEL: interleave_v2i64:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    vmv1r.v v12, v9
+; RV32-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV32-V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-V128-NEXT:    vid.v v10
+; RV32-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT:    li a0, 10
+; RV32-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v12, v14, v0.t
+; RV32-V128-NEXT:    vmv.v.v v8, v10
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v2i64:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    vmv1r.v v12, v9
+; RV64-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vid.v v10
+; RV64-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV64-V128-NEXT:    vrgather.vv v10, v8, v14
+; RV64-V128-NEXT:    li a0, 10
+; RV64-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vrgather.vv v10, v12, v14, v0.t
+; RV64-V128-NEXT:    vmv.v.v v8, v10
+; RV64-V128-NEXT:    ret
+;
+; RV32-V512-LABEL: interleave_v2i64:
+; RV32-V512:       # %bb.0:
+; RV32-V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; RV32-V512-NEXT:    vid.v v10
+; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    li a0, 10
+; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vmv.s.x v0, a0
+; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
+; RV32-V512-NEXT:    vmv.v.v v8, v10
+; RV32-V512-NEXT:    ret
+;
+; RV64-V512-LABEL: interleave_v2i64:
+; RV64-V512:       # %bb.0:
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vid.v v10
+; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    li a0, 10
+; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vmv.s.x v0, a0
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; RV64-V512-NEXT:    vmv.v.v v8, v10
+; RV64-V512-NEXT:    ret
+  %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> 
+  ret <4 x i64> %a
+}
+
+; Vector order switched for coverage.
+define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
+; V128-LABEL: interleave_v4i8:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i8:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e8, mf8, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> 
+  ret <8 x i8> %a
+}
+
+; Undef elements for coverage
+define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
+; V128-LABEL: interleave_v4i16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> 
+  ret <8 x i16> %a
+}
+
+define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; V128-LABEL: interleave_v4i32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> 
+  ret <8 x i32> %a
+}
+
+define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
+; V128-LABEL: interleave_v8i8:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e8, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i8:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e8, mf8, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> 
+  ret <16 x i8> %a
+}
+
+; Vector order switched for coverage.
+define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; V128-LABEL: interleave_v8i16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> 
+  ret <16 x i16> %a
+}
+
+define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; V128-LABEL: interleave_v8i32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> 
+  ret <16 x i32> %a
+}
+
+define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; V128-LABEL: interleave_v16i8:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i8:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> 
+  ret <32 x i8> %a
+}
+
+define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
+; V128-LABEL: interleave_v16i16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> 
+  ret <32 x i16> %a
+}
+
+define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
+; V128-LABEL: interleave_v16i32:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> 
+  ret <32 x i32> %a
+}
+
+define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
+; V128-LABEL: interleave_v32i8:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i8:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> 
+  ret <64 x i8> %a
+}
+
+define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
+; V128-LABEL: interleave_v32i16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> 
+  ret <64 x i16> %a
+}
+
+define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
+; RV32-V128-LABEL: interleave_v32i32:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    addi sp, sp, -16
+; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    sub sp, sp, a0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV32-V128-NEXT:    li a1, 32
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    vle32.v v0, (a0)
+; RV32-V128-NEXT:    vmv8r.v v24, v8
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI15_1)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI15_1)
+; RV32-V128-NEXT:    vle32.v v24, (a0)
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    lui a0, 699051
+; RV32-V128-NEXT:    addi a0, a0, -1366
+; RV32-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV32-V128-NEXT:    vmv.v.v v24, v8
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV32-V128-NEXT:    li a0, -1
+; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV32-V128-NEXT:    vmv8r.v v8, v0
+; RV32-V128-NEXT:    vmv8r.v v16, v24
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    add sp, sp, a0
+; RV32-V128-NEXT:    addi sp, sp, 16
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v32i32:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    addi sp, sp, -16
+; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    sub sp, sp, a0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV64-V128-NEXT:    li a1, 32
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    vle32.v v0, (a0)
+; RV64-V128-NEXT:    vmv8r.v v24, v8
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI15_1)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI15_1)
+; RV64-V128-NEXT:    vle32.v v24, (a0)
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    lui a0, 699051
+; RV64-V128-NEXT:    addiw a0, a0, -1366
+; RV64-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV64-V128-NEXT:    vmv.v.v v24, v8
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV64-V128-NEXT:    li a0, -1
+; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV64-V128-NEXT:    vmv8r.v v8, v0
+; RV64-V128-NEXT:    vmv8r.v v16, v24
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    add sp, sp, a0
+; RV64-V128-NEXT:    addi sp, sp, 16
+; RV64-V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; V512-NEXT:    vwaddu.vv v12, v8, v10
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v12, a0, v10
+; V512-NEXT:    vmv4r.v v8, v12
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> 
+  ret <64 x i32> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 97ec81c64cebe..1b8f1d246cd20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -314,16 +314,13 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
 ; CHECK-LABEL: interleave_shuffles:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vrgather.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vsrl.vi v10, v8, 1
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vi v9, v8, 0
+; CHECK-NEXT:    vrgather.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, mu
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
   %y = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> 
   %z = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> 

From 284cd693f1c695a4ae9c8de7e3ada645eb19d22b Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy 
Date: Thu, 20 Jan 2022 08:11:54 +0000
Subject: [PATCH 098/946] [libc] Move the remaining public types to their own
 type headers.

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D117838
---
 libc/config/linux/api.td                      | 98 ++++++-------------
 libc/include/CMakeLists.txt                   | 27 ++++-
 libc/include/llvm-libc-types/CMakeLists.txt   | 19 ++++
 libc/include/llvm-libc-types/FILE.h           | 14 +++
 .../llvm-libc-types/__bsearchcompare_t.h      | 14 +++
 .../llvm-libc-types/__call_once_func_t.h      | 14 +++
 .../llvm-libc-types/__qsortcompare_t.h        | 14 +++
 libc/include/llvm-libc-types/__sigaction.h    | 22 +++++
 libc/include/llvm-libc-types/__sighandler_t.h | 14 +++
 libc/include/llvm-libc-types/cnd_t.h          | 21 ++++
 libc/include/llvm-libc-types/div_t.h          | 17 ++++
 libc/include/llvm-libc-types/fenv_t.h         | 25 +++++
 libc/include/llvm-libc-types/fexcept_t.h      | 14 +++
 libc/include/llvm-libc-types/imaxdiv_t.h      | 17 ++++
 libc/include/llvm-libc-types/ldiv_t.h         | 17 ++++
 libc/include/llvm-libc-types/lldiv_t.h        | 17 ++++
 libc/include/llvm-libc-types/mtx_t.h          | 17 ++++
 libc/include/llvm-libc-types/once_flag.h      | 14 +++
 libc/include/llvm-libc-types/thrd_start_t.h   | 14 +++
 .../llvm-libc-types/thrd_t.h}                 |  7 +-
 libc/include/llvm-libc-types/time_t.h         | 14 +++
 libc/include/llvm-libc-types/tm.h             | 24 +++++
 libc/include/threads.h.def                    |  2 -
 23 files changed, 378 insertions(+), 78 deletions(-)
 create mode 100644 libc/include/llvm-libc-types/FILE.h
 create mode 100644 libc/include/llvm-libc-types/__bsearchcompare_t.h
 create mode 100644 libc/include/llvm-libc-types/__call_once_func_t.h
 create mode 100644 libc/include/llvm-libc-types/__qsortcompare_t.h
 create mode 100644 libc/include/llvm-libc-types/__sigaction.h
 create mode 100644 libc/include/llvm-libc-types/__sighandler_t.h
 create mode 100644 libc/include/llvm-libc-types/cnd_t.h
 create mode 100644 libc/include/llvm-libc-types/div_t.h
 create mode 100644 libc/include/llvm-libc-types/fenv_t.h
 create mode 100644 libc/include/llvm-libc-types/fexcept_t.h
 create mode 100644 libc/include/llvm-libc-types/imaxdiv_t.h
 create mode 100644 libc/include/llvm-libc-types/ldiv_t.h
 create mode 100644 libc/include/llvm-libc-types/lldiv_t.h
 create mode 100644 libc/include/llvm-libc-types/mtx_t.h
 create mode 100644 libc/include/llvm-libc-types/once_flag.h
 create mode 100644 libc/include/llvm-libc-types/thrd_start_t.h
 rename libc/{config/linux/threads.h.in => include/llvm-libc-types/thrd_t.h} (70%)
 create mode 100644 libc/include/llvm-libc-types/time_t.h
 create mode 100644 libc/include/llvm-libc-types/tm.h

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 8201edb76682b..f882ec304f192 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -24,23 +24,13 @@ def SSizeT : TypeDecl<"ssize_t"> {
 
 def StructTm: TypeDecl<"struct tm"> {
   let Decl = [{
-    struct tm {
-      int tm_sec; // seconds after the minute
-      int tm_min; // minutes after the hour
-      int tm_hour; // hours since midnight
-      int tm_mday; // day of the month
-      int tm_mon; // months since January
-      int tm_year; // years since 1900
-      int tm_wday; // days since Sunday
-      int tm_yday; // days since January
-      int tm_isdst; // Daylight Saving Time flag
-    };
+    #include <llvm-libc-types/tm.h>
   }];
 }
 
 def TimeT: TypeDecl<"time_t"> {
-   let Decl = [{
-   typedef long time_t;
+  let Decl = [{
+    #include <llvm-libc-types/time_t.h>
   }];
 }
 
@@ -52,7 +42,7 @@ def OffT : TypeDecl<"off_t"> {
 
 def FILE : TypeDecl<"FILE"> {
   let Decl = [{
-    typedef struct FILE FILE;
+    #include <llvm-libc-types/FILE.h>
   }];
 }
 
@@ -114,10 +104,7 @@ def CTypeAPI : PublicAPI<"ctype.h"> {
 
 def IMaxDivT : TypeDecl<"imaxdiv_t"> {
   let Decl = [{
-    typedef struct {
-      intmax_t quot;
-      intmax_t rem;
-    } imaxdiv_t;
+    #include <llvm-libc-types/imaxdiv_t.h>
   }];
 }
 
@@ -195,24 +182,13 @@ def MathAPI : PublicAPI<"math.h"> {
 
 def FEnvT : TypeDecl<"fenv_t"> {
   let Decl = [{
-    #ifdef __aarch64__
-    typedef struct {
-      unsigned char __control_word[4];
-      unsigned char __status_word[4];
-    } fenv_t;
-    #endif
-    #ifdef __x86_64__
-    typedef struct {
-      unsigned char __x86_status[28];
-      unsigned char __mxcsr[4];
-    } fenv_t;
-    #endif
+    #include <llvm-libc-types/fenv_t.h>
   }];
 }
 
 def FExceptT : TypeDecl<"fexcept_t"> {
   let Decl = [{
-    typedef int fexcept_t;
+    #include <llvm-libc-types/fexcept_t.h>
   }];
 }
 
@@ -257,40 +233,31 @@ def StdIOAPI : PublicAPI<"stdio.h"> {
 
 def DivT : TypeDecl<"div_t"> {
   let Decl = [{
-    typedef struct {
-      int quot;
-      int rem;
-    } div_t;
+    #include <llvm-libc-types/div_t.h>
   }];
 }
 
 def LDivT : TypeDecl<"ldiv_t"> {
   let Decl = [{
-    typedef struct {
-      long quot;
-      long rem;
-    } ldiv_t;
+    #include <llvm-libc-types/ldiv_t.h>
   }];
 }
 
 def LLDivT : TypeDecl<"lldiv_t"> {
   let Decl = [{
-    typedef struct {
-      long long quot;
-      long long rem;
-    } lldiv_t;
+    #include <llvm-libc-types/lldiv_t.h>
   }];
 }
 
 def BSearchCompareTDefn : TypeDecl<"__bsearchcompare_t"> {
   let Decl = [{
-    typedef int(*__bsearchcompare_t)(const void *, const void *);
+    #include <llvm-libc-types/__bsearchcompare_t.h>
   }];
 }
 
 def QSortCompareTDefn : TypeDecl<"__qsortcompare_t"> {
   let Decl = [{
-    typedef int(*__qsortcompare_t)(const void *, const void *);
+    #include <llvm-libc-types/__qsortcompare_t.h>
   }];
 }
 
@@ -367,21 +334,13 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> {
 
 def StructSigactionDefn : TypeDecl<"struct sigaction"> {
   let Decl = [{
-    struct __sigaction {
-      union {
-        void (*sa_handler)(int);
-        void (*sa_action)(int, siginfo_t *, void *);
-      };
-      sigset_t sa_mask;
-      int sa_flags;
-      void (*sa_restorer)(void);
-    };
+    #include <llvm-libc-types/__sigaction.h>
   }];
 }
 
 def SighandlerTDefn : TypeDecl<"__sighandler_t"> {
   let Decl = [{
-    typedef void(*__sighandler_t)(int);
+    #include <llvm-libc-types/__sighandler_t.h>
   }];
 }
 
@@ -394,39 +353,37 @@ def SignalAPI : PublicAPI<"signal.h"> {
 
 def OnceFlag : TypeDecl<"once_flag"> {
   let Decl = [{
-    typedef unsigned int once_flag;
+    #include <llvm-libc-types/once_flag.h>
   }];
 }
 
 def MtxT : TypeDecl<"mtx_t"> {
   let Decl = [{
-    typedef struct {
-      unsigned char __internal_data[4];
-      int __mtx_type;
-    } mtx_t;
+    #include <llvm-libc-types/mtx_t.h>
   }];
 }
 
 def CndT : TypeDecl<"cnd_t"> {
   let Decl = [{
-    typedef struct {
-      void *__qfront;
-      void *__qback;
-      struct {
-        unsigned char __w[4];
-        int __t;
-      } __qmtx;
-    } cnd_t;
+    #include <llvm-libc-types/cnd_t.h>
+  }];
+}
+
+def ThrdT : TypeDecl<"thrd_t"> {
+  let Decl = [{
+    #include <llvm-libc-types/thrd_t.h>
   }];
 }
 
 def ThreadStartT : TypeDecl<"thrd_start_t"> {
-  let Decl = "typedef int (*thrd_start_t)(void *);";
+  let Decl = [{
+    #include <llvm-libc-types/thrd_start_t.h>
+  }];
 }
 
 def CallOnceFuncT : TypeDecl<"__call_once_func_t"> {
   let Decl = [{
-    typedef void(*__call_once_func_t)(void);
+    #include <llvm-libc-types/__call_once_func_t.h>
   }];
 }
 
@@ -440,6 +397,7 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
     CallOnceFuncT,
     MtxT,
     CndT,
+    ThrdT,
     ThreadStartT,
   ];
 
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index ba6b47c3d1859..0b62563f3170a 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -20,6 +20,8 @@ add_gen_header(
   GEN_HDR fenv.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.fenv_t
+    .llvm-libc-types.fexcept_t
 )
 
 add_gen_header(
@@ -28,6 +30,7 @@ add_gen_header(
   GEN_HDR inttypes.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.imaxdiv_t
 )
 
 add_gen_header(
@@ -54,6 +57,7 @@ add_gen_header(
   GEN_HDR string.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
@@ -62,18 +66,22 @@ add_gen_header(
   GEN_HDR time.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.time_t
+    .llvm-libc-types.tm
 )
 
 add_gen_header(
   threads
   DEF_FILE threads.h.def
   GEN_HDR threads.h
-  PARAMS
-    platform_threads=../config/${LIBC_TARGET_OS}/threads.h.in
   DEPENDS
     .llvm_libc_common_h
-  DATA_FILES
-    ../config/${LIBC_TARGET_OS}/threads.h.in
+    .llvm-libc-types.__call_once_func_t
+    .llvm-libc-types.cnd_t
+    .llvm-libc-types.mtx_t
+    .llvm-libc-types.once_flag
+    .llvm-libc-types.thrd_start_t
+    .llvm-libc-types.thrd_t
 )
 
 add_gen_header(
@@ -94,6 +102,9 @@ add_gen_header(
   GEN_HDR signal.h
   DATA_FILES
     ../config/${LIBC_TARGET_OS}/signal.h.in
+  DEPENDS
+    .llvm-libc-types.__sigaction
+    .llvm-libc-types.__sighandler_t
 )
 
 add_gen_header(
@@ -102,6 +113,8 @@ add_gen_header(
   GEN_HDR stdio.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.FILE
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
@@ -110,6 +123,12 @@ add_gen_header(
   GEN_HDR stdlib.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.__bsearchcompare_t
+    .llvm-libc-types.__qsortcompare_t
+    .llvm-libc-types.div_t
+    .llvm-libc-types.ldiv_t
+    .llvm-libc-types.lldiv_t
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index ced6a38a2de3f..73c295843e2a4 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -1,5 +1,24 @@
+add_header(__bsearchcompare_t HDR __bsearchcompare_t.h)
+add_header(__call_once_func_t HDR __call_once_func_t.h)
+add_header(__qsortcompare_t HDR __qsortcompare_t.h)
+add_header(__sigaction HDR __sigaction.h)
+add_header(__sighandler_t HDR __sighandler_t.h)
+add_header(cnd_t HDR cnd_t.h)
 add_header(double_t HDR double_t.h)
+add_header(div_t HDR div_t.h)
+add_header(ldiv_t HDR ldiv_t.h)
+add_header(lldiv_t HDR lldiv_t.h)
+add_header(FILE HDR FILE.h)
+add_header(fenv_t HDR fenv_t.h)
+add_header(fexcept_t HDR fexcept_t.h)
 add_header(float_t HDR float_t.h)
+add_header(imaxdiv_t HDR imaxdiv_t.h)
+add_header(mtx_t HDR mtx_t.h)
 add_header(off_t HDR off_t.h)
+add_header(once_flag HDR once_flag.h)
 add_header(size_t HDR size_t.h)
 add_header(ssize_t HDR ssize_t.h)
+add_header(thrd_start_t HDR thrd_start_t.h)
+add_header(thrd_t HDR thrd_t.h)
+add_header(time_t HDR time_t.h)
+add_header(tm HDR tm.h)
diff --git a/libc/include/llvm-libc-types/FILE.h b/libc/include/llvm-libc-types/FILE.h
new file mode 100644
index 0000000000000..1c1ff97ec86a5
--- /dev/null
+++ b/libc/include/llvm-libc-types/FILE.h
@@ -0,0 +1,14 @@
+//===-- Definition of the type FILE ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FILE_H__
+#define __LLVM_LIBC_TYPES_FILE_H__
+
+typedef struct FILE FILE;
+
+#endif // __LLVM_LIBC_TYPES_FILE_H__
diff --git a/libc/include/llvm-libc-types/__bsearchcompare_t.h b/libc/include/llvm-libc-types/__bsearchcompare_t.h
new file mode 100644
index 0000000000000..40ebc7f356688
--- /dev/null
+++ b/libc/include/llvm-libc-types/__bsearchcompare_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __bsearchcompare_t -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
+#define __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
+
+typedef int (*__bsearchcompare_t)(const void *, const void *);
+
+#endif // __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
diff --git a/libc/include/llvm-libc-types/__call_once_func_t.h b/libc/include/llvm-libc-types/__call_once_func_t.h
new file mode 100644
index 0000000000000..bc8ed8331bd80
--- /dev/null
+++ b/libc/include/llvm-libc-types/__call_once_func_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of __call_once_func_t type -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
+#define __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
+
+typedef void (*__call_once_func_t)(void);
+
+#endif // __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
diff --git a/libc/include/llvm-libc-types/__qsortcompare_t.h b/libc/include/llvm-libc-types/__qsortcompare_t.h
new file mode 100644
index 0000000000000..82bd4cc1fcd03
--- /dev/null
+++ b/libc/include/llvm-libc-types/__qsortcompare_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __qsortcompare_t -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
+#define __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
+
+typedef int (*__qsortcompare_t)(const void *, const void *);
+
+#endif // __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
diff --git a/libc/include/llvm-libc-types/__sigaction.h b/libc/include/llvm-libc-types/__sigaction.h
new file mode 100644
index 0000000000000..1c7243c0e921f
--- /dev/null
+++ b/libc/include/llvm-libc-types/__sigaction.h
@@ -0,0 +1,22 @@
+//===-- Definition of struct __sigaction ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_SIGACTION_H__
+#define __LLVM_LIBC_TYPES_SIGACTION_H__
+
+struct __sigaction {
+  union {
+    void (*sa_handler)(int);
+    void (*sa_action)(int, siginfo_t *, void *);
+  };
+  sigset_t sa_mask;
+  int sa_flags;
+  void (*sa_restorer)(void);
+};
+
+#endif // __LLVM_LIBC_TYPES_SIGACTION_H__
diff --git a/libc/include/llvm-libc-types/__sighandler_t.h b/libc/include/llvm-libc-types/__sighandler_t.h
new file mode 100644
index 0000000000000..bd0ad98d85295
--- /dev/null
+++ b/libc/include/llvm-libc-types/__sighandler_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __sighandler_t ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
+#define __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
+
+typedef void (*__sighandler_t)(int);
+
+#endif // __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
diff --git a/libc/include/llvm-libc-types/cnd_t.h b/libc/include/llvm-libc-types/cnd_t.h
new file mode 100644
index 0000000000000..62f0636fc7bcf
--- /dev/null
+++ b/libc/include/llvm-libc-types/cnd_t.h
@@ -0,0 +1,21 @@
+//===-- Definition of cnd_t type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_CND_T_H__
+#define __LLVM_LIBC_TYPES_CND_T_H__
+
+typedef struct {
+  void *__qfront;
+  void *__qback;
+  struct {
+    unsigned char __w[4];
+    int __t;
+  } __qmtx;
+} cnd_t;
+
+#endif // __LLVM_LIBC_TYPES_CND_T_H__
diff --git a/libc/include/llvm-libc-types/div_t.h b/libc/include/llvm-libc-types/div_t.h
new file mode 100644
index 0000000000000..e495a1c3f9dcc
--- /dev/null
+++ b/libc/include/llvm-libc-types/div_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type div_t ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_DIV_T_H__
+#define __LLVM_LIBC_TYPES_DIV_T_H__
+
+typedef struct {
+  int quot;
+  int rem;
+} div_t;
+
+#endif // __LLVM_LIBC_TYPES_DIV_T_H__
diff --git a/libc/include/llvm-libc-types/fenv_t.h b/libc/include/llvm-libc-types/fenv_t.h
new file mode 100644
index 0000000000000..a95e08179cceb
--- /dev/null
+++ b/libc/include/llvm-libc-types/fenv_t.h
@@ -0,0 +1,25 @@
+//===-- Definition of type fenv_t -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FENV_T_H__
+#define __LLVM_LIBC_TYPES_FENV_T_H__
+
+#ifdef __aarch64__
+typedef struct {
+  unsigned char __control_word[4];
+  unsigned char __status_word[4];
+} fenv_t;
+#endif
+#ifdef __x86_64__
+typedef struct {
+  unsigned char __x86_status[28];
+  unsigned char __mxcsr[4];
+} fenv_t;
+#endif
+
+#endif // __LLVM_LIBC_TYPES_FENV_T_H__
diff --git a/libc/include/llvm-libc-types/fexcept_t.h b/libc/include/llvm-libc-types/fexcept_t.h
new file mode 100644
index 0000000000000..6e7969c1be0a8
--- /dev/null
+++ b/libc/include/llvm-libc-types/fexcept_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of fexcept_t type --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FEXCEPT_T_H__
+#define __LLVM_LIBC_TYPES_FEXCEPT_T_H__
+
+typedef int fexcept_t;
+
+#endif // __LLVM_LIBC_TYPES_FEXCEPT_T_H__
diff --git a/libc/include/llvm-libc-types/imaxdiv_t.h b/libc/include/llvm-libc-types/imaxdiv_t.h
new file mode 100644
index 0000000000000..5062b643065a7
--- /dev/null
+++ b/libc/include/llvm-libc-types/imaxdiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type imaxdiv_t --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_IMAXDIV_T_H__
+#define __LLVM_LIBC_TYPES_IMAXDIV_T_H__
+
+typedef struct {
+  intmax_t quot;
+  intmax_t rem;
+} imaxdiv_t;
+
+#endif // __LLVM_LIBC_TYPES_IMAXDIV_T_H__
diff --git a/libc/include/llvm-libc-types/ldiv_t.h b/libc/include/llvm-libc-types/ldiv_t.h
new file mode 100644
index 0000000000000..9bd8d253330a0
--- /dev/null
+++ b/libc/include/llvm-libc-types/ldiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type ldiv_t -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_LDIV_T_H__
+#define __LLVM_LIBC_TYPES_LDIV_T_H__
+
+typedef struct {
+  long quot;
+  long rem;
+} ldiv_t;
+
+#endif // __LLVM_LIBC_TYPES_LDIV_T_H__
diff --git a/libc/include/llvm-libc-types/lldiv_t.h b/libc/include/llvm-libc-types/lldiv_t.h
new file mode 100644
index 0000000000000..109304d120787
--- /dev/null
+++ b/libc/include/llvm-libc-types/lldiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type lldiv_t ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_LLDIV_T_H__
+#define __LLVM_LIBC_TYPES_LLDIV_T_H__
+
+typedef struct {
+  long long quot;
+  long long rem;
+} lldiv_t;
+
+#endif // __LLVM_LIBC_TYPES_LLDIV_T_H__
diff --git a/libc/include/llvm-libc-types/mtx_t.h b/libc/include/llvm-libc-types/mtx_t.h
new file mode 100644
index 0000000000000..42f27297bd0af
--- /dev/null
+++ b/libc/include/llvm-libc-types/mtx_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of mtx_t type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_MTX_T_H__
+#define __LLVM_LIBC_TYPES_MTX_T_H__
+
+typedef struct {
+  unsigned char __internal_data[4];
+  int __mtx_type;
+} mtx_t;
+
+#endif // __LLVM_LIBC_TYPES_MTX_T_H__
diff --git a/libc/include/llvm-libc-types/once_flag.h b/libc/include/llvm-libc-types/once_flag.h
new file mode 100644
index 0000000000000..4987bda38bbe4
--- /dev/null
+++ b/libc/include/llvm-libc-types/once_flag.h
@@ -0,0 +1,14 @@
+//===-- Definition of once_flag type --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ONCE_FLAG_H__
+#define __LLVM_LIBC_TYPES_ONCE_FLAG_H__
+
+typedef unsigned int once_flag;
+
+#endif // __LLVM_LIBC_TYPES_ONCE_FLAG_H__
diff --git a/libc/include/llvm-libc-types/thrd_start_t.h b/libc/include/llvm-libc-types/thrd_start_t.h
new file mode 100644
index 0000000000000..83fc32cbd1f87
--- /dev/null
+++ b/libc/include/llvm-libc-types/thrd_start_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of thrd_start_t type -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_THRD_START_T_H__
+#define __LLVM_LIBC_TYPES_THRD_START_T_H__
+
+typedef int (*thrd_start_t)(void *);
+
+#endif // __LLVM_LIBC_TYPES_THRD_START_T_H__
diff --git a/libc/config/linux/threads.h.in b/libc/include/llvm-libc-types/thrd_t.h
similarity index 70%
rename from libc/config/linux/threads.h.in
rename to libc/include/llvm-libc-types/thrd_t.h
index cd45ebfec9bc1..ebbf9b0a36506 100644
--- a/libc/config/linux/threads.h.in
+++ b/libc/include/llvm-libc-types/thrd_t.h
@@ -1,4 +1,4 @@
-//===-- Linux specific definitions of types from threads.h ----------------===//
+//===-- Definition of thrd_t type -----------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-%%begin()
+#ifndef __LLVM_LIBC_TYPES_THRD_T_H__
+#define __LLVM_LIBC_TYPES_THRD_T_H__
 
 typedef struct {
   unsigned char __clear_tid[4];
@@ -15,3 +16,5 @@ typedef struct {
   int __stack_size;
   int __retval;
 } thrd_t;
+
+#endif // __LLVM_LIBC_TYPES_THRD_T_H__
diff --git a/libc/include/llvm-libc-types/time_t.h b/libc/include/llvm-libc-types/time_t.h
new file mode 100644
index 0000000000000..13c33b07c2238
--- /dev/null
+++ b/libc/include/llvm-libc-types/time_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of the type time_t -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_TIME_T_H__
+#define __LLVM_LIBC_TYPES_TIME_T_H__
+
+typedef long time_t;
+
+#endif // __LLVM_LIBC_TYPES_TIME_T_H__
diff --git a/libc/include/llvm-libc-types/tm.h b/libc/include/llvm-libc-types/tm.h
new file mode 100644
index 0000000000000..953e12e819c3a
--- /dev/null
+++ b/libc/include/llvm-libc-types/tm.h
@@ -0,0 +1,24 @@
+//===-- Definition of struct tm -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_TM_H__
+#define __LLVM_LIBC_TYPES_TM_H__
+
+struct tm {
+  int tm_sec;   // seconds after the minute
+  int tm_min;   // minutes after the hour
+  int tm_hour;  // hours since midnight
+  int tm_mday;  // day of the month
+  int tm_mon;   // months since January
+  int tm_year;  // years since 1900
+  int tm_wday;  // days since Sunday
+  int tm_yday;  // days since January 1
+  int tm_isdst; // Daylight Saving Time flag
+};
+
+#endif // __LLVM_LIBC_TYPES_TM_H__
diff --git a/libc/include/threads.h.def b/libc/include/threads.h.def
index c8a24e0979db9..93541b8d3bac4 100644
--- a/libc/include/threads.h.def
+++ b/libc/include/threads.h.def
@@ -11,8 +11,6 @@
 
 #include <__llvm-libc-common.h>
 
-%%include_file(${platform_threads})
-
 %%public_api()
 
 #endif // LLVM_LIBC_THREADS_H

From 5e88f527da2175019e443d3600ade8f23e84f116 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 14:57:31 -0800
Subject: [PATCH 099/946] [RISCV] Remove RISCVSubtarget::hasStdExtV() and
 hasStdExtZve*(). NFC

All code should use one of the cleaner named hasVInstructions*
functions. Fix the two uses that weren't and delete the methods
so no new uses can be created.
---
 llvm/lib/Target/RISCV/RISCV.td         | 4 ++--
 llvm/lib/Target/RISCV/RISCVSubtarget.h | 6 ------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 7972ced08edd8..b5d2bd01d3552 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -200,12 +200,12 @@ def HasVInstructions    : Predicate<"Subtarget->hasVInstructions()">,
           (any_of FeatureStdExtZve32x),
           "'V' (Vector Extension for Application Processors), 'Zve32x' or "
           "'Zve64x' (Vector Extensions for Embedded Processors)">;
-def HasVInstructionsI64 : Predicate<"Subtarget->hasStdExtZve64x()">,
+def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
       AssemblerPredicate<
           (any_of FeatureStdExtZve64x),
           "'V' (Vector Extension for Application Processors) or 'Zve64x' "
           "(Vector Extensions for Embedded Processors)">;
-def HasVInstructionsAnyF : Predicate<"Subtarget->hasStdExtZve32f()">,
+def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
       AssemblerPredicate<
           (any_of FeatureStdExtZve32f),
           "'V' (Vector Extension for Application Processors), 'Zve32f', "
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index d55affd0539be..4b5958ad38d9c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -153,12 +153,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZbr() const { return HasStdExtZbr; }
   bool hasStdExtZbs() const { return HasStdExtZbs; }
   bool hasStdExtZbt() const { return HasStdExtZbt; }
-  bool hasStdExtV() const { return HasStdExtV; }
-  bool hasStdExtZve32x() const { return HasStdExtZve32x; }
-  bool hasStdExtZve32f() const { return HasStdExtZve32f; }
-  bool hasStdExtZve64x() const { return HasStdExtZve64x; }
-  bool hasStdExtZve64f() const { return HasStdExtZve64f; }
-  bool hasStdExtZve64d() const { return HasStdExtZve64d; }
   bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }

From a5684114445a72b5c0bb5b7b68a5c6eb3486b66d Mon Sep 17 00:00:00 2001
From: CJ Johnson 
Date: Thu, 20 Jan 2022 18:05:07 -0500
Subject: [PATCH 100/946] [clang-tidy] Update bugprone-stringview-nullptr to
 consistently prefer the empty string when passing arguments to
 constructors/functions

Previously, function(nullptr) would have been fixed with function({}). This unfortunately can change overload resolution and even become ambiguous. T(nullptr) was already being fixed with T(""), so this change just brings function calls in line with that.

Differential Revision: https://reviews.llvm.org/D117840
---
 .../bugprone/StringviewNullptrCheck.cpp       | 19 ++++++++--------
 .../checks/bugprone-stringview-nullptr.rst    |  4 ++--
 .../checkers/bugprone-stringview-nullptr.cpp  | 22 +++++++++----------
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
index a0ae262318914..b45aa93533b08 100644
--- a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
@@ -44,6 +44,9 @@ RewriteRule StringviewNullptrCheckImpl() {
   auto static_cast_warning =
       cat("casting to basic_string_view from null is undefined; replace with "
           "the empty string");
+  auto argument_construction_warning =
+      cat("passing null as basic_string_view is undefined; replace with the "
+          "empty string");
   auto assignment_warning =
       cat("assignment to basic_string_view from null is undefined; replace "
           "with the default constructor");
@@ -53,9 +56,6 @@ RewriteRule StringviewNullptrCheckImpl() {
   auto equality_comparison_warning =
       cat("comparing basic_string_view to null is undefined; replace with the "
           "emptiness query");
-  auto constructor_argument_warning =
-      cat("passing null as basic_string_view is undefined; replace with the "
-          "empty string");
 
   // Matches declarations and expressions of type `basic_string_view`
   auto HasBasicStringViewType = hasType(hasUnqualifiedDesugaredType(recordType(
@@ -211,11 +211,12 @@ RewriteRule StringviewNullptrCheckImpl() {
       remove(node("null_arg_expr")), construction_warning);
 
   // `function(null_arg_expr)`
-  auto HandleFunctionArgumentInitialization = makeRule(
-      callExpr(hasAnyArgument(
-                   ignoringImpCasts(BasicStringViewConstructingFromNullExpr)),
-               unless(cxxOperatorCallExpr())),
-      changeTo(node("construct_expr"), cat("{}")), construction_warning);
+  auto HandleFunctionArgumentInitialization =
+      makeRule(callExpr(hasAnyArgument(ignoringImpCasts(
+                            BasicStringViewConstructingFromNullExpr)),
+                        unless(cxxOperatorCallExpr())),
+               changeTo(node("construct_expr"), cat("\"\"")),
+               argument_construction_warning);
 
   // `sv = null_arg_expr`
   auto HandleAssignment = makeRule(
@@ -268,7 +269,7 @@ RewriteRule StringviewNullptrCheckImpl() {
                                       BasicStringViewConstructingFromNullExpr)),
                    unless(HasBasicStringViewType)),
                changeTo(node("construct_expr"), cat("\"\"")),
-               constructor_argument_warning);
+               argument_construction_warning);
 
   return applyFirst(
       {HandleTemporaryCXXFunctionalCastExpr,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
index 198ad398ec7b7..7138c97b745ae 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
@@ -43,9 +43,9 @@ is translated into...
   bool is_empty = sv.empty();
   bool isnt_empty = !sv.empty();
 
-  accepts_sv({});
+  accepts_sv("");
 
-  accepts_sv({});  // A
+  accepts_sv("");  // A
 
   accepts_sv({nullptr, 0});  // B
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
index 322c8eeca754e..02fcab31dcf3e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
@@ -1039,24 +1039,24 @@ void function_argument_initialization() /* f */ {
   // Function Argument Initialization
   {
     function(nullptr) /* f1 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f1 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing null as basic_string_view is undefined; replace with the empty string
+    // CHECK-FIXES: {{^}}    function("") /* f1 */;
 
     function((nullptr)) /* f2 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f2 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f2 */;
 
     function({nullptr}) /* f3 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f3 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f3 */;
 
     function({(nullptr)}) /* f4 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f4 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f4 */;
 
     function({{}}) /* f5 */; // Default `const CharT*`
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f5 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f5 */;
   }
 
   // Function Argument Initialization With Temporary
@@ -1599,7 +1599,7 @@ void constructor_invocation() /* r */ {
   struct AcceptsSV {
     explicit AcceptsSV(std::string_view) {}
   } r1(nullptr);
-  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: passing null as basic_string_view is undefined; replace with the empty string
+  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: passing{{.*}}empty string
   // CHECK-FIXES: {{^}}  } r1("");
 
   (void)(AcceptsSV{nullptr}) /* r2 */;

From 8457b61699e8b8315579c8c2ae2a2d47f28d6fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= 
Date: Thu, 20 Jan 2022 21:16:46 +0100
Subject: [PATCH 101/946] [test] Add tests for bswap combining. NFC

---
 .../test/Transforms/InstCombine/bswap-fold.ll | 270 ++++++++++++++++++
 1 file changed, 270 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index da7380e0ab74e..9ffae93bbbc7b 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -355,6 +355,276 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
   ret i64 %t3
 }
 
+
+define i64 @bs_active_high8(i64 %0) {
+; CHECK-LABEL: @bs_active_high8(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = shl i64 %0, 56
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_high7(i32 %0) {
+; CHECK-LABEL: @bs_active_high7(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, -33554432  ; 0xfe000000
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i64> @bs_active_high4(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high4(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, 
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_different(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, 
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define <2 x i64> @bs_active_high_different_negative(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_different_negative(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0,   ; second elem has 9 active high bits
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_undef(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, 
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+define i64 @bs_active_high8_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_high8_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = shl i64 %0, 56
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of shl and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_high7_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_high7_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = shl i64 %0, 57
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of shl and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_byte_6h(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_6h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_byte_3h(i32 %0) {
+; CHECK-LABEL: @bs_active_byte_3h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, 393216  ; 0x0006'0000
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_byte_3h_v2(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define i64 @bs_active_byte_78h(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_78h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 108086391056891904
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 108086391056891904  ; 0x01800000'00000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+
+define i16 @bs_active_low1(i16 %0) {
+; CHECK-LABEL: @bs_active_low1(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %2 = lshr i16 %0, 15
+  %3 = call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define <2 x i32> @bs_active_low8(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low8(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, 
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_different(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, 
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define <2 x i32> @bs_active_low_different_negative(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_different_negative(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, 
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_undef(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, 
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define i64 @bs_active_low8_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_low8_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = and i64 %0, 255
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of and and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_low7_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_low7_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = and i64 %0, 127
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of and and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_byte_4l(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_4l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 1140850688  ; 0x44000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_byte_2l(i32 %0) {
+; CHECK-LABEL: @bs_active_byte_2l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, 65280  ; 0xff00
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_byte_2l_v2(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define i64 @bs_active_byte_12l(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_12l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 384
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 384  ; 0x0180
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)

From ac2f3df8396ac5bc507bd84eec185d756420e47c Mon Sep 17 00:00:00 2001
From: Sam Clegg 
Date: Wed, 19 Jan 2022 18:17:08 -0800
Subject: [PATCH 102/946] [lld][WebAssembly] Remove redundant config setting

Unresolved symbols are not currently reported when building with
`-shared` or `-pie` so setting unresolvedSymbols doesn't have any
effect.

Differential Revision: https://reviews.llvm.org/D117737
---
 lld/wasm/Driver.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 7523755806ab9..cb7ca13ebd6ae 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -474,7 +474,6 @@ static void setConfigs() {
   if (config->shared) {
     config->importMemory = true;
     config->importUndefined = true;
-    config->unresolvedSymbols = UnresolvedPolicy::Ignore;
   }
 }
 

From d92e5412ea571158b5b524855d19b5eafa0567ce Mon Sep 17 00:00:00 2001
From: Clint Caywood 
Date: Thu, 20 Jan 2022 23:27:05 +0000
Subject: [PATCH 103/946] [libc] Use __builtin_clz to find leading 1 in hypot

This is an optimization that uses a single CPU instruction on supported
architectures (amd64 and aarch64, but possibly others) to replace what was
previously an iterative look-up-table algorithm.

Originally I suggested using inline assembly for this in
https://reviews.llvm.org/D117584.

Reviewed By: lntue, sivachandra

Differential Revision: https://reviews.llvm.org/D117684
---
 libc/src/__support/FPUtil/Hypot.h | 44 ++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 15b26798ccb54..bb658b0085fea 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -22,33 +22,39 @@ namespace internal {
 template <typename T>
 static inline T find_leading_one(T mant, int &shift_length);
 
+// The following overloads are matched based on what is accepted by
+// __builtin_clz* rather than using the exactly-sized aliases from stdint.h
+// (such as uint32_t). There are 3 overloads even though 2 will only ever be
+// used by a specific platform, since unsigned long varies in size depending on
+// the word size of the architecture.
+
 template <>
-inline uint32_t find_leading_one(uint32_t mant, int &shift_length) {
+inline unsigned int find_leading_one(unsigned int mant,
+                                                   int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 5;
-  constexpr uint32_t BOUNDS[NSTEPS] = {1 << 16, 1 << 8, 1 << 4, 1 << 2, 1 << 1};
-  constexpr int SHIFTS[NSTEPS] = {16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clz(mant);
   }
   return 1U << shift_length;
 }
 
 template <>
-inline uint64_t find_leading_one(uint64_t mant, int &shift_length) {
+inline unsigned long find_leading_one(unsigned long mant,
+                                                     int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 6;
-  constexpr uint64_t BOUNDS[NSTEPS] = {1ULL << 32, 1ULL << 16, 1ULL << 8,
-                                       1ULL << 4,  1ULL << 2,  1ULL << 1};
-  constexpr int SHIFTS[NSTEPS] = {32, 16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzl(mant);
+  }
+  return 1UL << shift_length;
+}
+
+template <>
+inline unsigned long long
+find_leading_one(unsigned long long mant,
+                                     int &shift_length) {
+  shift_length = 0;
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzll(mant);
   }
   return 1ULL << shift_length;
 }

From 5501c16edf0f0211386edee9029af5a2b92fcc17 Mon Sep 17 00:00:00 2001
From: Peter Klausler 
Date: Tue, 11 Jan 2022 14:54:31 -0800
Subject: [PATCH 104/946] [flang] Fix OPEN/WRITE(SIGN='SUPPRESS')

The keyword value was misspelled in the runtime.

Differential Revision: https://reviews.llvm.org/D117816
---
 flang/runtime/io-api.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 811095417ffb8..2f12bfcad350d 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -593,7 +593,8 @@ bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) {
 bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) {
   IoStatementState &io{*cookie};
   ConnectionState &connection{io.GetConnectionState()};
-  static const char *keywords[]{"PLUS", "YES", "PROCESSOR_DEFINED", nullptr};
+  static const char *keywords[]{
+      "PLUS", "SUPPRESS", "PROCESSOR_DEFINED", nullptr};
   switch (IdentifyValue(keyword, length, keywords)) {
   case 0:
     connection.modes.editingFlags |= signPlus;

From 9ddd07922f65ec7b633228b8b71076031355937e Mon Sep 17 00:00:00 2001
From: Peter Klausler 
Date: Wed, 12 Jan 2022 15:48:06 -0800
Subject: [PATCH 105/946] [flang] Handle FLUSH(unknown unit)

The unit number passed to a FLUSH statement is not required to
be a valid open unit; nothing happens (esp. not the creation of
an empty fort.n file) in this case.

Differential Revision: https://reviews.llvm.org/D117819
---
 flang/runtime/io-api.cpp | 19 ++++++++++++-------
 flang/runtime/io-stmt.h  |  8 ++++----
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 2f12bfcad350d..03c878d1a8506 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -317,7 +317,7 @@ Cookie IONAME(BeginClose)(
   } else {
     // CLOSE(UNIT=bad unit) is just a no-op
     Terminator oom{sourceFile, sourceLine};
-    return &New<NoopCloseStatementState>{oom}(sourceFile, sourceLine)
+    return &New<NoopStatementState>{oom}(sourceFile, sourceLine)
                 .release()
                 ->ioStatementState();
   }
@@ -325,11 +325,16 @@ Cookie IONAME(BeginClose)(
 
 Cookie IONAME(BeginFlush)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
-  Terminator terminator{sourceFile, sourceLine};
-  ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)};
-  return &unit.BeginIoStatement(
-      unit, ExternalMiscIoStatementState::Flush, sourceFile, sourceLine);
+  if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) {
+    return &unit->BeginIoStatement(
+        *unit, ExternalMiscIoStatementState::Flush, sourceFile, sourceLine);
+  } else {
+    // FLUSH(UNIT=unknown) is a no-op
+    Terminator oom{sourceFile, sourceLine};
+    return &New<NoopStatementState>{oom}(sourceFile, sourceLine)
+                .release()
+                ->ioStatementState();
+  }
 }
 
 Cookie IONAME(BeginBackspace)(
@@ -880,7 +885,7 @@ bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) {
     }
     return false;
   }
-  if (io.get_if<NoopCloseStatementState>()) {
+  if (io.get_if<NoopStatementState>()) {
     return true; // don't bother validating STATUS= in a no-op CLOSE
   }
   io.GetIoErrorHandler().Crash(
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index bf84f4a3369a0..8327326c7f9ef 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -34,7 +34,7 @@ class InquireUnconnectedFileState;
 class InquireIOLengthState;
 class ExternalMiscIoStatementState;
 class CloseStatementState;
-class NoopCloseStatementState;
+class NoopStatementState; // CLOSE or FLUSH on unknown unit
 
 template 
 class InternalFormattedIoStatementState;
@@ -238,7 +238,7 @@ class IoStatementState {
 private:
   std::variant,
       std::reference_wrapper,
-      std::reference_wrapper<NoopCloseStatementState>,
+      std::reference_wrapper<NoopStatementState>,
       std::reference_wrapper<
           InternalFormattedIoStatementState>,
       std::reference_wrapper<
@@ -616,9 +616,9 @@ class NoUnitIoStatementState : public IoStatementBase {
   ConnectionState connection_;
 };
 
-class NoopCloseStatementState : public NoUnitIoStatementState {
+class NoopStatementState : public NoUnitIoStatementState {
 public:
-  NoopCloseStatementState(const char *sourceFile, int sourceLine)
+  NoopStatementState(const char *sourceFile, int sourceLine)
       : NoUnitIoStatementState{sourceFile, sourceLine, *this} {}
   void set_status(CloseStatus) {} // discards
 };

From e1b7bd911d9e491e50db1aa00d340fb3b6b907ef Mon Sep 17 00:00:00 2001
From: Dave Airlie 
Date: Fri, 21 Jan 2022 09:49:41 +1000
Subject: [PATCH 106/946] [OpenCL] opencl-c.h: add __opencl_c_images and 
 __opencl_c_read_write_images

This wraps the image and read_write image usages in the correct feature macros.

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D107539
---
 clang/lib/Headers/opencl-c.h | 94 +++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 40 deletions(-)

diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 77a7a8b9bb3a1..e65e3634d010d 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -11,11 +11,11 @@
 
 #include "opencl-c-base.h"
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_images)
 #ifndef cl_khr_depth_images
 #define cl_khr_depth_images
 #endif //cl_khr_depth_images
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_images)
 
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
 #ifdef cl_khr_3d_image_writes
@@ -15585,7 +15585,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
 #endif //cl_khr_fp16
 
 // Image read functions for read_write images
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);
 int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);
 uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);
@@ -15628,7 +15628,6 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co
 float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 #ifdef cl_khr_mipmap_image
 float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
 int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
@@ -15679,7 +15678,6 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler,
 uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
 
 #endif //cl_khr_mipmap_image
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
 // Image read functions returning half4 type
 #ifdef cl_khr_fp16
@@ -15690,7 +15688,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
 half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
 half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
 #endif //cl_khr_fp16
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Write color value to location specified by coordinate
@@ -15834,7 +15832,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col
 #endif //cl_khr_fp16
 
 // Image write functions for read_write images
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);
 void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);
 void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);
@@ -15866,7 +15864,6 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col
 void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);
 #endif //cl_khr_depth_images
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 #if defined(cl_khr_mipmap_image_writes)
 void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);
 void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);
@@ -15894,7 +15891,6 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4
 #endif //cl_khr_3d_image_writes
 
 #endif //cl_khr_mipmap_image_writes
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
 // Image write functions for half4 type
 #ifdef cl_khr_fp16
@@ -15907,7 +15903,7 @@ void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 col
 void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);
 void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);
 #endif //cl_khr_fp16
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have
 // access qualifier, which by default assume read_only access qualifier. Image query builtin
@@ -15955,7 +15951,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);
 int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_width(read_write image1d_t image);
 int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_width(read_write image2d_t image);
@@ -15972,7 +15968,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);
 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image height in pixels.
@@ -16007,7 +16003,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);
 int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_height(read_write image2d_t image);
 int __ovld __cnfn get_image_height(read_write image3d_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_t image);
@@ -16021,7 +16017,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image depth in pixels.
@@ -16032,9 +16028,9 @@ int __ovld __cnfn get_image_depth(read_only image3d_t image);
 int __ovld __cnfn get_image_depth(write_only image3d_t image);
 #endif
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_depth(read_write image3d_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 // OpenCL Extension v2.0 s9.18 - Mipmaps
 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
@@ -16130,7 +16126,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im
 int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);
@@ -16147,7 +16143,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image channel order. Valid values are:
@@ -16202,7 +16198,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image)
 int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_channel_order(read_write image1d_t image);
 int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_channel_order(read_write image2d_t image);
@@ -16219,7 +16215,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image)
 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the 2D image width and height as an int2
@@ -16252,7 +16248,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);
 int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int2 __ovld __cnfn get_image_dim(read_write image2d_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);
 #ifdef cl_khr_depth_images
@@ -16265,7 +16261,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the 3D image width, height, and depth as an
@@ -16277,9 +16273,9 @@ int4 __ovld __cnfn get_image_dim(read_only image3d_t image);
 #ifdef cl_khr_3d_image_writes
 int4 __ovld __cnfn get_image_dim(write_only image3d_t image);
 #endif
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int4 __ovld __cnfn get_image_dim(read_write image3d_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image array size.
@@ -16305,7 +16301,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_
 size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);
 #ifdef cl_khr_depth_images
@@ -16315,7 +16311,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
 * Return the number of samples associated with image
@@ -16331,12 +16327,12 @@ int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);
 int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
 int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld get_image_num_samples(read_write image2d_msaa_t image);
 int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
 int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
 int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 #endif
 
 // OpenCL v2.0 s6.13.15 - Work-group Functions
@@ -17572,34 +17568,38 @@ uint16  __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
 long    __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
 ulong   __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
 
+#if defined(__opencl_c_images)
 uint    __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
 uint2   __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
 uint4   __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
 uint8   __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
+#endif
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 uint    __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
 uint2   __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
 uint4   __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
 uint8   __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 uint    __ovld __conv intel_sub_group_block_read( const __global uint* p );
 uint2   __ovld __conv intel_sub_group_block_read2( const __global uint* p );
 uint4   __ovld __conv intel_sub_group_block_read4( const __global uint* p );
 uint8   __ovld __conv intel_sub_group_block_read8( const __global uint* p );
 
+#if defined(__opencl_c_images)
 void    __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
 void    __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
 void    __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
 void    __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void    __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
 void    __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
 void    __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
 void    __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void    __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
 void    __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
@@ -17712,68 +17712,76 @@ ushort      __ovld __conv intel_sub_group_scan_inclusive_min( ushort  x );
 short       __ovld __conv intel_sub_group_scan_inclusive_max( short   x );
 ushort      __ovld __conv intel_sub_group_scan_inclusive_max( ushort  x );
 
+#if defined(__opencl_c_images)
 uint       __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 uint       __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
 
+#if defined(__opencl_c_images)
 void       __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
+#endif //defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void       __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
 
+#if defined(__opencl_c_images)
 ushort      __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
 ushort2     __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
 ushort4     __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
 ushort8     __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 ushort      __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
 ushort2     __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
 ushort4     __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
 ushort8     __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 ushort      __ovld __conv intel_sub_group_block_read_us(  const __global ushort* p );
 ushort2     __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
 ushort4     __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
 ushort8     __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
 
+#if defined(__opencl_c_images)
 void        __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort  data);
 void        __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
 void        __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
 void        __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void        __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort  data);
 void        __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
 void        __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
 void        __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void        __ovld __conv intel_sub_group_block_write_us(  __global ushort* p, ushort  data );
 void        __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
@@ -17891,6 +17899,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
     short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
     ushort2 image_size);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_ime_result_t __ovld
 intel_sub_group_avc_ime_evaluate_with_single_reference(
     read_only image2d_t src_image, read_only image2d_t ref_image,
@@ -17931,6 +17940,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
     read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
     intel_sub_group_avc_ime_payload_t payload,
     intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
+#endif
 
 intel_sub_group_avc_ime_single_reference_streamin_t __ovld
 intel_sub_group_avc_ime_get_single_reference_streamin(
@@ -17995,6 +18005,7 @@ intel_sub_group_avc_ref_payload_t __ovld
 intel_sub_group_avc_ref_set_bilinear_filter_enable(
     intel_sub_group_avc_ref_payload_t payload);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_ref_result_t __ovld
 intel_sub_group_avc_ref_evaluate_with_single_reference(
     read_only image2d_t src_image, read_only image2d_t ref_image,
@@ -18013,6 +18024,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference(
     read_only image2d_t src_image, uint packed_reference_ids,
     uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
     intel_sub_group_avc_ref_payload_t payload);
+#endif //defined(__opencl_c_images)
 
 // SIC built-in functions
 intel_sub_group_avc_sic_payload_t __ovld
@@ -18063,6 +18075,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
     uchar block_based_skip_type,
     intel_sub_group_avc_sic_payload_t payload);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_sic_result_t __ovld
 intel_sub_group_avc_sic_evaluate_ipe(
     read_only image2d_t src_image, sampler_t vme_media_sampler,
@@ -18085,6 +18098,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference(
     read_only image2d_t src_image, uint packed_reference_ids,
     uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
     intel_sub_group_avc_sic_payload_t payload);
+#endif //defined(__opencl_c_images)
 
 uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
     intel_sub_group_avc_sic_result_t result);

From d3b188a2d72f9c398e4b1a36d23888c4ac783e9f Mon Sep 17 00:00:00 2001
From: Fabian Wolff 
Date: Thu, 20 Jan 2022 22:51:53 +0100
Subject: [PATCH 107/946] [clang-tidy] Include constructor initializers in
 `bugprone-exception-escape` check

Fixes PR#52435.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D113507
---
 .../clang-tidy/utils/ExceptionAnalyzer.cpp    | 18 ++++++++
 .../checkers/bugprone-exception-escape.cpp    | 43 +++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
index 97895115a7d5c..1f22c1d936561 100644
--- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
+++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
@@ -119,6 +119,16 @@ ExceptionAnalyzer::ExceptionInfo ExceptionAnalyzer::throwsException(
     CallStack.insert(Func);
     ExceptionInfo Result =
         throwsException(Body, ExceptionInfo::Throwables(), CallStack);
+
+    // For a constructor, we also have to check the initializers.
+    if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(Func)) {
+      for (const CXXCtorInitializer *Init : Ctor->inits()) {
+        ExceptionInfo Excs = throwsException(
+            Init->getInit(), ExceptionInfo::Throwables(), CallStack);
+        Result.merge(Excs);
+      }
+    }
+
     CallStack.erase(Func);
     return Result;
   }
@@ -195,6 +205,14 @@ ExceptionAnalyzer::ExceptionInfo ExceptionAnalyzer::throwsException(
       ExceptionInfo Excs = throwsException(Func, CallStack);
       Results.merge(Excs);
     }
+  } else if (const auto *Construct = dyn_cast<CXXConstructExpr>(St)) {
+    ExceptionInfo Excs =
+        throwsException(Construct->getConstructor(), CallStack);
+    Results.merge(Excs);
+  } else if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(St)) {
+    ExceptionInfo Excs =
+        throwsException(DefaultInit->getExpr(), Caught, CallStack);
+    Results.merge(Excs);
   } else {
     for (const Stmt *Child : St->children()) {
       ExceptionInfo Excs = throwsException(Child, Caught, CallStack);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
index ebb44f84f67cc..769064d74adc5 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
@@ -288,6 +288,49 @@ int indirectly_recursive(int n) noexcept {
   return recursion_helper(n);
 }
 
+struct super_throws {
+  super_throws() noexcept(false) { throw 42; }
+};
+
+struct sub_throws : super_throws {
+  sub_throws() noexcept : super_throws() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'sub_throws' which should not throw exceptions
+};
+
+struct super_throws_again {
+  super_throws_again() throw(int);
+};
+
+struct sub_throws_again : super_throws_again {
+  sub_throws_again() noexcept : super_throws_again() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'sub_throws_again' which should not throw exceptions
+};
+
+struct init_member_throws {
+  super_throws s;
+
+  init_member_throws() noexcept : s() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'init_member_throws' which should not throw exceptions
+};
+
+struct implicit_init_member_throws {
+  super_throws s;
+
+  implicit_init_member_throws() noexcept {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'implicit_init_member_throws' which should not throw exceptions
+};
+
+struct init {
+  explicit init(int, int) noexcept(false) { throw 42; }
+};
+
+struct in_class_init_throws {
+  init i{1, 2};
+
+  in_class_init_throws() noexcept {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'in_class_init_throws' which should not throw exceptions
+};
+
 int main() {
   // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: an exception may be thrown in function 'main' which should not throw exceptions
   throw 1;

From 7bf9065ad7d9f6c908ab109fa85bbc36ae6a1130 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert 
Date: Thu, 20 Jan 2022 18:04:32 -0600
Subject: [PATCH 108/946] [Attributor][NFC] Clang format

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1a7763cbf49c0..71caa027498b7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -6007,9 +6007,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
 
   Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
                           AllocationInfo &AI) {
-    auto Mapper = [&](const Value *V) -> const Value* {
+    auto Mapper = [&](const Value *V) -> const Value * {
       bool UsedAssumedInformation = false;
-      if (Optional<Constant *> SimpleV = A.getAssumedConstant(*V, AA, UsedAssumedInformation))
+      if (Optional<Constant *> SimpleV =
+              A.getAssumedConstant(*V, AA, UsedAssumedInformation))
         if (*SimpleV)
           return *SimpleV;
       return V;
@@ -6266,8 +6267,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
       if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
         LLVM_DEBUG({
           if (!Size.hasValue())
-            dbgs() << "[H2S] Unknown allocation size: " << *AI.CB
-                   << "\n";
+            dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
           else
             dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
                    << MaxHeapToStackSize << "\n";

From 37e0c58559ad367f16960fd65c8efb7bb1a6414f Mon Sep 17 00:00:00 2001
From: Johannes Doerfert 
Date: Thu, 20 Jan 2022 17:52:41 -0600
Subject: [PATCH 109/946] [Attributor][FIX] AAValueConstantRange should not
 loop unconstrained

The old method to avoid unconstrained expansion of the constant range in
a loop did not work as soon as there were multiple instructions in
between the phi and its input. We now take a generic approach and limit
the number of updates as a fallback. The old method is kept as it
catches "the common case" early.
---
 .../Transforms/IPO/AttributorAttributes.cpp   |  19 +-
 llvm/test/Transforms/Attributor/range.ll      | 433 +++++++-----------
 2 files changed, 195 insertions(+), 257 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 71caa027498b7..0723402c19ee0 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -8497,13 +8497,30 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
                                                   /* UseValueSimplify */ false))
       return indicatePessimisticFixpoint();
 
-    return clampStateAndIndicateChange(getState(), T);
+    // Ensure that long def-use chains can't cause circular reasoning either by
+    // introducing a cutoff below.
+    if (clampStateAndIndicateChange(getState(), T) == ChangeStatus::UNCHANGED)
+      return ChangeStatus::UNCHANGED;
+    if (++NumChanges > MaxNumChanges) {
+      LLVM_DEBUG(dbgs() << "[AAValueConstantRange] performed " << NumChanges
+                        << " but only " << MaxNumChanges
+                        << " are allowed to avoid cyclic reasoning.");
+      return indicatePessimisticFixpoint();
+    }
+    return ChangeStatus::CHANGED;
   }
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {
     STATS_DECLTRACK_FLOATING_ATTR(value_range)
   }
+
+  /// Tracker to bail after too many widening steps of the constant range.
+  int NumChanges = 0;
+
+  /// Upper bound for the number of allowed changes (=widening steps) for the
+  /// constant range before we give up.
+  static constexpr int MaxNumChanges = 5;
 };
 
 struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll
index 58b888a767d97..8c120acbb8900 100644
--- a/llvm/test/Transforms/Attributor/range.ll
+++ b/llvm/test/Transforms/Attributor/range.ll
@@ -24,29 +24,17 @@ define i32 @test0(i32* %p) {
 }
 
 define i32 @test0-range-check(i32* %p) {
-; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2:[0-9]+]], !range [[RNG0]]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-range-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    ret i32 [[A]]
 ;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2:[0-9]+]], !range [[RNG0]]
-; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-range-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
   %a = tail call i32 @test0(i32* %p)
   ret i32 %a
@@ -66,193 +54,99 @@ define void @use3(i1, i1, i1) {
 ; TEST0 icmp test
 define void @test0-icmp-check(i32* %p){
   ; ret = [0, 10)
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
-; IS__TUNIT_OPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG0]]
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    ret void
-;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
-; IS__TUNIT_NPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2]], !range [[RNG0]]
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
-; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG0]]
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
-; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2]], !range [[RNG0]]
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-icmp-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
+; IS__TUNIT____-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    ret void
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-icmp-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
+; IS__CGSCC____-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    ret void
 ;
   %ret = tail call i32 @test0(i32 *%p)
 
@@ -363,33 +257,19 @@ define i32 @test1(i32* %p) {
 
 define i1 @test1-check(i32* %p) {
 ;
-; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test1-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_OPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__TUNIT_OPM-NEXT:    ret i1 [[CMP]]
-;
-; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test1-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2]], !range [[RNG2:![0-9]+]]
-; IS__TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__TUNIT_NPM-NEXT:    ret i1 [[CMP]]
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test1-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
-; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__CGSCC_OPM-NEXT:    ret i1 [[CMP]]
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test1-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
+; IS__TUNIT____-NEXT:    ret i1 [[CMP]]
 ;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test1-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2]], !range [[RNG2:![0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__CGSCC_NPM-NEXT:    ret i1 [[CMP]]
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test1-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
+; IS__CGSCC____-NEXT:    ret i1 [[CMP]]
 ;
   %res = tail call i32 @test1(i32* %p)
   %cmp = icmp eq i32 %res, 500
@@ -698,7 +578,7 @@ define dso_local i32 @test4-g1(i32 %u) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-g1
 ; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR3:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR4:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[CALL]]
 ;
 ; FIXME: %call should have range [0, inf]
@@ -790,7 +670,7 @@ define dso_local i32 @test4-g2(i32 %u) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-g2
 ; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR3]], !range [[RNG3:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR4]], !range [[RNG3:![0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[CALL]]
 ;
 entry:
@@ -1161,10 +1041,10 @@ define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dp
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fcmp_caller
 ; IS__CGSCC_NPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR3]]
+; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR4]]
 ; IS__CGSCC_NPM-NEXT:    [[O1:%.*]] = or i1 [[R1]], [[R2]]
 ; IS__CGSCC_NPM-NEXT:    [[O2:%.*]] = or i1 [[R3]], [[R4]]
 ; IS__CGSCC_NPM-NEXT:    [[O3:%.*]] = or i1 [[O1]], [[O2]]
@@ -1331,8 +1211,8 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) {
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_range_2
 ; IS__CGSCC_NPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]], !range [[RNG5:![0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]], !range [[RNG5]]
+; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]], !range [[RNG5:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]], !range [[RNG5]]
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = add i32 [[R1]], [[R2]]
 ; IS__CGSCC_NPM-NEXT:    [[I1:%.*]] = icmp sle i32 [[A]], 3
 ; IS__CGSCC_NPM-NEXT:    ret i1 [[I1]]
@@ -1489,10 +1369,10 @@ define i32 @simplify_callsite_argument(i1 %d) {
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC_NPM:       t:
-; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR4]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET1]]
 ; IS__CGSCC_NPM:       f:
-; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR4]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET2]]
 ;
   %c = select i1 %d, i1 true, i1 false
@@ -1898,6 +1778,50 @@ bb3:                                              ; preds = %bb2, %bb1
   ret void
 }
 
+define i1 @loop_1(i32 %N) {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
+; IS__TUNIT____-LABEL: define {{[^@]+}}@loop_1
+; IS__TUNIT____-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    br label [[HEADER:%.*]]
+; IS__TUNIT____:       header:
+; IS__TUNIT____-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[AND:%.*]], [[HEADER]] ]
+; IS__TUNIT____-NEXT:    [[INC:%.*]] = add i32 [[I]], 1
+; IS__TUNIT____-NEXT:    [[AND]] = and i32 [[INC]], 9999
+; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], [[AND]]
+; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[HEADER]], label [[EXIT:%.*]]
+; IS__TUNIT____:       exit:
+; IS__TUNIT____-NEXT:    [[R:%.*]] = icmp sle i32 [[I]], 5
+; IS__TUNIT____-NEXT:    ret i1 [[R]]
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@loop_1
+; IS__CGSCC____-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    br label [[HEADER:%.*]]
+; IS__CGSCC____:       header:
+; IS__CGSCC____-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[AND:%.*]], [[HEADER]] ]
+; IS__CGSCC____-NEXT:    [[INC:%.*]] = add i32 [[I]], 1
+; IS__CGSCC____-NEXT:    [[AND]] = and i32 [[INC]], 9999
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], [[AND]]
+; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[HEADER]], label [[EXIT:%.*]]
+; IS__CGSCC____:       exit:
+; IS__CGSCC____-NEXT:    [[R:%.*]] = icmp sle i32 [[I]], 5
+; IS__CGSCC____-NEXT:    ret i1 [[R]]
+;
+entry:
+  br label %header
+header:
+  %i = phi i32 [0, %entry], [%and, %header]
+  %inc = add i32 %i, 1
+  %and = and i32 %inc, 9999
+  %cmp = icmp ne i32 %N, %and
+  br i1 %cmp, label %header, label %exit
+exit:
+  %r = icmp sle i32 %i, 5
+  ret i1 %r
+}
+
 declare void @ham(i32)
 
 declare void @barney(i32 signext, i32 signext)
@@ -1906,14 +1830,10 @@ declare void @barney(i32 signext, i32 signext)
 !0 = !{i32 0, i32 10}
 !1 = !{i32 10, i32 100}
 ;.
-; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
-; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
-;.
-; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
@@ -1924,8 +1844,9 @@ declare void @barney(i32 signext, i32 signext)
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR2]] = { readonly willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR3]] = { readnone willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC_NPM: attributes #[[ATTR3]] = { readonly willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR4]] = { readnone willreturn }
 ;.
 ; IS__TUNIT_OPM: [[RNG0]] = !{i32 0, i32 10}
 ; IS__TUNIT_OPM: [[RNG1]] = !{i32 10, i32 100}
@@ -1935,7 +1856,7 @@ declare void @barney(i32 signext, i32 signext)
 ;.
 ; IS________NPM: [[RNG0]] = !{i32 0, i32 10}
 ; IS________NPM: [[RNG1]] = !{i32 10, i32 100}
-; IS________NPM: [[META2:![0-9]+]] = !{i32 200, i32 1091}
+; IS________NPM: [[RNG2]] = !{i32 200, i32 1091}
 ; IS________NPM: [[META3:![0-9]+]] = !{i32 1, i32 -2147483648}
 ; IS________NPM: [[RNG4]] = !{i32 0, i32 2}
 ; IS________NPM: [[META5:![0-9]+]] = !{i32 1, i32 3}

From 1d7604fdcebda9cc431b7f19c2f4cb769efdca02 Mon Sep 17 00:00:00 2001
From: Paweł Bylica
Date: Fri, 21 Jan 2022 00:56:38 +0100
Subject: [PATCH 110/946] [InstCombine] Simplify bswap -> shift

Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one
"active byte", i.e. all active bits are contained within the
boundaries of a single byte of x.

https://alive2.llvm.org/ce/z/nvbbU5
https://alive2.llvm.org/ce/z/KiiL3J

Reviewed By: spatel, craig.topper, lebedev.ri

Differential Revision: https://reviews.llvm.org/D117680
---
 .../InstCombine/InstCombineCalls.cpp          | 15 ++++++
 .../test/Transforms/InstCombine/bswap-fold.ll | 54 +++++++++----------
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ad97e7265404c..e3a9e806abdba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *IIOperand = II->getArgOperand(0);
     Value *X = nullptr;
 
+    KnownBits Known = computeKnownBits(IIOperand, 0, II);
+    uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
+    uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+
+    // bswap(x) -> shift(x) if x has exactly one "active byte"
+    if (Known.getBitWidth() - LZ - TZ == 8) {
+      assert(LZ != TZ && "active byte cannot be in the middle");
+      if (LZ > TZ)  // -> shl(x) if the "active byte" is in the low part of x
+        return BinaryOperator::CreateNUWShl(
+            IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
+      // -> lshr(x) if the "active byte" is in the high part of x
+      return BinaryOperator::CreateExactLShr(
+            IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
+    }
+
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
       unsigned C = X->getType()->getScalarSizeInBits() -
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index 9ffae93bbbc7b..47083b815e9fe 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
 
 define i64 @bs_active_high8(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %2 = shl i64 %0, 56
   %3 = call i64 @llvm.bswap.i64(i64 %2)
@@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
 
 define i32 @bs_active_high7(i32 %0) {
 ; CHECK-LABEL: @bs_active_high7(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 254
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, -33554432  ; 0xfe000000
@@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
 
 define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high4(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], 
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, 
@@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], 
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, 
@@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
 define i64 @bs_active_high8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
 define i64 @bs_active_high7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_6h(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_6h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
@@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
 
 define i32 @bs_active_byte_3h(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_3h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1536
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 393216  ; 0x0006'0000
@@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
 define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_byte_3h_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], 
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0,   ; 0x0080'0000, 0x0001'0000
@@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
 
 define i16 @bs_active_low1(i16 %0) {
 ; CHECK-LABEL: @bs_active_low1(
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 256
 ; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %2 = lshr i16 %0, 15
@@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
 
 define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low8(
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], 
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = and <2 x i32> %0, 
   %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
@@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], 
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, 
@@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
 define i64 @bs_active_low8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
 define i64 @bs_active_low7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_4l(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_4l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 1140850688  ; 0x44000000
@@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
 
 define i32 @bs_active_byte_2l(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_2l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 65280  ; 0xff00
@@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
 define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_byte_2l_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], 
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], 
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = and <2 x i64> %0,   ; 0x0100, 0xff00

From cfae2c65dbbe1a252958b4db2e32574e8e8dcec0 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Thu, 20 Jan 2022 15:24:06 -0800
Subject: [PATCH 111/946] [RISCV] Factor Zve32 support into
 RISCVSubtarget::getMaxELENForFixedLengthVectors.

This is needed to properly limit fractional LMULs for Zve32.

Add new Zve32 RUN lines to the existing tests for the
-riscv-v-fixed-length-vector-elen-max command line option.
---
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp          | 3 ++-
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 96f119ed3bc15..de6b0df2df8a8 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -199,8 +199,9 @@ unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
   assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
          isPowerOf2_32(RVVVectorELENMax) &&
          "V extension requires a ELEN to be a power of 2 between 8 and 64!");
+  unsigned ELEN = hasVInstructionsI64() ? 64 : 32;
   return PowerOf2Floor(
-      std::max(std::min(RVVVectorELENMax, 64), 8));
+      std::max(std::min(RVVVectorELENMax, ELEN), 8));
 }
 
 bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index 9e6da01784589..cbccf73c32ed3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -1,9 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
-; Test that limiting ELEN, scalarizes elements larger than that and disables
-; some fractional LMULs.
+; Test that limiting ELEN, either through the command line or zve32, scalarizes
+; elements larger than that and disables some fractional LMULs.
 
 ; This should use LMUL=1.
 define void @add_v4i32(<4 x i32>* %x, <4 x i32>* %y) {

From 922c29ccf143aa6c42d5887eef74d23e4ee83cb2 Mon Sep 17 00:00:00 2001
From: Peter Klausler 
Date: Wed, 12 Jan 2022 15:10:20 -0800
Subject: [PATCH 112/946] [flang] Allow explicit '+' in NAMELIST input
 subscripts

Array subscripts and substring limits in NAMELIST input are
allowed to bear an explicit plus sign.

Differential Revision: https://reviews.llvm.org/D117818
---
 flang/runtime/namelist.cpp           | 2 +-
 flang/unittests/Runtime/Namelist.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp
index 2804679db01e2..205212ccfb66e 100644
--- a/flang/runtime/namelist.cpp
+++ b/flang/runtime/namelist.cpp
@@ -99,7 +99,7 @@ static std::optional GetSubscriptValue(IoStatementState &io) {
   std::optional value;
   std::optional ch{io.GetCurrentChar()};
   bool negate{ch && *ch == '-'};
-  if (negate) {
+  if ((ch && *ch == '+') || negate) {
     io.HandleRelativePosition(1);
     ch = io.GetCurrentChar();
   }
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index 4770e26048de9..f4f5a30e101eb 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -135,7 +135,7 @@ TEST(NamelistTests, Subscripts) {
   aDesc->GetDimension(1).SetBounds(-1, 1);
   const NamelistGroup::Item items[]{{"a", *aDesc}};
   const NamelistGroup group{"justa", 1, items};
-  static char t1[]{"&justa A(0,1:-1:-2)=1 2/"};
+  static char t1[]{"&justa A(0,+1:-1:-2)=1 2/"};
   StaticDescriptor<1, true> statDesc;
   Descriptor &internalDesc{statDesc.descriptor()};
   internalDesc.Establish(TypeCode{CFI_type_char},
@@ -223,7 +223,7 @@ TEST(NamelistTypes, ArraySubstring) {
           std::vector{"abcdefgh", "ijklmnop"}, 8)};
   const NamelistGroup::Item items[]{{"a", *scDesc}};
   const NamelistGroup group{"justa", 1, items};
-  static char t1[]{"&justa A(:)(2:5)='BCDE' 'JKLM'/"};
+  static char t1[]{"&justa A(:)(2:+5)='BCDE' 'JKLM'/"};
   StaticDescriptor<1, true> statDesc;
   Descriptor &internalDesc{statDesc.descriptor()};
   internalDesc.Establish(TypeCode{CFI_type_char},

From d1123e36922d18e2b93b01e85ef706bc8f819fb5 Mon Sep 17 00:00:00 2001
From: Peter Klausler 
Date: Wed, 12 Jan 2022 17:34:52 -0800
Subject: [PATCH 113/946] [flang] Extension: skip over NAMELIST groups

Implements a near-universal extension in which NAMELIST
input will skip over unrelated namelist groups in the
input stream until the group with the requested name appears.

Differential Revision: https://reviews.llvm.org/D117843
---
 flang/docs/Extensions.md             |  3 ++
 flang/runtime/namelist.cpp           | 66 +++++++++++++++++++++-------
 flang/unittests/Runtime/Namelist.cpp | 32 +++++++++++++-
 3 files changed, 82 insertions(+), 19 deletions(-)

diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index aa60800246ba3..270ec2dfd93c7 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -212,6 +212,9 @@ end
   This legacy extension supports pre-Fortran'77 usage in which
   variables initialized in DATA statements with Hollerith literals
   as modifiable formats.
+* At runtime, `NAMELIST` input will skip over `NAMELIST` groups
+  with other names, and will treat text before and between groups
+  as if they were comment lines, even if not begun with `!`.
 
 ### Extensions supported when enabled by options
 
diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp
index 205212ccfb66e..fde828fddf443 100644
--- a/flang/runtime/namelist.cpp
+++ b/flang/runtime/namelist.cpp
@@ -322,6 +322,29 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc,
   return false;
 }
 
+// Advance to the terminal '/' of a namelist group.
+static void SkipNamelistGroup(IoStatementState &io) {
+  while (auto ch{io.GetNextNonBlank()}) {
+    io.HandleRelativePosition(1);
+    if (*ch == '/') {
+      break;
+    } else if (*ch == '\'' || *ch == '"') {
+      // Skip quoted character literal
+      char32_t quote{*ch};
+      while (true) {
+        if ((ch = io.GetCurrentChar())) {
+          io.HandleRelativePosition(1);
+          if (*ch == quote) {
+            break;
+          }
+        } else if (!io.AdvanceRecord()) {
+          return;
+        }
+      }
+    }
+  }
+}
+
 bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
   IoStatementState &io{*cookie};
   io.CheckFormattedStmtType("InputNamelist");
@@ -330,26 +353,35 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
   IoErrorHandler &handler{io.GetIoErrorHandler()};
   auto *listInput{io.get_if>()};
   RUNTIME_CHECK(handler, listInput != nullptr);
-  // Check the group header
+  // Find this namelist group's header in the input
   io.BeginReadingRecord();
-  std::optional next{io.GetNextNonBlank()};
-  if (!next || *next != '&') {
-    handler.SignalError(
-        "NAMELIST input group does not begin with '&' (at '%lc')", *next);
-    return false;
-  }
-  io.HandleRelativePosition(1);
+  std::optional next;
   char name[nameBufferSize];
-  if (!GetLowerCaseName(io, name, sizeof name)) {
-    handler.SignalError("NAMELIST input group has no name");
-    return false;
-  }
   RUNTIME_CHECK(handler, group.groupName != nullptr);
-  if (std::strcmp(group.groupName, name) != 0) {
-    handler.SignalError(
-        "NAMELIST input group name '%s' is not the expected '%s'", name,
-        group.groupName);
-    return false;
+  while (true) {
+    next = io.GetNextNonBlank();
+    while (next && *next != '&') {
+      // Extension: comment lines without ! before namelist groups
+      if (!io.AdvanceRecord()) {
+        next.reset();
+      } else {
+        next = io.GetNextNonBlank();
+      }
+    }
+    if (!next || *next != '&') {
+      handler.SignalError(
+          "NAMELIST input group does not begin with '&' (at '%lc')", *next);
+      return false;
+    }
+    io.HandleRelativePosition(1);
+    if (!GetLowerCaseName(io, name, sizeof name)) {
+      handler.SignalError("NAMELIST input group has no name");
+      return false;
+    }
+    if (std::strcmp(group.groupName, name) == 0) {
+      break; // found it
+    }
+    SkipNamelistGroup(io);
   }
   // Read the group's items
   while (true) {
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index f4f5a30e101eb..38305f729b145 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -189,7 +189,7 @@ TEST(NamelistTests, ShortArrayInput) {
   EXPECT_EQ(*bDesc->ZeroBasedIndexedElement(1), -2);
 }
 
-TEST(NamelistTypes, ScalarSubstring) {
+TEST(NamelistTests, ScalarSubstring) {
   OwningPtr scDesc{MakeArray(
       std::vector{}, std::vector{"abcdefgh"}, 8)};
   const NamelistGroup::Item items[]{{"a", *scDesc}};
@@ -217,7 +217,7 @@ TEST(NamelistTypes, ScalarSubstring) {
   EXPECT_EQ(got, expect);
 }
 
-TEST(NamelistTypes, ArraySubstring) {
+TEST(NamelistTests, ArraySubstring) {
   OwningPtr scDesc{
       MakeArray(std::vector{2},
           std::vector{"abcdefgh", "ijklmnop"}, 8)};
@@ -246,4 +246,32 @@ TEST(NamelistTypes, ArraySubstring) {
   EXPECT_EQ(got, expect);
 }
 
+TEST(NamelistTests, Skip) {
+  OwningPtr<Descriptor> scDesc{
+      MakeArray<TypeCategory::Integer, static_cast<int>(sizeof(int))>(
+          std::vector<int>{}, std::vector<int>{-1})};
+  const NamelistGroup::Item items[]{{"j", *scDesc}};
+  const NamelistGroup group{"nml", 1, items};
+  static char t1[]{"&skip a='str''ing'/&nml j=123/"};
+  StaticDescriptor<1, true> statDesc;
+  Descriptor &internalDesc{statDesc.descriptor()};
+  internalDesc.Establish(TypeCode{CFI_type_char},
+      /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer);
+  auto inCookie{IONAME(BeginInternalArrayListInput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk)
+      << "namelist input with skipping";
+  char out[20];
+  internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out,
+      out, 0, nullptr, CFI_attribute_pointer);
+  auto outCookie{IONAME(BeginInternalArrayListOutput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << "namelist output";
+  std::string got{out, sizeof out};
+  static const std::string expect{"&NML J= 123/        "};
+  EXPECT_EQ(got, expect);
+}
+
 // TODO: Internal NAMELIST error tests

From ad06e65dc4da45be2cda3e07664502651679622a Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Wed, 19 Jan 2022 11:32:27 +0000
Subject: [PATCH 114/946] [RISCV] Fix the bug in the register allocator caused
 by reserved BP.

Originally, hasRVVFrameObject() scanned all the stack objects to check
whether there was any scalable vector object on the stack. However, that
caused errors in the register allocator. In issue 53016, it returned false
before RA because there were no RVV stack objects. After RA, it returned
true because RA had created spill slots for RVV values. As a result, the
compiler did not reserve BP during register allocation, yet it generated BP
accesses in the PEI pass due to the inconsistent behavior of the function.

The function is changed to use hasVInstructions() as the return value. It is
not precise, but it makes the register allocation correct.

Refer to https://github.com/llvm/llvm-project/issues/53016.

Differential Revision: https://reviews.llvm.org/D117663
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp  | 24 +++--
 .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 93 ++++++++-----------
 .../RISCV/rvv/fixed-vectors-calling-conv.ll   | 24 +++--
 .../CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll | 35 +++----
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 52 +++++------
 5 files changed, 119 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f5d4919380504..ad003404d793e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -242,7 +242,8 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
   // adjustment, we can not use SP to access the stack objects for the
   // arguments. Instead, use BP to access these stack objects.
   return (MFI.hasVarSizedObjects() ||
-          (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) &&
+          (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
+                                         MFI.getMaxCallFrameSize() != 0))) &&
          TRI->hasStackRealignment(MF);
 }
 
@@ -940,11 +941,22 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
 }
 
 static bool hasRVVFrameObject(const MachineFunction &MF) {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
-    if (MFI.getStackID(I) == TargetStackID::ScalableVector)
-      return true;
-  return false;
+  // Originally, this function scanned all the stack objects to check whether
+  // there was any scalable vector object on the stack. However, that caused
+  // errors in the register allocator. In issue 53016, it returned false
+  // before RA because there were no RVV stack objects. After RA, it returned
+  // true because RA had created spill slots for RVV values. As a result, BP
+  // was not reserved during register allocation, yet BP accesses were
+  // generated in the PEI pass due to the function's inconsistent behavior.
+  //
+  // The function is changed to use hasVInstructions() as the return value. It
+  // is not precise, but it can make the register allocation correct.
+  //
+  // FIXME: Find a better way to make the decision or revisit the solution in
+  // D103622.
+  //
+  // Refer to https://github.com/llvm/llvm-project/issues/53016.
+  return MF.getSubtarget().hasVInstructions();
 }
 
 // Not preserve stack space within prologue for outgoing variables when the
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index 4b62b8ead3b55..8b33e981854d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -51,36 +51,34 @@ body:             |
   ; CHECK-LABEL: name: spillslot
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $x12, $x1, $x9, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $x12, $x1, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x2 = frame-setup ADDI $x2, -2032
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 2032
   ; CHECK-NEXT:   SD killed $x1, $x2, 2024 :: (store (s64) into %stack.3)
   ; CHECK-NEXT:   SD killed $x8, $x2, 2016 :: (store (s64) into %stack.4)
-  ; CHECK-NEXT:   SD killed $x9, $x2, 2008 :: (store (s64) into %stack.5)
-  ; CHECK-NEXT:   SD killed $x18, $x2, 2000 :: (store (s64) into %stack.6)
-  ; CHECK-NEXT:   SD killed $x19, $x2, 1992 :: (store (s64) into %stack.7)
-  ; CHECK-NEXT:   SD killed $x20, $x2, 1984 :: (store (s64) into %stack.8)
-  ; CHECK-NEXT:   SD killed $x21, $x2, 1976 :: (store (s64) into %stack.9)
-  ; CHECK-NEXT:   SD killed $x22, $x2, 1968 :: (store (s64) into %stack.10)
-  ; CHECK-NEXT:   SD killed $x23, $x2, 1960 :: (store (s64) into %stack.11)
-  ; CHECK-NEXT:   SD killed $x24, $x2, 1952 :: (store (s64) into %stack.12)
-  ; CHECK-NEXT:   SD killed $x25, $x2, 1944 :: (store (s64) into %stack.13)
-  ; CHECK-NEXT:   SD killed $x26, $x2, 1936 :: (store (s64) into %stack.14)
-  ; CHECK-NEXT:   SD killed $x27, $x2, 1928 :: (store (s64) into %stack.15)
+  ; CHECK-NEXT:   SD killed $x18, $x2, 2008 :: (store (s64) into %stack.5)
+  ; CHECK-NEXT:   SD killed $x19, $x2, 2000 :: (store (s64) into %stack.6)
+  ; CHECK-NEXT:   SD killed $x20, $x2, 1992 :: (store (s64) into %stack.7)
+  ; CHECK-NEXT:   SD killed $x21, $x2, 1984 :: (store (s64) into %stack.8)
+  ; CHECK-NEXT:   SD killed $x22, $x2, 1976 :: (store (s64) into %stack.9)
+  ; CHECK-NEXT:   SD killed $x23, $x2, 1968 :: (store (s64) into %stack.10)
+  ; CHECK-NEXT:   SD killed $x24, $x2, 1960 :: (store (s64) into %stack.11)
+  ; CHECK-NEXT:   SD killed $x25, $x2, 1952 :: (store (s64) into %stack.12)
+  ; CHECK-NEXT:   SD killed $x26, $x2, 1944 :: (store (s64) into %stack.13)
+  ; CHECK-NEXT:   SD killed $x27, $x2, 1936 :: (store (s64) into %stack.14)
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x1, -8
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x8, -16
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x9, -24
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x18, -32
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x19, -40
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x20, -48
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x21, -56
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x22, -64
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x23, -72
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x24, -80
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x25, -88
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x26, -96
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x27, -104
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x18, -24
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x19, -32
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x20, -40
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x21, -48
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x22, -56
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x23, -64
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x24, -72
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x25, -80
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x26, -88
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x27, -96
   ; CHECK-NEXT:   $x8 = frame-setup ADDI $x2, 2032
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $x8, 0
   ; CHECK-NEXT:   $x2 = frame-setup ADDI $x2, -272
@@ -95,7 +93,7 @@ body:             |
   ; CHECK-NEXT:   $x10 = ADDI $x0, 50
   ; CHECK-NEXT:   $x11 = MUL killed $x11, killed $x10
   ; CHECK-NEXT:   $x10 = LUI 1
-  ; CHECK-NEXT:   $x10 = ADDIW killed $x10, -1896
+  ; CHECK-NEXT:   $x10 = ADDIW killed $x10, -1888
   ; CHECK-NEXT:   $x10 = ADD $x2, killed $x10
   ; CHECK-NEXT:   $x10 = ADD killed $x10, killed $x11
   ; CHECK-NEXT:   PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8)
@@ -113,8 +111,8 @@ body:             |
   ; CHECK-NEXT:   renamable $x21 = ADDI $x2, 1664
   ; CHECK-NEXT:   renamable $x22 = ADDI $x2, 1792
   ; CHECK-NEXT:   renamable $x23 = ADDI $x2, 1920
-  ; CHECK-NEXT:   SD killed $x1, $x2, 8 :: (store (s64) into %stack.16)
-  ; CHECK-NEXT:   SD killed $x5, $x2, 0 :: (store (s64) into %stack.17)
+  ; CHECK-NEXT:   SD killed $x1, $x2, 8 :: (store (s64) into %stack.15)
+  ; CHECK-NEXT:   SD killed $x5, $x2, 0 :: (store (s64) into %stack.16)
   ; CHECK-NEXT:   $x11 = LUI 1
   ; CHECK-NEXT:   $x11 = ADDIW killed $x11, -2048
   ; CHECK-NEXT:   $x24 = ADD $x2, killed $x11
@@ -130,23 +128,19 @@ body:             |
   ; CHECK-NEXT:   renamable $x13 = SLLI renamable $x11, 3
   ; CHECK-NEXT:   renamable $x13 = ADD renamable $x26, killed renamable $x13
   ; CHECK-NEXT:   renamable $x13 = LD killed renamable $x13, 0 :: (load (s64))
-  ; CHECK-NEXT:   renamable $x9 = SRAI renamable $x13, 63
-  ; CHECK-NEXT:   renamable $x9 = SRLI killed renamable $x9, 62
-  ; CHECK-NEXT:   renamable $x9 = ADD renamable $x13, killed renamable $x9
-  ; CHECK-NEXT:   renamable $x9 = ANDI killed renamable $x9, -4
-  ; CHECK-NEXT:   renamable $x16 = SUB killed renamable $x13, renamable $x9
+  ; CHECK-NEXT:   renamable $x16 = SUB killed renamable $x13, renamable $x13
   ; CHECK-NEXT:   dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   renamable $x13 = nsw ADDI renamable $x16, -2
   ; CHECK-NEXT:   $x5 = PseudoReadVLENB
   ; CHECK-NEXT:   $x1 = ADDI $x0, 50
   ; CHECK-NEXT:   $x5 = MUL killed $x5, killed $x1
   ; CHECK-NEXT:   $x1 = LUI 1
-  ; CHECK-NEXT:   $x1 = ADDIW killed $x1, -1896
+  ; CHECK-NEXT:   $x1 = ADDIW killed $x1, -1888
   ; CHECK-NEXT:   $x1 = ADD $x2, killed $x1
   ; CHECK-NEXT:   $x1 = ADD killed $x1, killed $x5
-  ; CHECK-NEXT:   $x5 = LD $x2, 0 :: (load (s64) from %stack.17)
+  ; CHECK-NEXT:   $x5 = LD $x2, 0 :: (load (s64) from %stack.16)
   ; CHECK-NEXT:   renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8)
-  ; CHECK-NEXT:   $x1 = LD $x2, 8 :: (load (s64) from %stack.16)
+  ; CHECK-NEXT:   $x1 = LD $x2, 8 :: (load (s64) from %stack.15)
   ; CHECK-NEXT:   renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   BLT killed renamable $x16, renamable $x27, %bb.2
@@ -155,7 +149,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT:   liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $x9 = COPY killed renamable $x13
+  ; CHECK-NEXT:   renamable $x10 = COPY killed renamable $x13
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -165,17 +159,16 @@ body:             |
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 272
   ; CHECK-NEXT:   $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
   ; CHECK-NEXT:   $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
-  ; CHECK-NEXT:   $x9 = LD $x2, 2008 :: (load (s64) from %stack.5)
-  ; CHECK-NEXT:   $x18 = LD $x2, 2000 :: (load (s64) from %stack.6)
-  ; CHECK-NEXT:   $x19 = LD $x2, 1992 :: (load (s64) from %stack.7)
-  ; CHECK-NEXT:   $x20 = LD $x2, 1984 :: (load (s64) from %stack.8)
-  ; CHECK-NEXT:   $x21 = LD $x2, 1976 :: (load (s64) from %stack.9)
-  ; CHECK-NEXT:   $x22 = LD $x2, 1968 :: (load (s64) from %stack.10)
-  ; CHECK-NEXT:   $x23 = LD $x2, 1960 :: (load (s64) from %stack.11)
-  ; CHECK-NEXT:   $x24 = LD $x2, 1952 :: (load (s64) from %stack.12)
-  ; CHECK-NEXT:   $x25 = LD $x2, 1944 :: (load (s64) from %stack.13)
-  ; CHECK-NEXT:   $x26 = LD $x2, 1936 :: (load (s64) from %stack.14)
-  ; CHECK-NEXT:   $x27 = LD $x2, 1928 :: (load (s64) from %stack.15)
+  ; CHECK-NEXT:   $x18 = LD $x2, 2008 :: (load (s64) from %stack.5)
+  ; CHECK-NEXT:   $x19 = LD $x2, 2000 :: (load (s64) from %stack.6)
+  ; CHECK-NEXT:   $x20 = LD $x2, 1992 :: (load (s64) from %stack.7)
+  ; CHECK-NEXT:   $x21 = LD $x2, 1984 :: (load (s64) from %stack.8)
+  ; CHECK-NEXT:   $x22 = LD $x2, 1976 :: (load (s64) from %stack.9)
+  ; CHECK-NEXT:   $x23 = LD $x2, 1968 :: (load (s64) from %stack.10)
+  ; CHECK-NEXT:   $x24 = LD $x2, 1960 :: (load (s64) from %stack.11)
+  ; CHECK-NEXT:   $x25 = LD $x2, 1952 :: (load (s64) from %stack.12)
+  ; CHECK-NEXT:   $x26 = LD $x2, 1944 :: (load (s64) from %stack.13)
+  ; CHECK-NEXT:   $x27 = LD $x2, 1936 :: (load (s64) from %stack.14)
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 2032
   ; CHECK-NEXT:   PseudoRET
   bb.0:
@@ -212,11 +205,7 @@ body:             |
     renamable $x13 = SLLI renamable $x11, 3
     renamable $x13 = ADD renamable $x26, killed renamable $x13
     renamable $x13 = LD killed renamable $x13, 0 :: (load (s64))
-    renamable $x9 = SRAI renamable $x13, 63
-    renamable $x9 = SRLI killed renamable $x9, 62
-    renamable $x9 = ADD renamable $x13, killed renamable $x9
-    renamable $x9 = ANDI killed renamable $x9, -4
-    renamable $x16 = SUB killed renamable $x13, renamable $x9
+    renamable $x16 = SUB killed renamable $x13, renamable $x13
     dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
     renamable $x13 = nsw ADDI renamable $x16, -2
     renamable $v0 = PseudoVRELOAD_M1 %stack.1 :: (load unknown-size from %stack.1, align 8)
@@ -228,7 +217,7 @@ body:             |
     successors: %bb.2
     liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
 
-    renamable $x9 = COPY killed renamable $x13
+    renamable $x10 = COPY killed renamable $x13
     PseudoBR %bb.2
 
   bb.2:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 23e41d67e0a54..13b856e00a7c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -882,11 +882,14 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    .cfi_def_cfa_offset 384
 ; LMULMAX1-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s1, 360(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    .cfi_offset ra, -8
 ; LMULMAX1-NEXT:    .cfi_offset s0, -16
+; LMULMAX1-NEXT:    .cfi_offset s1, -24
 ; LMULMAX1-NEXT:    addi s0, sp, 384
 ; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX1-NEXT:    andi sp, sp, -128
+; LMULMAX1-NEXT:    mv s1, sp
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-NEXT:    vle32.v v24, (a0)
 ; LMULMAX1-NEXT:    addi a1, a0, 16
@@ -904,25 +907,26 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    addi a0, a0, 112
 ; LMULMAX1-NEXT:    vle32.v v31, (a0)
 ; LMULMAX1-NEXT:    ld a0, 0(s0)
-; LMULMAX1-NEXT:    addi a1, sp, 240
+; LMULMAX1-NEXT:    addi sp, sp, -16
+; LMULMAX1-NEXT:    addi a1, s1, 240
 ; LMULMAX1-NEXT:    vse32.v v15, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 224
+; LMULMAX1-NEXT:    addi a1, s1, 224
 ; LMULMAX1-NEXT:    vse32.v v14, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 208
+; LMULMAX1-NEXT:    addi a1, s1, 208
 ; LMULMAX1-NEXT:    vse32.v v13, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 192
+; LMULMAX1-NEXT:    addi a1, s1, 192
 ; LMULMAX1-NEXT:    vse32.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 176
+; LMULMAX1-NEXT:    addi a1, s1, 176
 ; LMULMAX1-NEXT:    vse32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 160
+; LMULMAX1-NEXT:    addi a1, s1, 160
 ; LMULMAX1-NEXT:    vse32.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 144
+; LMULMAX1-NEXT:    addi a1, s1, 144
 ; LMULMAX1-NEXT:    vse32.v v9, (a1)
 ; LMULMAX1-NEXT:    li a1, 42
 ; LMULMAX1-NEXT:    sd a1, 8(sp)
 ; LMULMAX1-NEXT:    sd a0, 0(sp)
-; LMULMAX1-NEXT:    addi a0, sp, 128
-; LMULMAX1-NEXT:    addi a1, sp, 128
+; LMULMAX1-NEXT:    addi a0, s1, 128
+; LMULMAX1-NEXT:    addi a1, s1, 128
 ; LMULMAX1-NEXT:    vse32.v v8, (a1)
 ; LMULMAX1-NEXT:    vmv.v.v v8, v24
 ; LMULMAX1-NEXT:    vmv.v.v v9, v25
@@ -933,9 +937,11 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    vmv.v.v v14, v30
 ; LMULMAX1-NEXT:    vmv.v.v v15, v31
 ; LMULMAX1-NEXT:    call ext3@plt
+; LMULMAX1-NEXT:    addi sp, sp, 16
 ; LMULMAX1-NEXT:    addi sp, s0, -384
 ; LMULMAX1-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s1, 360(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    addi sp, sp, 384
 ; LMULMAX1-NEXT:    ret
   %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
index 3eccd6c84d9c3..5f926efd9012c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
@@ -5,25 +5,27 @@
 define void @foo(i32* nocapture noundef %p1) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
-; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    addi s0, sp, 128
+; CHECK-NEXT:    .cfi_offset s2, -32
+; CHECK-NEXT:    addi s0, sp, 192
 ; CHECK-NEXT:    .cfi_def_cfa s0, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    mv s1, sp
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, s1, 104
+; CHECK-NEXT:    addi a0, s1, 160
 ; CHECK-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    addi t0, s1, 64
@@ -39,16 +41,17 @@ define void @foo(i32* nocapture noundef %p1) {
 ; CHECK-NEXT:    call bar@plt
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (s1)
-; CHECK-NEXT:    addi a0, s1, 104
+; CHECK-NEXT:    vle32.v v8, (s2)
+; CHECK-NEXT:    addi a0, s1, 160
 ; CHECK-NEXT:    vl2re8.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
-; CHECK-NEXT:    vse32.v v8, (s1)
-; CHECK-NEXT:    addi sp, s0, -128
-; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 128
+; CHECK-NEXT:    vse32.v v8, (s2)
+; CHECK-NEXT:    addi sp, s0, -192
+; CHECK-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 192
 ; CHECK-NEXT:    ret
 entry:
   %vla = alloca [10 x i32], align 64
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 5f52fdba85c30..6bae641408887 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -600,30 +600,30 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    addi sp, sp, -96
 ; RV32MV-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
 ; RV32MV-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s2, 84(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s3, 80(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s4, 76(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s5, 72(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s6, 68(sp) # 4-byte Folded Spill
 ; RV32MV-NEXT:    addi s0, sp, 96
 ; RV32MV-NEXT:    andi sp, sp, -32
-; RV32MV-NEXT:    mv s1, a0
+; RV32MV-NEXT:    mv s2, a0
 ; RV32MV-NEXT:    lw a0, 8(a0)
-; RV32MV-NEXT:    lw a1, 4(s1)
+; RV32MV-NEXT:    lw a1, 4(s2)
 ; RV32MV-NEXT:    slli a2, a0, 31
 ; RV32MV-NEXT:    srli a3, a1, 1
-; RV32MV-NEXT:    or s2, a3, a2
-; RV32MV-NEXT:    lbu a2, 12(s1)
+; RV32MV-NEXT:    or s3, a3, a2
+; RV32MV-NEXT:    lbu a2, 12(s2)
 ; RV32MV-NEXT:    srli a3, a0, 1
 ; RV32MV-NEXT:    andi a3, a3, 1
-; RV32MV-NEXT:    neg s3, a3
+; RV32MV-NEXT:    neg s4, a3
 ; RV32MV-NEXT:    slli a3, a2, 30
 ; RV32MV-NEXT:    srli a0, a0, 2
-; RV32MV-NEXT:    or s4, a0, a3
+; RV32MV-NEXT:    or s5, a0, a3
 ; RV32MV-NEXT:    srli a0, a2, 2
 ; RV32MV-NEXT:    andi a2, a0, 1
-; RV32MV-NEXT:    lw a0, 0(s1)
-; RV32MV-NEXT:    neg s5, a2
+; RV32MV-NEXT:    lw a0, 0(s2)
+; RV32MV-NEXT:    neg s6, a2
 ; RV32MV-NEXT:    andi a1, a1, 1
 ; RV32MV-NEXT:    neg a1, a1
 ; RV32MV-NEXT:    li a2, 6
@@ -633,14 +633,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    sw a0, 32(sp)
 ; RV32MV-NEXT:    li a2, -5
 ; RV32MV-NEXT:    li a3, -1
-; RV32MV-NEXT:    mv a0, s4
-; RV32MV-NEXT:    mv a1, s5
+; RV32MV-NEXT:    mv a0, s5
+; RV32MV-NEXT:    mv a1, s6
 ; RV32MV-NEXT:    call __moddi3@plt
 ; RV32MV-NEXT:    sw a1, 52(sp)
 ; RV32MV-NEXT:    sw a0, 48(sp)
 ; RV32MV-NEXT:    li a2, 7
-; RV32MV-NEXT:    mv a0, s2
-; RV32MV-NEXT:    mv a1, s3
+; RV32MV-NEXT:    mv a0, s3
+; RV32MV-NEXT:    mv a1, s4
 ; RV32MV-NEXT:    li a3, 0
 ; RV32MV-NEXT:    call __moddi3@plt
 ; RV32MV-NEXT:    sw a1, 44(sp)
@@ -662,14 +662,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    vmv.v.i v8, 0
 ; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32MV-NEXT:    vse32.v v8, (s1)
+; RV32MV-NEXT:    vse32.v v8, (s2)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32MV-NEXT:    vmv.x.s a1, v10
 ; RV32MV-NEXT:    slli a2, a1, 1
 ; RV32MV-NEXT:    sub a0, a2, a0
-; RV32MV-NEXT:    sw a0, 4(s1)
+; RV32MV-NEXT:    sw a0, 4(s2)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    srli a2, a0, 30
@@ -678,7 +678,7 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    slli a3, a3, 2
 ; RV32MV-NEXT:    or a2, a3, a2
 ; RV32MV-NEXT:    andi a2, a2, 7
-; RV32MV-NEXT:    sb a2, 12(s1)
+; RV32MV-NEXT:    sb a2, 12(s2)
 ; RV32MV-NEXT:    srli a1, a1, 31
 ; RV32MV-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32MV-NEXT:    vmv.x.s a2, v8
@@ -687,15 +687,15 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    or a1, a1, a2
 ; RV32MV-NEXT:    slli a0, a0, 2
 ; RV32MV-NEXT:    or a0, a1, a0
-; RV32MV-NEXT:    sw a0, 8(s1)
+; RV32MV-NEXT:    sw a0, 8(s2)
 ; RV32MV-NEXT:    addi sp, s0, -96
 ; RV32MV-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s2, 84(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s3, 80(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s4, 76(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s5, 72(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s6, 68(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    addi sp, sp, 96
 ; RV32MV-NEXT:    ret
 ;

From f811cb82a6cd811dbb4730009d0f060503aa1c76 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Thu, 20 Jan 2022 12:08:20 -0800
Subject: [PATCH 115/946] [lldb] Revive lldb-instr

I revived lldb-instr to update the macros for D117712. I think the new
macros are simple enough that we can add them by hand, but this tool can do
it automatically for you.

Differential revision: https://reviews.llvm.org/D117748
---
 lldb/tools/CMakeLists.txt            |   1 +
 lldb/tools/lldb-instr/CMakeLists.txt |  16 +++
 lldb/tools/lldb-instr/Instrument.cpp | 173 +++++++++++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 lldb/tools/lldb-instr/CMakeLists.txt
 create mode 100644 lldb/tools/lldb-instr/Instrument.cpp

diff --git a/lldb/tools/CMakeLists.txt b/lldb/tools/CMakeLists.txt
index a5f4ca8ec7eb3..1585fd4dc4b9e 100644
--- a/lldb/tools/CMakeLists.txt
+++ b/lldb/tools/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory(intel-features)
 # example is `check-lldb`. So, we pass EXCLUDE_FROM_ALL here.
 add_subdirectory(lldb-test EXCLUDE_FROM_ALL)
 
+add_lldb_tool_subdirectory(lldb-instr)
 add_lldb_tool_subdirectory(lldb-vscode)
 
 if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
diff --git a/lldb/tools/lldb-instr/CMakeLists.txt b/lldb/tools/lldb-instr/CMakeLists.txt
new file mode 100644
index 0000000000000..8da453b2894fd
--- /dev/null
+++ b/lldb/tools/lldb-instr/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_lldb_tool(lldb-instr
+  Instrument.cpp
+
+  CLANG_LIBS
+    clangAST
+    clangBasic
+    clangCodeGen
+    clangFrontend
+    clangLex
+    clangRewrite
+    clangSerialization
+    clangTooling
+
+  LINK_COMPONENTS
+    Support
+  )
diff --git a/lldb/tools/lldb-instr/Instrument.cpp b/lldb/tools/lldb-instr/Instrument.cpp
new file mode 100644
index 0000000000000..4b8725396a61f
--- /dev/null
+++ b/lldb/tools/lldb-instr/Instrument.cpp
@@ -0,0 +1,173 @@
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
+#include "clang/Frontend/ASTConsumers.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include 
+#include 
+
+using namespace clang;
+using namespace clang::driver;
+using namespace clang::tooling;
+
+static llvm::cl::OptionCategory InstrCategory("LLDB Instrumentation Generator");
+
+class SBVisitor : public RecursiveASTVisitor<SBVisitor> {
+public:
+  SBVisitor(Rewriter &R, ASTContext &Context)
+      : MyRewriter(R), Context(Context) {}
+
+  bool VisitCXXMethodDecl(CXXMethodDecl *Decl) {
+    // Not all decls should be registered. Please refer to that method's
+    // comment for details.
+    if (ShouldSkip(Decl))
+      return false;
+
+    // Print 'bool' instead of '_Bool'.
+    PrintingPolicy Policy(Context.getLangOpts());
+    Policy.Bool = true;
+
+    // Collect the function's parameter names.
+    std::vector<std::string> ParamNames;
+    if (!Decl->isStatic())
+      ParamNames.push_back("this");
+    for (auto *P : Decl->parameters())
+      ParamNames.push_back(P->getNameAsString());
+
+    // Construct the macros.
+    std::string Buffer;
+    llvm::raw_string_ostream Macro(Buffer);
+    if (ParamNames.empty()) {
+      Macro << "LLDB_INSTRUMENT()";
+    } else {
+      Macro << "LLDB_INSTRUMENT_VA(" << llvm::join(ParamNames, ", ") << ")";
+    }
+
+    Stmt *Body = Decl->getBody();
+    for (auto &C : Body->children()) {
+      if (C->getBeginLoc().isMacroID()) {
+        CharSourceRange Range =
+            MyRewriter.getSourceMgr().getExpansionRange(C->getSourceRange());
+        MyRewriter.ReplaceText(Range, Macro.str());
+      } else {
+        Macro << ";";
+        SourceLocation InsertLoc = Lexer::getLocForEndOfToken(
+            Body->getBeginLoc(), 0, MyRewriter.getSourceMgr(),
+            MyRewriter.getLangOpts());
+        MyRewriter.InsertTextAfter(InsertLoc, Macro.str());
+      }
+      break;
+    }
+
+    return true;
+  }
+
+private:
+  /// Determine whether we need to consider the given CXXMethodDecl.
+  ///
+  /// Currently we skip the following cases:
+  ///  1. Decls outside the main source file,
+  ///  2. Decls that are only present in the source file,
+  ///  3. Decls that are not definitions,
+  ///  4. Non-public methods,
+  ///  5. Variadic methods.
+  ///  6. Destructors.
+  bool ShouldSkip(CXXMethodDecl *Decl) {
+    // Skip anything outside the main file.
+    if (!MyRewriter.getSourceMgr().isInMainFile(Decl->getBeginLoc()))
+      return true;
+
+    // Skip if the canonical decl is the current decl. It means that the method
+    // is declared in the implementation and is therefore not exposed as part
+    // of the API.
+    if (Decl == Decl->getCanonicalDecl())
+      return true;
+
+    // Skip decls that have no body, i.e. are just declarations.
+    Stmt *Body = Decl->getBody();
+    if (!Body)
+      return true;
+
+    // Skip non-public methods.
+    AccessSpecifier AS = Decl->getAccess();
+    if (AS != AccessSpecifier::AS_public)
+      return true;
+
+    // Skip variadic methods.
+    if (Decl->isVariadic())
+      return true;
+
+    // Skip destructors.
+    if (isa<CXXDestructorDecl>(Decl))
+      return true;
+
+    return false;
+  }
+
+  Rewriter &MyRewriter;
+  ASTContext &Context;
+};
+
+class SBConsumer : public ASTConsumer {
+public:
+  SBConsumer(Rewriter &R, ASTContext &Context) : Visitor(R, Context) {}
+
+  // Override the method that gets called for each parsed top-level
+  // declaration.
+  bool HandleTopLevelDecl(DeclGroupRef DR) override {
+    for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e; ++b) {
+      Visitor.TraverseDecl(*b);
+    }
+    return true;
+  }
+
+private:
+  SBVisitor Visitor;
+};
+
+class SBAction : public ASTFrontendAction {
+public:
+  SBAction() = default;
+
+  bool BeginSourceFileAction(CompilerInstance &CI) override { return true; }
+
+  void EndSourceFileAction() override { MyRewriter.overwriteChangedFiles(); }
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef File) override {
+    MyRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
+    return std::make_unique<SBConsumer>(MyRewriter, CI.getASTContext());
+  }
+
+private:
+  Rewriter MyRewriter;
+};
+
+int main(int argc, const char **argv) {
+  auto ExpectedParser = CommonOptionsParser::create(
+      argc, argv, InstrCategory, llvm::cl::OneOrMore,
+      "Utility for generating the macros for LLDB's "
+      "instrumentation framework.");
+  if (!ExpectedParser) {
+    llvm::errs() << ExpectedParser.takeError();
+    return 1;
+  }
+  CommonOptionsParser &OP = ExpectedParser.get();
+
+  auto PCHOpts = std::make_shared<PCHContainerOperations>();
+  PCHOpts->registerWriter(std::make_unique<ObjectFilePCHContainerWriter>());
+  PCHOpts->registerReader(std::make_unique<ObjectFilePCHContainerReader>());
+
+  ClangTool T(OP.getCompilations(), OP.getSourcePathList(), PCHOpts);
+  return T.run(newFrontendActionFactory<SBAction>().get());
+}

From 1755f5b1d7b7871672abdf0fde5ccd091b8dbc04 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Wed, 19 Jan 2022 11:38:26 -0800
Subject: [PATCH 116/946] [lldb] Decouple instrumentation from the reproducers

Remove the last remaining references to the reproducers from the
instrumentation. This patch renames the relevant files and macros.

Differential revision: https://reviews.llvm.org/D117712
---
 lldb/include/lldb/Utility/Instrumentation.h   | 103 ++++
 .../lldb/Utility/ReproducerInstrumentation.h  | 151 ------
 lldb/source/API/SBAddress.cpp                 |  59 +--
 lldb/source/API/SBAttachInfo.cpp              |  81 ++--
 lldb/source/API/SBBlock.cpp                   |  55 +--
 lldb/source/API/SBBreakpoint.cpp              | 172 +++----
 lldb/source/API/SBBreakpointLocation.cpp      |  98 ++--
 lldb/source/API/SBBreakpointName.cpp          | 115 ++---
 lldb/source/API/SBBreakpointOptionCommon.cpp  |  24 +-
 lldb/source/API/SBBroadcaster.cpp             |  52 +-
 lldb/source/API/SBCommandInterpreter.cpp      | 183 +++----
 .../API/SBCommandInterpreterRunOptions.cpp    |  86 ++--
 lldb/source/API/SBCommandReturnObject.cpp     | 102 ++--
 lldb/source/API/SBCommunication.cpp           |  51 +-
 lldb/source/API/SBCompileUnit.cpp             |  55 +--
 lldb/source/API/SBData.cpp                    | 131 ++---
 lldb/source/API/SBDebugger.cpp                | 368 ++++++--------
 lldb/source/API/SBDeclaration.cpp             |  41 +-
 lldb/source/API/SBEnvironment.cpp             |  36 +-
 lldb/source/API/SBError.cpp                   |  38 +-
 lldb/source/API/SBEvent.cpp                   |  47 +-
 lldb/source/API/SBExecutionContext.cpp        |  36 +-
 lldb/source/API/SBExpressionOptions.cpp       | 103 ++--
 lldb/source/API/SBFile.cpp                    |  35 +-
 lldb/source/API/SBFileSpec.cpp                |  47 +-
 lldb/source/API/SBFileSpecList.cpp            |  30 +-
 lldb/source/API/SBFrame.cpp                   | 134 ++----
 lldb/source/API/SBFunction.cpp                |  50 +-
 lldb/source/API/SBHostOS.cpp                  |  36 +-
 lldb/source/API/SBInstruction.cpp             |  57 +--
 lldb/source/API/SBInstructionList.cpp         |  45 +-
 lldb/source/API/SBLanguageRuntime.cpp         |   9 +-
 lldb/source/API/SBLaunchInfo.cpp              | 117 ++---
 lldb/source/API/SBLineEntry.cpp               |  40 +-
 lldb/source/API/SBListener.cpp                |  72 +--
 lldb/source/API/SBMemoryRegionInfo.cpp        |  52 +-
 lldb/source/API/SBMemoryRegionInfoList.cpp    |  29 +-
 lldb/source/API/SBModule.cpp                  | 129 ++---
 lldb/source/API/SBModuleSpec.cpp              |  80 ++-
 lldb/source/API/SBPlatform.cpp                | 156 +++---
 lldb/source/API/SBProcess.cpp                 | 245 ++++------
 lldb/source/API/SBProcessInfo.cpp             |  43 +-
 lldb/source/API/SBQueue.cpp                   |  41 +-
 lldb/source/API/SBQueueItem.cpp               |  27 +-
 lldb/source/API/SBReproducer.cpp              |  29 +-
 lldb/source/API/SBSection.cpp                 |  56 +--
 lldb/source/API/SBSourceManager.cpp           |  26 +-
 lldb/source/API/SBStream.cpp                  |  29 +-
 lldb/source/API/SBStringList.cpp              |  31 +-
 lldb/source/API/SBStructuredData.cpp          |  60 +--
 lldb/source/API/SBSymbol.cpp                  |  46 +-
 lldb/source/API/SBSymbolContext.cpp           |  58 +--
 lldb/source/API/SBSymbolContextList.cpp       |  31 +-
 lldb/source/API/SBTarget.cpp                  | 454 ++++++------------
 lldb/source/API/SBThread.cpp                  | 176 +++----
 lldb/source/API/SBThreadCollection.cpp        |  22 +-
 lldb/source/API/SBThreadPlan.cpp              |  97 ++--
 lldb/source/API/SBTrace.cpp                   |  23 +-
 lldb/source/API/SBType.cpp                    | 202 ++++----
 lldb/source/API/SBTypeCategory.cpp            | 113 ++---
 lldb/source/API/SBTypeEnumMember.cpp          |  51 +-
 lldb/source/API/SBTypeFilter.cpp              |  46 +-
 lldb/source/API/SBTypeFormat.cpp              |  44 +-
 lldb/source/API/SBTypeNameSpecifier.cpp       |  41 +-
 lldb/source/API/SBTypeSummary.cpp             |  95 ++--
 lldb/source/API/SBTypeSynthetic.cpp           |  52 +-
 lldb/source/API/SBUnixSignals.cpp             |  47 +-
 lldb/source/API/SBValue.cpp                   | 221 ++++-----
 lldb/source/API/SBValueList.cpp               |  33 +-
 lldb/source/API/SBVariablesOptions.cpp        |  56 +--
 lldb/source/API/SBWatchpoint.cpp              |  69 ++-
 .../Python/ScriptInterpreterPython.cpp        |   2 +-
 lldb/source/Utility/CMakeLists.txt            |   2 +-
 ...nstrumentation.cpp => Instrumentation.cpp} |  31 +-
 74 files changed, 2230 insertions(+), 3674 deletions(-)
 create mode 100644 lldb/include/lldb/Utility/Instrumentation.h
 delete mode 100644 lldb/include/lldb/Utility/ReproducerInstrumentation.h
 rename lldb/source/Utility/{ReproducerInstrumentation.cpp => Instrumentation.cpp} (52%)

diff --git a/lldb/include/lldb/Utility/Instrumentation.h b/lldb/include/lldb/Utility/Instrumentation.h
new file mode 100644
index 0000000000000..6962270bb89db
--- /dev/null
+++ b/lldb/include/lldb/Utility/Instrumentation.h
@@ -0,0 +1,103 @@
+//===-- Instrumentation.h ---------------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_UTILITY_INSTRUMENTATION_H
+#define LLDB_UTILITY_INSTRUMENTATION_H
+
+#include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <map>
+#include <thread>
+#include <type_traits>
+
+namespace lldb_private {
+namespace instrumentation {
+
+template <typename T,
+          typename std::enable_if<std::is_fundamental<T>::value, int>::type = 0>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
+  ss << t;
+}
+
+template <typename T, typename std::enable_if<!std::is_fundamental<T>::value,
+                                              int>::type = 0>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
+  ss << &t;
+}
+
+template <typename T>
+inline void stringify_append(llvm::raw_string_ostream &ss, T *t) {
+  ss << reinterpret_cast<void *>(t);
+}
+
+template <typename T>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) {
+  ss << reinterpret_cast<const void *>(t);
+}
+
+template <>
+inline void stringify_append<char>(llvm::raw_string_ostream &ss,
+                                   const char *t) {
+  ss << '\"' << t << '\"';
+}
+
+template <>
+inline void stringify_append<std::nullptr_t>(llvm::raw_string_ostream &ss,
+                                             const std::nullptr_t &t) {
+  ss << "\"nullptr\"";
+}
+
+template <typename Head>
+inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head) {
+  stringify_append(ss, head);
+}
+
+template <typename Head, typename... Tail>
+inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head,
+                             const Tail &...tail) {
+  stringify_append(ss, head);
+  ss << ", ";
+  stringify_helper(ss, tail...);
+}
+
+template <typename... Ts> inline std::string stringify_args(const Ts &...ts) {
+  std::string buffer;
+  llvm::raw_string_ostream ss(buffer);
+  stringify_helper(ss, ts...);
+  return ss.str();
+}
+
+/// RAII object for instrumenting LLDB API functions.
+class Instrumenter {
+public:
+  Instrumenter(llvm::StringRef pretty_func, std::string &&pretty_args = {});
+  ~Instrumenter();
+
+private:
+  void UpdateBoundary();
+
+  /// Whether this function call was the one crossing the API boundary.
+  bool m_local_boundary = false;
+};
+} // namespace instrumentation
+} // namespace lldb_private
+
+#define LLDB_INSTRUMENT()                                                      \
+  lldb_private::instrumentation::Instrumenter _instr(LLVM_PRETTY_FUNCTION);
+
+#define LLDB_INSTRUMENT_VA(...)                                                \
+  lldb_private::instrumentation::Instrumenter _instr(                          \
+      LLVM_PRETTY_FUNCTION,                                                    \
+      lldb_private::instrumentation::stringify_args(__VA_ARGS__));
+
+#endif // LLDB_UTILITY_INSTRUMENTATION_H
diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
deleted file mode 100644
index 3bce7b8f3a6f1..0000000000000
--- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//===-- ReproducerInstrumentation.h -----------------------------*- C++ -*-===//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
-#define LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
-
-#include "lldb/Utility/FileSpec.h"
-#include "lldb/Utility/Log.h"
-#include "lldb/Utility/Logging.h"
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/ErrorHandling.h"
-
-#include 
-#include 
-#include 
-
-template ::value, int>::type = 0>
-inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
-  ss << t;
-}
-
-template ::value,
-                                              int>::type = 0>
-inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
-  ss << &t;
-}
-
-template 
-inline void stringify_append(llvm::raw_string_ostream &ss, T *t) {
-  ss << reinterpret_cast(t);
-}
-
-template 
-inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) {
-  ss << reinterpret_cast(t);
-}
-
-template <>
-inline void stringify_append(llvm::raw_string_ostream &ss,
-                                   const char *t) {
-  ss << '\"' << t << '\"';
-}
-
-template <>
-inline void stringify_append(llvm::raw_string_ostream &ss,
-                                             const std::nullptr_t &t) {
-  ss << "\"nullptr\"";
-}
-
-template 
-inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head) {
-  stringify_append(ss, head);
-}
-
-template 
-inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head,
-                             const Tail &... tail) {
-  stringify_append(ss, head);
-  ss << ", ";
-  stringify_helper(ss, tail...);
-}
-
-template  inline std::string stringify_args(const Ts &... ts) {
-  std::string buffer;
-  llvm::raw_string_ostream ss(buffer);
-  stringify_helper(ss, ts...);
-  return ss.str();
-}
-
-#define LLDB_CONSTRUCT_(T, Class, ...)                                         \
-  lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION);
-
-#define LLDB_RECORD_CONSTRUCTOR(Class, Signature, ...)                         \
-  LLDB_CONSTRUCT_(Class Signature, this, __VA_ARGS__)
-
-#define LLDB_RECORD_CONSTRUCTOR_NO_ARGS(Class)                                 \
-  LLDB_CONSTRUCT_(Class(), this, lldb_private::repro::EmptyArg())
-
-#define LLDB_RECORD_(T1, T2, ...)                                              \
-  lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION,                \
-                                          stringify_args(__VA_ARGS__));
-
-#define LLDB_RECORD_METHOD(Result, Class, Method, Signature, ...)              \
-  LLDB_RECORD_(Result(Class::*) Signature, (&Class::Method), this, __VA_ARGS__)
-
-#define LLDB_RECORD_METHOD_CONST(Result, Class, Method, Signature, ...)        \
-  LLDB_RECORD_(Result(Class::*) Signature const, (&Class::Method), this,       \
-               __VA_ARGS__)
-
-#define LLDB_RECORD_METHOD_NO_ARGS(Result, Class, Method)                      \
-  LLDB_RECORD_(Result (Class::*)(), (&Class::Method), this)
-
-#define LLDB_RECORD_METHOD_CONST_NO_ARGS(Result, Class, Method)                \
-  LLDB_RECORD_(Result (Class::*)() const, (&Class::Method), this)
-
-#define LLDB_RECORD_STATIC_METHOD(Result, Class, Method, Signature, ...)       \
-  LLDB_RECORD_(Result(*) Signature, (&Class::Method), __VA_ARGS__)
-
-#define LLDB_RECORD_STATIC_METHOD_NO_ARGS(Result, Class, Method)               \
-  LLDB_RECORD_(Result (*)(), (&Class::Method), lldb_private::repro::EmptyArg())
-
-/// The LLDB_RECORD_DUMMY macro is special because it doesn't actually record
-/// anything. It's used to track API boundaries when we cannot record for
-/// technical reasons.
-#define LLDB_RECORD_DUMMY(Result, Class, Method, Signature, ...)               \
-  lldb_private::repro::Recorder _recorder;
-
-#define LLDB_RECORD_DUMMY_NO_ARGS(Result, Class, Method)                       \
-  lldb_private::repro::Recorder _recorder;
-
-namespace lldb_private {
-namespace repro {
-
-struct EmptyArg {};
-
-/// RAII object that records function invocations and their return value.
-///
-/// API calls are only captured when the API boundary is crossed. Once we're in
-/// the API layer, and another API function is called, it doesn't need to be
-/// recorded.
-///
-/// When a call is recored, its result is always recorded as well, even if the
-/// function returns a void. For functions that return by value, RecordResult
-/// should be used. Otherwise a sentinel value (0) will be serialized.
-///
-/// Because of the functional overlap between logging and recording API calls,
-/// this class is also used for logging.
-class Recorder {
-public:
-  Recorder();
-  Recorder(llvm::StringRef pretty_func, std::string &&pretty_args = {});
-  ~Recorder();
-
-private:
-  void UpdateBoundary();
-
-  /// Whether this function call was the one crossing the API boundary.
-  bool m_local_boundary = false;
-};
-
-} // namespace repro
-} // namespace lldb_private
-
-#endif // LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
diff --git a/lldb/source/API/SBAddress.cpp b/lldb/source/API/SBAddress.cpp
index af1b876d803d7..e519f0bcc83c6 100644
--- a/lldb/source/API/SBAddress.cpp
+++ b/lldb/source/API/SBAddress.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBAddress.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBProcess.h"
 #include "lldb/API/SBSection.h"
@@ -16,35 +15,34 @@
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/LineEntry.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBAddress::SBAddress() : m_opaque_up(new Address()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBAddress);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBAddress::SBAddress(const Address &address)
     : m_opaque_up(std::make_unique
(address)) {} SBAddress::SBAddress(const SBAddress &rhs) : m_opaque_up(new Address()) { - LLDB_RECORD_CONSTRUCTOR(SBAddress, (const lldb::SBAddress &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } SBAddress::SBAddress(lldb::SBSection section, lldb::addr_t offset) : m_opaque_up(new Address(section.GetSP(), offset)) { - LLDB_RECORD_CONSTRUCTOR(SBAddress, (lldb::SBSection, lldb::addr_t), section, - offset); + LLDB_INSTRUMENT_VA(this, section, offset); } // Create an address by resolving a load address using the supplied target SBAddress::SBAddress(lldb::addr_t load_addr, lldb::SBTarget &target) : m_opaque_up(new Address()) { - LLDB_RECORD_CONSTRUCTOR(SBAddress, (lldb::addr_t, lldb::SBTarget &), - load_addr, target); + LLDB_INSTRUMENT_VA(this, load_addr, target); SetLoadAddress(load_addr, target); } @@ -52,8 +50,7 @@ SBAddress::SBAddress(lldb::addr_t load_addr, lldb::SBTarget &target) SBAddress::~SBAddress() = default; const SBAddress &SBAddress::operator=(const SBAddress &rhs) { - LLDB_RECORD_METHOD(const lldb::SBAddress &, - SBAddress, operator=,(const lldb::SBAddress &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -67,31 +64,29 @@ bool lldb::operator==(const SBAddress &lhs, const SBAddress &rhs) { } bool SBAddress::operator!=(const SBAddress &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBAddress, operator!=,(const SBAddress &), - &rhs); + LLDB_INSTRUMENT_VA(this, rhs); return !(*this == rhs); } bool SBAddress::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBAddress, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBAddress::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBAddress, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr && m_opaque_up->IsValid(); } void SBAddress::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBAddress, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up = std::make_unique
(); } void SBAddress::SetAddress(lldb::SBSection section, lldb::addr_t offset) { - LLDB_RECORD_METHOD(void, SBAddress, SetAddress, - (lldb::SBSection, lldb::addr_t), section, offset); + LLDB_INSTRUMENT_VA(this, section, offset); Address &addr = ref(); addr.SetSection(section.GetSP()); @@ -101,7 +96,7 @@ void SBAddress::SetAddress(lldb::SBSection section, lldb::addr_t offset) { void SBAddress::SetAddress(const Address &address) { ref() = address; } lldb::addr_t SBAddress::GetFileAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBAddress, GetFileAddress); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up->IsValid()) return m_opaque_up->GetFileAddress(); @@ -110,8 +105,7 @@ lldb::addr_t SBAddress::GetFileAddress() const { } lldb::addr_t SBAddress::GetLoadAddress(const SBTarget &target) const { - LLDB_RECORD_METHOD_CONST(lldb::addr_t, SBAddress, GetLoadAddress, - (const lldb::SBTarget &), target); + LLDB_INSTRUMENT_VA(this, target); lldb::addr_t addr = LLDB_INVALID_ADDRESS; TargetSP target_sp(target.GetSP()); @@ -126,8 +120,7 @@ lldb::addr_t SBAddress::GetLoadAddress(const SBTarget &target) const { } void SBAddress::SetLoadAddress(lldb::addr_t load_addr, lldb::SBTarget &target) { - LLDB_RECORD_METHOD(void, SBAddress, SetLoadAddress, - (lldb::addr_t, lldb::SBTarget &), load_addr, target); + LLDB_INSTRUMENT_VA(this, load_addr, target); // Create the address object if we don't already have one ref(); @@ -144,7 +137,7 @@ void SBAddress::SetLoadAddress(lldb::addr_t load_addr, lldb::SBTarget &target) { } bool SBAddress::OffsetAddress(addr_t offset) { - LLDB_RECORD_METHOD(bool, SBAddress, OffsetAddress, (lldb::addr_t), offset); + LLDB_INSTRUMENT_VA(this, offset); if (m_opaque_up->IsValid()) { addr_t addr_offset = m_opaque_up->GetOffset(); @@ -157,7 +150,7 @@ bool SBAddress::OffsetAddress(addr_t offset) { } lldb::SBSection SBAddress::GetSection() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSection, SBAddress, GetSection); + LLDB_INSTRUMENT_VA(this); lldb::SBSection 
sb_section; if (m_opaque_up->IsValid()) @@ -166,7 +159,7 @@ lldb::SBSection SBAddress::GetSection() { } lldb::addr_t SBAddress::GetOffset() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBAddress, GetOffset); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up->IsValid()) return m_opaque_up->GetOffset(); @@ -193,8 +186,7 @@ const Address &SBAddress::ref() const { Address *SBAddress::get() { return m_opaque_up.get(); } bool SBAddress::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBAddress, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); // Call "ref()" on the stream to make sure it creates a backing stream in // case there isn't one already... @@ -209,7 +201,7 @@ bool SBAddress::GetDescription(SBStream &description) { } SBModule SBAddress::GetModule() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBAddress, GetModule); + LLDB_INSTRUMENT_VA(this); SBModule sb_module; if (m_opaque_up->IsValid()) @@ -218,8 +210,7 @@ SBModule SBAddress::GetModule() { } SBSymbolContext SBAddress::GetSymbolContext(uint32_t resolve_scope) { - LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBAddress, GetSymbolContext, - (uint32_t), resolve_scope); + LLDB_INSTRUMENT_VA(this, resolve_scope); SBSymbolContext sb_sc; SymbolContextItem scope = static_cast(resolve_scope); @@ -229,7 +220,7 @@ SBSymbolContext SBAddress::GetSymbolContext(uint32_t resolve_scope) { } SBCompileUnit SBAddress::GetCompileUnit() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCompileUnit, SBAddress, GetCompileUnit); + LLDB_INSTRUMENT_VA(this); SBCompileUnit sb_comp_unit; if (m_opaque_up->IsValid()) @@ -238,7 +229,7 @@ SBCompileUnit SBAddress::GetCompileUnit() { } SBFunction SBAddress::GetFunction() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFunction, SBAddress, GetFunction); + LLDB_INSTRUMENT_VA(this); SBFunction sb_function; if (m_opaque_up->IsValid()) @@ -247,7 +238,7 @@ SBFunction SBAddress::GetFunction() { } SBBlock SBAddress::GetBlock() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBAddress, GetBlock); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_up->IsValid()) @@ -256,7 +247,7 @@ SBBlock SBAddress::GetBlock() { } SBSymbol SBAddress::GetSymbol() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSymbol, SBAddress, GetSymbol); + LLDB_INSTRUMENT_VA(this); SBSymbol sb_symbol; if (m_opaque_up->IsValid()) @@ -265,7 +256,7 @@ SBSymbol SBAddress::GetSymbol() { } SBLineEntry SBAddress::GetLineEntry() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBLineEntry, SBAddress, GetLineEntry); + LLDB_INSTRUMENT_VA(this); SBLineEntry sb_line_entry; if (m_opaque_up->IsValid()) { diff --git a/lldb/source/API/SBAttachInfo.cpp b/lldb/source/API/SBAttachInfo.cpp index 8118088a38ec3..edb4f7104d411 100644 --- a/lldb/source/API/SBAttachInfo.cpp +++ b/lldb/source/API/SBAttachInfo.cpp @@ -7,29 +7,29 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBAttachInfo.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBListener.h" #include "lldb/Target/Process.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; SBAttachInfo::SBAttachInfo() : m_opaque_sp(new ProcessAttachInfo()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBAttachInfo); + LLDB_INSTRUMENT_VA(this); } SBAttachInfo::SBAttachInfo(lldb::pid_t pid) : m_opaque_sp(new ProcessAttachInfo()) { - LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (lldb::pid_t), pid); + LLDB_INSTRUMENT_VA(this, pid); m_opaque_sp->SetProcessID(pid); } SBAttachInfo::SBAttachInfo(const char *path, bool wait_for) : m_opaque_sp(new ProcessAttachInfo()) { - LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const char *, bool), path, wait_for); + LLDB_INSTRUMENT_VA(this, path, wait_for); if (path && path[0]) m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native); @@ -38,8 +38,7 @@ SBAttachInfo::SBAttachInfo(const char *path, bool wait_for) 
SBAttachInfo::SBAttachInfo(const char *path, bool wait_for, bool async) : m_opaque_sp(new ProcessAttachInfo()) { - LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const char *, bool, bool), path, - wait_for, async); + LLDB_INSTRUMENT_VA(this, path, wait_for, async); if (path && path[0]) m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native); @@ -49,7 +48,7 @@ SBAttachInfo::SBAttachInfo(const char *path, bool wait_for, bool async) SBAttachInfo::SBAttachInfo(const SBAttachInfo &rhs) : m_opaque_sp(new ProcessAttachInfo()) { - LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const lldb::SBAttachInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = clone(rhs.m_opaque_sp); } @@ -59,8 +58,7 @@ SBAttachInfo::~SBAttachInfo() = default; lldb_private::ProcessAttachInfo &SBAttachInfo::ref() { return *m_opaque_sp; } SBAttachInfo &SBAttachInfo::operator=(const SBAttachInfo &rhs) { - LLDB_RECORD_METHOD(lldb::SBAttachInfo &, - SBAttachInfo, operator=,(const lldb::SBAttachInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = clone(rhs.m_opaque_sp); @@ -68,44 +66,43 @@ SBAttachInfo &SBAttachInfo::operator=(const SBAttachInfo &rhs) { } lldb::pid_t SBAttachInfo::GetProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBAttachInfo, GetProcessID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetProcessID(); } void SBAttachInfo::SetProcessID(lldb::pid_t pid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetProcessID, (lldb::pid_t), pid); + LLDB_INSTRUMENT_VA(this, pid); m_opaque_sp->SetProcessID(pid); } uint32_t SBAttachInfo::GetResumeCount() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetResumeCount); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetResumeCount(); } void SBAttachInfo::SetResumeCount(uint32_t c) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetResumeCount, (uint32_t), c); + LLDB_INSTRUMENT_VA(this, c); m_opaque_sp->SetResumeCount(c); } const char *SBAttachInfo::GetProcessPluginName() { - LLDB_RECORD_METHOD_NO_ARGS(const char 
*, SBAttachInfo, GetProcessPluginName); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetProcessPluginName(); } void SBAttachInfo::SetProcessPluginName(const char *plugin_name) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetProcessPluginName, (const char *), - plugin_name); + LLDB_INSTRUMENT_VA(this, plugin_name); return m_opaque_sp->SetProcessPluginName(plugin_name); } void SBAttachInfo::SetExecutable(const char *path) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetExecutable, (const char *), path); + LLDB_INSTRUMENT_VA(this, path); if (path && path[0]) m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native); @@ -114,8 +111,7 @@ void SBAttachInfo::SetExecutable(const char *path) { } void SBAttachInfo::SetExecutable(SBFileSpec exe_file) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetExecutable, (lldb::SBFileSpec), - exe_file); + LLDB_INSTRUMENT_VA(this, exe_file); if (exe_file.IsValid()) m_opaque_sp->GetExecutableFile() = exe_file.ref(); @@ -124,137 +120,134 @@ void SBAttachInfo::SetExecutable(SBFileSpec exe_file) { } bool SBAttachInfo::GetWaitForLaunch() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GetWaitForLaunch); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetWaitForLaunch(); } void SBAttachInfo::SetWaitForLaunch(bool b) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetWaitForLaunch, (bool), b); + LLDB_INSTRUMENT_VA(this, b); m_opaque_sp->SetWaitForLaunch(b); } void SBAttachInfo::SetWaitForLaunch(bool b, bool async) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetWaitForLaunch, (bool, bool), b, - async); + LLDB_INSTRUMENT_VA(this, b, async); m_opaque_sp->SetWaitForLaunch(b); m_opaque_sp->SetAsync(async); } bool SBAttachInfo::GetIgnoreExisting() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GetIgnoreExisting); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetIgnoreExisting(); } void SBAttachInfo::SetIgnoreExisting(bool b) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetIgnoreExisting, (bool), b); + LLDB_INSTRUMENT_VA(this, b); 
m_opaque_sp->SetIgnoreExisting(b); } uint32_t SBAttachInfo::GetUserID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetUserID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetUserID(); } uint32_t SBAttachInfo::GetGroupID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetGroupID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetGroupID(); } bool SBAttachInfo::UserIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, UserIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->UserIDIsValid(); } bool SBAttachInfo::GroupIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GroupIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GroupIDIsValid(); } void SBAttachInfo::SetUserID(uint32_t uid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetUserID, (uint32_t), uid); + LLDB_INSTRUMENT_VA(this, uid); m_opaque_sp->SetUserID(uid); } void SBAttachInfo::SetGroupID(uint32_t gid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetGroupID, (uint32_t), gid); + LLDB_INSTRUMENT_VA(this, gid); m_opaque_sp->SetGroupID(gid); } uint32_t SBAttachInfo::GetEffectiveUserID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetEffectiveUserID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetEffectiveUserID(); } uint32_t SBAttachInfo::GetEffectiveGroupID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetEffectiveGroupID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetEffectiveGroupID(); } bool SBAttachInfo::EffectiveUserIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, EffectiveUserIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->EffectiveUserIDIsValid(); } bool SBAttachInfo::EffectiveGroupIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, EffectiveGroupIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->EffectiveGroupIDIsValid(); } void SBAttachInfo::SetEffectiveUserID(uint32_t uid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetEffectiveUserID, (uint32_t), uid); + 
LLDB_INSTRUMENT_VA(this, uid); m_opaque_sp->SetEffectiveUserID(uid); } void SBAttachInfo::SetEffectiveGroupID(uint32_t gid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetEffectiveGroupID, (uint32_t), gid); + LLDB_INSTRUMENT_VA(this, gid); m_opaque_sp->SetEffectiveGroupID(gid); } lldb::pid_t SBAttachInfo::GetParentProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBAttachInfo, GetParentProcessID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetParentProcessID(); } void SBAttachInfo::SetParentProcessID(lldb::pid_t pid) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetParentProcessID, (lldb::pid_t), - pid); + LLDB_INSTRUMENT_VA(this, pid); m_opaque_sp->SetParentProcessID(pid); } bool SBAttachInfo::ParentProcessIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, ParentProcessIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->ParentProcessIDIsValid(); } SBListener SBAttachInfo::GetListener() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBAttachInfo, GetListener); + LLDB_INSTRUMENT_VA(this); return SBListener(m_opaque_sp->GetListener()); } void SBAttachInfo::SetListener(SBListener &listener) { - LLDB_RECORD_METHOD(void, SBAttachInfo, SetListener, (lldb::SBListener &), - listener); + LLDB_INSTRUMENT_VA(this, listener); m_opaque_sp->SetListener(listener.GetSP()); } diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp index 720d630491f84..7d7565340836b 100644 --- a/lldb/source/API/SBBlock.cpp +++ b/lldb/source/API/SBBlock.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBBlock.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBFrame.h" @@ -21,22 +20,22 @@ #include "lldb/Symbol/VariableList.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; 
-SBBlock::SBBlock() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBlock); } +SBBlock::SBBlock() { LLDB_INSTRUMENT_VA(this); } SBBlock::SBBlock(lldb_private::Block *lldb_object_ptr) : m_opaque_ptr(lldb_object_ptr) {} SBBlock::SBBlock(const SBBlock &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBBlock, (const lldb::SBBlock &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBBlock &SBBlock::operator=(const SBBlock &rhs) { - LLDB_RECORD_METHOD(const lldb::SBBlock &, - SBBlock, operator=,(const lldb::SBBlock &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_ptr = rhs.m_opaque_ptr; return *this; @@ -45,17 +44,17 @@ const SBBlock &SBBlock::operator=(const SBBlock &rhs) { SBBlock::~SBBlock() { m_opaque_ptr = nullptr; } bool SBBlock::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBBlock::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } bool SBBlock::IsInlined() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsInlined); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetInlinedFunctionInfo() != nullptr; @@ -63,7 +62,7 @@ bool SBBlock::IsInlined() const { } const char *SBBlock::GetInlinedName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBlock, GetInlinedName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { const InlineFunctionInfo *inlined_info = @@ -76,8 +75,7 @@ const char *SBBlock::GetInlinedName() const { } SBFileSpec SBBlock::GetInlinedCallSiteFile() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBBlock, - GetInlinedCallSiteFile); + LLDB_INSTRUMENT_VA(this); SBFileSpec sb_file; if (m_opaque_ptr) { @@ -90,7 +88,7 @@ SBFileSpec SBBlock::GetInlinedCallSiteFile() const { } uint32_t SBBlock::GetInlinedCallSiteLine() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBlock, GetInlinedCallSiteLine); + 
LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { const InlineFunctionInfo *inlined_info = @@ -102,7 +100,7 @@ uint32_t SBBlock::GetInlinedCallSiteLine() const { } uint32_t SBBlock::GetInlinedCallSiteColumn() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBlock, GetInlinedCallSiteColumn); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { const InlineFunctionInfo *inlined_info = @@ -123,7 +121,7 @@ void SBBlock::AppendVariables(bool can_create, bool get_parent_variables, } SBBlock SBBlock::GetParent() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetParent); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_ptr) @@ -132,7 +130,7 @@ SBBlock SBBlock::GetParent() { } lldb::SBBlock SBBlock::GetContainingInlinedBlock() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetContainingInlinedBlock); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_ptr) @@ -141,7 +139,7 @@ lldb::SBBlock SBBlock::GetContainingInlinedBlock() { } SBBlock SBBlock::GetSibling() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetSibling); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_ptr) @@ -150,7 +148,7 @@ SBBlock SBBlock::GetSibling() { } SBBlock SBBlock::GetFirstChild() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetFirstChild); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_ptr) @@ -163,8 +161,7 @@ lldb_private::Block *SBBlock::GetPtr() { return m_opaque_ptr; } void SBBlock::SetPtr(lldb_private::Block *block) { m_opaque_ptr = block; } bool SBBlock::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBBlock, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -188,7 +185,7 @@ bool SBBlock::GetDescription(SBStream &description) { } uint32_t SBBlock::GetNumRanges() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBlock, GetNumRanges); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetNumRanges(); @@ -196,8 +193,7 
@@ uint32_t SBBlock::GetNumRanges() { } lldb::SBAddress SBBlock::GetRangeStartAddress(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBBlock, GetRangeStartAddress, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); lldb::SBAddress sb_addr; if (m_opaque_ptr) { @@ -210,8 +206,7 @@ lldb::SBAddress SBBlock::GetRangeStartAddress(uint32_t idx) { } lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBBlock, GetRangeEndAddress, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); lldb::SBAddress sb_addr; if (m_opaque_ptr) { @@ -225,8 +220,7 @@ lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) { } uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) { - LLDB_RECORD_METHOD(uint32_t, SBBlock, GetRangeIndexForBlockAddress, - (lldb::SBAddress), block_addr); + LLDB_INSTRUMENT_VA(this, block_addr); if (m_opaque_ptr && block_addr.IsValid()) { return m_opaque_ptr->GetRangeIndexContainingAddress(block_addr.ref()); @@ -238,10 +232,7 @@ uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) { lldb::SBValueList SBBlock::GetVariables(lldb::SBFrame &frame, bool arguments, bool locals, bool statics, lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD( - lldb::SBValueList, SBBlock, GetVariables, - (lldb::SBFrame &, bool, bool, bool, lldb::DynamicValueType), frame, - arguments, locals, statics, use_dynamic); + LLDB_INSTRUMENT_VA(this, frame, arguments, locals, statics, use_dynamic); Block *block = GetPtr(); SBValueList value_list; @@ -294,9 +285,7 @@ lldb::SBValueList SBBlock::GetVariables(lldb::SBFrame &frame, bool arguments, lldb::SBValueList SBBlock::GetVariables(lldb::SBTarget &target, bool arguments, bool locals, bool statics) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBBlock, GetVariables, - (lldb::SBTarget &, bool, bool, bool), target, arguments, - locals, statics); + LLDB_INSTRUMENT_VA(this, target, arguments, locals, statics); Block *block = GetPtr(); diff --git 
a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp index 709895e8bbc9f..5fe8f7fe05837 100644 --- a/lldb/source/API/SBBreakpoint.cpp +++ b/lldb/source/API/SBBreakpoint.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBBreakpoint.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBBreakpointLocation.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBEvent.h" @@ -16,6 +15,7 @@ #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBThread.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Breakpoint/Breakpoint.h" #include "lldb/Breakpoint/BreakpointIDList.h" @@ -45,44 +45,41 @@ using namespace lldb; using namespace lldb_private; -SBBreakpoint::SBBreakpoint() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpoint); } +SBBreakpoint::SBBreakpoint() { LLDB_INSTRUMENT_VA(this); } SBBreakpoint::SBBreakpoint(const SBBreakpoint &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpoint, (const lldb::SBBreakpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBBreakpoint::SBBreakpoint(const lldb::BreakpointSP &bp_sp) : m_opaque_wp(bp_sp) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpoint, (const lldb::BreakpointSP &), bp_sp); + LLDB_INSTRUMENT_VA(this, bp_sp); } SBBreakpoint::~SBBreakpoint() = default; const SBBreakpoint &SBBreakpoint::operator=(const SBBreakpoint &rhs) { - LLDB_RECORD_METHOD(const lldb::SBBreakpoint &, - SBBreakpoint, operator=,(const lldb::SBBreakpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_wp = rhs.m_opaque_wp; return *this; } bool SBBreakpoint::operator==(const lldb::SBBreakpoint &rhs) { - LLDB_RECORD_METHOD( - bool, SBBreakpoint, operator==,(const lldb::SBBreakpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_wp.lock() == rhs.m_opaque_wp.lock(); } bool SBBreakpoint::operator!=(const lldb::SBBreakpoint &rhs) { - LLDB_RECORD_METHOD( - bool, SBBreakpoint, 
operator!=,(const lldb::SBBreakpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_wp.lock() != rhs.m_opaque_wp.lock(); } SBTarget SBBreakpoint::GetTarget() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBBreakpoint, GetTarget); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) @@ -92,7 +89,7 @@ SBTarget SBBreakpoint::GetTarget() const { } break_id_t SBBreakpoint::GetID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::break_id_t, SBBreakpoint, GetID); + LLDB_INSTRUMENT_VA(this); break_id_t break_id = LLDB_INVALID_BREAK_ID; BreakpointSP bkpt_sp = GetSP(); @@ -103,11 +100,11 @@ break_id_t SBBreakpoint::GetID() const { } bool SBBreakpoint::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBBreakpoint::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, operator bool); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (!bkpt_sp) @@ -119,7 +116,7 @@ SBBreakpoint::operator bool() const { } void SBBreakpoint::ClearAllBreakpointSites() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBBreakpoint, ClearAllBreakpointSites); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -130,8 +127,7 @@ void SBBreakpoint::ClearAllBreakpointSites() { } SBBreakpointLocation SBBreakpoint::FindLocationByAddress(addr_t vm_addr) { - LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint, - FindLocationByAddress, (lldb::addr_t), vm_addr); + LLDB_INSTRUMENT_VA(this, vm_addr); SBBreakpointLocation sb_bp_location; @@ -152,8 +148,7 @@ SBBreakpointLocation SBBreakpoint::FindLocationByAddress(addr_t vm_addr) { } break_id_t SBBreakpoint::FindLocationIDByAddress(addr_t vm_addr) { - LLDB_RECORD_METHOD(lldb::break_id_t, SBBreakpoint, FindLocationIDByAddress, - (lldb::addr_t), vm_addr); + LLDB_INSTRUMENT_VA(this, vm_addr); break_id_t break_id = LLDB_INVALID_BREAK_ID; BreakpointSP bkpt_sp = GetSP(); @@ 
-173,8 +168,7 @@ break_id_t SBBreakpoint::FindLocationIDByAddress(addr_t vm_addr) { } SBBreakpointLocation SBBreakpoint::FindLocationByID(break_id_t bp_loc_id) { - LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint, FindLocationByID, - (lldb::break_id_t), bp_loc_id); + LLDB_INSTRUMENT_VA(this, bp_loc_id); SBBreakpointLocation sb_bp_location; BreakpointSP bkpt_sp = GetSP(); @@ -189,8 +183,7 @@ SBBreakpointLocation SBBreakpoint::FindLocationByID(break_id_t bp_loc_id) { } SBBreakpointLocation SBBreakpoint::GetLocationAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint, - GetLocationAtIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); SBBreakpointLocation sb_bp_location; BreakpointSP bkpt_sp = GetSP(); @@ -205,7 +198,7 @@ SBBreakpointLocation SBBreakpoint::GetLocationAtIndex(uint32_t index) { } void SBBreakpoint::SetEnabled(bool enable) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetEnabled, (bool), enable); + LLDB_INSTRUMENT_VA(this, enable); BreakpointSP bkpt_sp = GetSP(); @@ -217,7 +210,7 @@ void SBBreakpoint::SetEnabled(bool enable) { } bool SBBreakpoint::IsEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, IsEnabled); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -229,7 +222,7 @@ bool SBBreakpoint::IsEnabled() { } void SBBreakpoint::SetOneShot(bool one_shot) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetOneShot, (bool), one_shot); + LLDB_INSTRUMENT_VA(this, one_shot); BreakpointSP bkpt_sp = GetSP(); @@ -241,7 +234,7 @@ void SBBreakpoint::SetOneShot(bool one_shot) { } bool SBBreakpoint::IsOneShot() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsOneShot); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -253,7 +246,7 @@ bool SBBreakpoint::IsOneShot() const { } bool SBBreakpoint::IsInternal() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, IsInternal); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if 
(bkpt_sp) { @@ -265,7 +258,7 @@ bool SBBreakpoint::IsInternal() { } void SBBreakpoint::SetIgnoreCount(uint32_t count) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetIgnoreCount, (uint32_t), count); + LLDB_INSTRUMENT_VA(this, count); BreakpointSP bkpt_sp = GetSP(); @@ -277,8 +270,7 @@ void SBBreakpoint::SetIgnoreCount(uint32_t count) { } void SBBreakpoint::SetCondition(const char *condition) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetCondition, (const char *), - condition); + LLDB_INSTRUMENT_VA(this, condition); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -289,7 +281,7 @@ void SBBreakpoint::SetCondition(const char *condition) { } const char *SBBreakpoint::GetCondition() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpoint, GetCondition); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -301,8 +293,7 @@ const char *SBBreakpoint::GetCondition() { } void SBBreakpoint::SetAutoContinue(bool auto_continue) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetAutoContinue, (bool), - auto_continue); + LLDB_INSTRUMENT_VA(this, auto_continue); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -313,7 +304,7 @@ void SBBreakpoint::SetAutoContinue(bool auto_continue) { } bool SBBreakpoint::GetAutoContinue() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, GetAutoContinue); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -325,7 +316,7 @@ bool SBBreakpoint::GetAutoContinue() { } uint32_t SBBreakpoint::GetHitCount() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetHitCount); + LLDB_INSTRUMENT_VA(this); uint32_t count = 0; BreakpointSP bkpt_sp = GetSP(); @@ -339,7 +330,7 @@ uint32_t SBBreakpoint::GetHitCount() const { } uint32_t SBBreakpoint::GetIgnoreCount() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetIgnoreCount); + LLDB_INSTRUMENT_VA(this); uint32_t count = 0; BreakpointSP bkpt_sp = GetSP(); @@ -353,7 +344,7 @@ uint32_t SBBreakpoint::GetIgnoreCount() const { } void 
SBBreakpoint::SetThreadID(tid_t tid) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadID, (lldb::tid_t), tid); + LLDB_INSTRUMENT_VA(this, tid); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -364,7 +355,7 @@ void SBBreakpoint::SetThreadID(tid_t tid) { } tid_t SBBreakpoint::GetThreadID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpoint, GetThreadID); + LLDB_INSTRUMENT_VA(this); tid_t tid = LLDB_INVALID_THREAD_ID; BreakpointSP bkpt_sp = GetSP(); @@ -378,7 +369,7 @@ tid_t SBBreakpoint::GetThreadID() { } void SBBreakpoint::SetThreadIndex(uint32_t index) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -389,7 +380,7 @@ void SBBreakpoint::SetThreadIndex(uint32_t index) { } uint32_t SBBreakpoint::GetThreadIndex() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetThreadIndex); + LLDB_INSTRUMENT_VA(this); uint32_t thread_idx = UINT32_MAX; BreakpointSP bkpt_sp = GetSP(); @@ -406,8 +397,7 @@ uint32_t SBBreakpoint::GetThreadIndex() const { } void SBBreakpoint::SetThreadName(const char *thread_name) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadName, (const char *), - thread_name); + LLDB_INSTRUMENT_VA(this, thread_name); BreakpointSP bkpt_sp = GetSP(); @@ -419,7 +409,7 @@ void SBBreakpoint::SetThreadName(const char *thread_name) { } const char *SBBreakpoint::GetThreadName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpoint, GetThreadName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; BreakpointSP bkpt_sp = GetSP(); @@ -436,8 +426,7 @@ const char *SBBreakpoint::GetThreadName() const { } void SBBreakpoint::SetQueueName(const char *queue_name) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetQueueName, (const char *), - queue_name); + LLDB_INSTRUMENT_VA(this, queue_name); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) { @@ -448,7 +437,7 @@ void SBBreakpoint::SetQueueName(const char *queue_name) { } 
const char *SBBreakpoint::GetQueueName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpoint, GetQueueName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; BreakpointSP bkpt_sp = GetSP(); @@ -465,8 +454,7 @@ const char *SBBreakpoint::GetQueueName() const { } size_t SBBreakpoint::GetNumResolvedLocations() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpoint, - GetNumResolvedLocations); + LLDB_INSTRUMENT_VA(this); size_t num_resolved = 0; BreakpointSP bkpt_sp = GetSP(); @@ -479,7 +467,7 @@ size_t SBBreakpoint::GetNumResolvedLocations() const { } size_t SBBreakpoint::GetNumLocations() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpoint, GetNumLocations); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); size_t num_locs = 0; @@ -492,8 +480,7 @@ size_t SBBreakpoint::GetNumLocations() const { } void SBBreakpoint::SetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointSP bkpt_sp = GetSP(); if (!bkpt_sp) @@ -510,8 +497,7 @@ void SBBreakpoint::SetCommandLineCommands(SBStringList &commands) { } bool SBBreakpoint::GetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(bool, SBBreakpoint, GetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointSP bkpt_sp = GetSP(); if (!bkpt_sp) @@ -525,14 +511,13 @@ bool SBBreakpoint::GetCommandLineCommands(SBStringList &commands) { } bool SBBreakpoint::GetDescription(SBStream &s) { - LLDB_RECORD_METHOD(bool, SBBreakpoint, GetDescription, (lldb::SBStream &), s); + LLDB_INSTRUMENT_VA(this, s); return GetDescription(s, true); } bool SBBreakpoint::GetDescription(SBStream &s, bool include_locations) { - LLDB_RECORD_METHOD(bool, SBBreakpoint, GetDescription, - (lldb::SBStream &, bool), s, include_locations); + LLDB_INSTRUMENT_VA(this, s, include_locations); BreakpointSP bkpt_sp = 
GetSP(); if (bkpt_sp) { @@ -552,8 +537,7 @@ bool SBBreakpoint::GetDescription(SBStream &s, bool include_locations) { } SBError SBBreakpoint::AddLocation(SBAddress &address) { - LLDB_RECORD_METHOD(lldb::SBError, SBBreakpoint, AddLocation, - (lldb::SBAddress &), address); + LLDB_INSTRUMENT_VA(this, address); BreakpointSP bkpt_sp = GetSP(); SBError error; @@ -586,8 +570,7 @@ SBError SBBreakpoint::AddLocation(SBAddress &address) { } SBStructuredData SBBreakpoint::SerializeToStructuredData() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBBreakpoint, - SerializeToStructuredData); + LLDB_INSTRUMENT_VA(this); SBStructuredData data; BreakpointSP bkpt_sp = GetSP(); @@ -601,8 +584,7 @@ SBStructuredData SBBreakpoint::SerializeToStructuredData() { } void SBBreakpoint::SetCallback(SBBreakpointHitCallback callback, void *baton) { - LLDB_RECORD_METHOD(void, SBBreakpoint, SetCallback, - (lldb::SBBreakpointHitCallback, void *), callback, baton); + LLDB_INSTRUMENT_VA(this, callback, baton); BreakpointSP bkpt_sp = GetSP(); @@ -618,8 +600,7 @@ void SBBreakpoint::SetCallback(SBBreakpointHitCallback callback, void *baton) { void SBBreakpoint::SetScriptCallbackFunction( const char *callback_function_name) { -LLDB_RECORD_METHOD(void, SBBreakpoint, SetScriptCallbackFunction, - (const char *), callback_function_name); + LLDB_INSTRUMENT_VA(this, callback_function_name); SBStructuredData empty_args; SetScriptCallbackFunction(callback_function_name, empty_args); } @@ -627,8 +608,7 @@ LLDB_RECORD_METHOD(void, SBBreakpoint, SetScriptCallbackFunction, SBError SBBreakpoint::SetScriptCallbackFunction( const char *callback_function_name, SBStructuredData &extra_args) { - LLDB_RECORD_METHOD(SBError, SBBreakpoint, SetScriptCallbackFunction, - (const char *, SBStructuredData &), callback_function_name, extra_args); + LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args); SBError sb_error; BreakpointSP bkpt_sp = GetSP(); @@ -652,8 +632,7 @@ SBError 
SBBreakpoint::SetScriptCallbackFunction( } SBError SBBreakpoint::SetScriptCallbackBody(const char *callback_body_text) { - LLDB_RECORD_METHOD(lldb::SBError, SBBreakpoint, SetScriptCallbackBody, - (const char *), callback_body_text); + LLDB_INSTRUMENT_VA(this, callback_body_text); BreakpointSP bkpt_sp = GetSP(); @@ -675,15 +654,14 @@ SBError SBBreakpoint::SetScriptCallbackBody(const char *callback_body_text) { } bool SBBreakpoint::AddName(const char *new_name) { - LLDB_RECORD_METHOD(bool, SBBreakpoint, AddName, (const char *), new_name); + LLDB_INSTRUMENT_VA(this, new_name); SBError status = AddNameWithErrorHandling(new_name); return status.Success(); } SBError SBBreakpoint::AddNameWithErrorHandling(const char *new_name) { - LLDB_RECORD_METHOD(SBError, SBBreakpoint, AddNameWithErrorHandling, - (const char *), new_name); + LLDB_INSTRUMENT_VA(this, new_name); BreakpointSP bkpt_sp = GetSP(); @@ -702,8 +680,7 @@ SBError SBBreakpoint::AddNameWithErrorHandling(const char *new_name) { } void SBBreakpoint::RemoveName(const char *name_to_remove) { - LLDB_RECORD_METHOD(void, SBBreakpoint, RemoveName, (const char *), - name_to_remove); + LLDB_INSTRUMENT_VA(this, name_to_remove); BreakpointSP bkpt_sp = GetSP(); @@ -716,7 +693,7 @@ void SBBreakpoint::RemoveName(const char *name_to_remove) { } bool SBBreakpoint::MatchesName(const char *name) { - LLDB_RECORD_METHOD(bool, SBBreakpoint, MatchesName, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); BreakpointSP bkpt_sp = GetSP(); @@ -730,8 +707,7 @@ bool SBBreakpoint::MatchesName(const char *name) { } void SBBreakpoint::GetNames(SBStringList &names) { - LLDB_RECORD_METHOD(void, SBBreakpoint, GetNames, (lldb::SBStringList &), - names); + LLDB_INSTRUMENT_VA(this, names); BreakpointSP bkpt_sp = GetSP(); @@ -747,8 +723,7 @@ void SBBreakpoint::GetNames(SBStringList &names) { } bool SBBreakpoint::EventIsBreakpointEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBBreakpoint, EventIsBreakpointEvent, - (const 
lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Breakpoint::BreakpointEventData::GetEventDataFromEvent(event.get()) != nullptr; @@ -756,9 +731,7 @@ bool SBBreakpoint::EventIsBreakpointEvent(const lldb::SBEvent &event) { BreakpointEventType SBBreakpoint::GetBreakpointEventTypeFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::BreakpointEventType, SBBreakpoint, - GetBreakpointEventTypeFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); if (event.IsValid()) return Breakpoint::BreakpointEventData::GetBreakpointEventTypeFromEvent( @@ -767,9 +740,7 @@ SBBreakpoint::GetBreakpointEventTypeFromEvent(const SBEvent &event) { } SBBreakpoint SBBreakpoint::GetBreakpointFromEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBBreakpoint, SBBreakpoint, - GetBreakpointFromEvent, (const lldb::SBEvent &), - event); + LLDB_INSTRUMENT_VA(event); if (event.IsValid()) return SBBreakpoint( @@ -780,9 +751,7 @@ SBBreakpoint SBBreakpoint::GetBreakpointFromEvent(const lldb::SBEvent &event) { SBBreakpointLocation SBBreakpoint::GetBreakpointLocationAtIndexFromEvent(const lldb::SBEvent &event, uint32_t loc_idx) { - LLDB_RECORD_STATIC_METHOD(lldb::SBBreakpointLocation, SBBreakpoint, - GetBreakpointLocationAtIndexFromEvent, - (const lldb::SBEvent &, uint32_t), event, loc_idx); + LLDB_INSTRUMENT_VA(event, loc_idx); SBBreakpointLocation sb_breakpoint_loc; if (event.IsValid()) @@ -794,9 +763,7 @@ SBBreakpoint::GetBreakpointLocationAtIndexFromEvent(const lldb::SBEvent &event, uint32_t SBBreakpoint::GetNumBreakpointLocationsFromEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(uint32_t, SBBreakpoint, - GetNumBreakpointLocationsFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); uint32_t num_locations = 0; if (event.IsValid()) @@ -807,7 +774,7 @@ SBBreakpoint::GetNumBreakpointLocationsFromEvent(const lldb::SBEvent &event) { } bool SBBreakpoint::IsHardware() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsHardware); + LLDB_INSTRUMENT_VA(this); BreakpointSP bkpt_sp = GetSP(); if (bkpt_sp) @@ -903,13 +870,13 @@ class SBBreakpointListImpl { SBBreakpointList::SBBreakpointList(SBTarget &target) : m_opaque_sp(new SBBreakpointListImpl(target.GetSP())) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointList, (lldb::SBTarget &), target); + LLDB_INSTRUMENT_VA(this, target); } SBBreakpointList::~SBBreakpointList() = default; size_t SBBreakpointList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpointList, GetSize); + LLDB_INSTRUMENT_VA(this); if (!m_opaque_sp) return 0; @@ -918,8 +885,7 @@ size_t SBBreakpointList::GetSize() const { } SBBreakpoint SBBreakpointList::GetBreakpointAtIndex(size_t idx) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBBreakpointList, GetBreakpointAtIndex, - (size_t), idx); + LLDB_INSTRUMENT_VA(this, idx); if (!m_opaque_sp) return SBBreakpoint(); @@ -929,8 +895,7 @@ SBBreakpoint SBBreakpointList::GetBreakpointAtIndex(size_t idx) { } SBBreakpoint SBBreakpointList::FindBreakpointByID(lldb::break_id_t id) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBBreakpointList, FindBreakpointByID, - (lldb::break_id_t), id); + LLDB_INSTRUMENT_VA(this, id); if (!m_opaque_sp) return SBBreakpoint(); @@ -939,8 +904,7 @@ SBBreakpoint SBBreakpointList::FindBreakpointByID(lldb::break_id_t id) { } void SBBreakpointList::Append(const SBBreakpoint &sb_bkpt) { - LLDB_RECORD_METHOD(void, SBBreakpointList, Append, - (const lldb::SBBreakpoint &), sb_bkpt); + LLDB_INSTRUMENT_VA(this, sb_bkpt); if (!sb_bkpt.IsValid()) return; @@ -950,8 +914,7 @@ void SBBreakpointList::Append(const SBBreakpoint &sb_bkpt) { } void SBBreakpointList::AppendByID(lldb::break_id_t id) { - LLDB_RECORD_METHOD(void, SBBreakpointList, AppendByID, (lldb::break_id_t), - id); + LLDB_INSTRUMENT_VA(this, id); if (!m_opaque_sp) return; @@ -959,8 +922,7 @@ void SBBreakpointList::AppendByID(lldb::break_id_t id) { } bool SBBreakpointList::AppendIfUnique(const 
SBBreakpoint &sb_bkpt) { - LLDB_RECORD_METHOD(bool, SBBreakpointList, AppendIfUnique, - (const lldb::SBBreakpoint &), sb_bkpt); + LLDB_INSTRUMENT_VA(this, sb_bkpt); if (!sb_bkpt.IsValid()) return false; @@ -970,7 +932,7 @@ bool SBBreakpointList::AppendIfUnique(const SBBreakpoint &sb_bkpt) { } void SBBreakpointList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBBreakpointList, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->Clear(); diff --git a/lldb/source/API/SBBreakpointLocation.cpp b/lldb/source/API/SBBreakpointLocation.cpp index c2e0001bd6c5a..9143174377236 100644 --- a/lldb/source/API/SBBreakpointLocation.cpp +++ b/lldb/source/API/SBBreakpointLocation.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBBreakpointLocation.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBStream.h" -#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBStringList.h" +#include "lldb/API/SBStructuredData.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Breakpoint/Breakpoint.h" #include "lldb/Breakpoint/BreakpointLocation.h" @@ -31,29 +31,22 @@ using namespace lldb; using namespace lldb_private; -SBBreakpointLocation::SBBreakpointLocation() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpointLocation); -} +SBBreakpointLocation::SBBreakpointLocation() { LLDB_INSTRUMENT_VA(this); } SBBreakpointLocation::SBBreakpointLocation( const lldb::BreakpointLocationSP &break_loc_sp) : m_opaque_wp(break_loc_sp) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointLocation, - (const lldb::BreakpointLocationSP &), break_loc_sp); + LLDB_INSTRUMENT_VA(this, break_loc_sp); } SBBreakpointLocation::SBBreakpointLocation(const SBBreakpointLocation &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointLocation, - (const lldb::SBBreakpointLocation &), rhs); + 
LLDB_INSTRUMENT_VA(this, rhs); } const SBBreakpointLocation &SBBreakpointLocation:: operator=(const SBBreakpointLocation &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBBreakpointLocation &, - SBBreakpointLocation, operator=,(const lldb::SBBreakpointLocation &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_wp = rhs.m_opaque_wp; return *this; @@ -66,17 +59,17 @@ BreakpointLocationSP SBBreakpointLocation::GetSP() const { } bool SBBreakpointLocation::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointLocation, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBBreakpointLocation::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointLocation, operator bool); + LLDB_INSTRUMENT_VA(this); return bool(GetSP()); } SBAddress SBBreakpointLocation::GetAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBBreakpointLocation, GetAddress); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -87,8 +80,7 @@ SBAddress SBBreakpointLocation::GetAddress() { } addr_t SBBreakpointLocation::GetLoadAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBBreakpointLocation, - GetLoadAddress); + LLDB_INSTRUMENT_VA(this); addr_t ret_addr = LLDB_INVALID_ADDRESS; BreakpointLocationSP loc_sp = GetSP(); @@ -103,7 +95,7 @@ addr_t SBBreakpointLocation::GetLoadAddress() { } void SBBreakpointLocation::SetEnabled(bool enabled) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetEnabled, (bool), enabled); + LLDB_INSTRUMENT_VA(this, enabled); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -114,7 +106,7 @@ void SBBreakpointLocation::SetEnabled(bool enabled) { } bool SBBreakpointLocation::IsEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, IsEnabled); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -126,7 +118,7 @@ bool SBBreakpointLocation::IsEnabled() { } uint32_t SBBreakpointLocation::GetHitCount() { - 
LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBreakpointLocation, GetHitCount); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -138,7 +130,7 @@ uint32_t SBBreakpointLocation::GetHitCount() { } uint32_t SBBreakpointLocation::GetIgnoreCount() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBreakpointLocation, GetIgnoreCount); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -150,7 +142,7 @@ uint32_t SBBreakpointLocation::GetIgnoreCount() { } void SBBreakpointLocation::SetIgnoreCount(uint32_t n) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetIgnoreCount, (uint32_t), n); + LLDB_INSTRUMENT_VA(this, n); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -161,8 +153,7 @@ void SBBreakpointLocation::SetIgnoreCount(uint32_t n) { } void SBBreakpointLocation::SetCondition(const char *condition) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetCondition, (const char *), - condition); + LLDB_INSTRUMENT_VA(this, condition); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -173,7 +164,7 @@ void SBBreakpointLocation::SetCondition(const char *condition) { } const char *SBBreakpointLocation::GetCondition() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpointLocation, GetCondition); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -185,8 +176,7 @@ const char *SBBreakpointLocation::GetCondition() { } void SBBreakpointLocation::SetAutoContinue(bool auto_continue) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetAutoContinue, (bool), - auto_continue); + LLDB_INSTRUMENT_VA(this, auto_continue); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -197,7 +187,7 @@ void SBBreakpointLocation::SetAutoContinue(bool auto_continue) { } bool SBBreakpointLocation::GetAutoContinue() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, GetAutoContinue); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -210,16 +200,13 @@ bool 
SBBreakpointLocation::GetAutoContinue() { void SBBreakpointLocation::SetScriptCallbackFunction( const char *callback_function_name) { -LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetScriptCallbackFunction, - (const char *), callback_function_name); + LLDB_INSTRUMENT_VA(this, callback_function_name); } SBError SBBreakpointLocation::SetScriptCallbackFunction( const char *callback_function_name, SBStructuredData &extra_args) { - LLDB_RECORD_METHOD(SBError, SBBreakpointLocation, SetScriptCallbackFunction, - (const char *, SBStructuredData &), callback_function_name, - extra_args); + LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args); SBError sb_error; BreakpointLocationSP loc_sp = GetSP(); @@ -245,8 +232,7 @@ SBError SBBreakpointLocation::SetScriptCallbackFunction( SBError SBBreakpointLocation::SetScriptCallbackBody(const char *callback_body_text) { - LLDB_RECORD_METHOD(lldb::SBError, SBBreakpointLocation, SetScriptCallbackBody, - (const char *), callback_body_text); + LLDB_INSTRUMENT_VA(this, callback_body_text); BreakpointLocationSP loc_sp = GetSP(); @@ -269,8 +255,7 @@ SBBreakpointLocation::SetScriptCallbackBody(const char *callback_body_text) { } void SBBreakpointLocation::SetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointLocationSP loc_sp = GetSP(); if (!loc_sp) @@ -287,8 +272,7 @@ void SBBreakpointLocation::SetCommandLineCommands(SBStringList &commands) { } bool SBBreakpointLocation::GetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(bool, SBBreakpointLocation, GetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointLocationSP loc_sp = GetSP(); if (!loc_sp) @@ -302,8 +286,7 @@ bool SBBreakpointLocation::GetCommandLineCommands(SBStringList &commands) { } void SBBreakpointLocation::SetThreadID(tid_t thread_id) { - 
LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadID, (lldb::tid_t), - thread_id); + LLDB_INSTRUMENT_VA(this, thread_id); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -314,7 +297,7 @@ void SBBreakpointLocation::SetThreadID(tid_t thread_id) { } tid_t SBBreakpointLocation::GetThreadID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpointLocation, GetThreadID); + LLDB_INSTRUMENT_VA(this); tid_t tid = LLDB_INVALID_THREAD_ID; BreakpointLocationSP loc_sp = GetSP(); @@ -327,8 +310,7 @@ tid_t SBBreakpointLocation::GetThreadID() { } void SBBreakpointLocation::SetThreadIndex(uint32_t index) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadIndex, (uint32_t), - index); + LLDB_INSTRUMENT_VA(this, index); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -339,8 +321,7 @@ void SBBreakpointLocation::SetThreadIndex(uint32_t index) { } uint32_t SBBreakpointLocation::GetThreadIndex() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointLocation, - GetThreadIndex); + LLDB_INSTRUMENT_VA(this); uint32_t thread_idx = UINT32_MAX; BreakpointLocationSP loc_sp = GetSP(); @@ -353,8 +334,7 @@ uint32_t SBBreakpointLocation::GetThreadIndex() const { } void SBBreakpointLocation::SetThreadName(const char *thread_name) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadName, (const char *), - thread_name); + LLDB_INSTRUMENT_VA(this, thread_name); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -365,8 +345,7 @@ void SBBreakpointLocation::SetThreadName(const char *thread_name) { } const char *SBBreakpointLocation::GetThreadName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointLocation, - GetThreadName); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -378,8 +357,7 @@ const char *SBBreakpointLocation::GetThreadName() const { } void SBBreakpointLocation::SetQueueName(const char *queue_name) { - LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetQueueName, (const char *), - 
queue_name); + LLDB_INSTRUMENT_VA(this, queue_name); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -390,8 +368,7 @@ void SBBreakpointLocation::SetQueueName(const char *queue_name) { } const char *SBBreakpointLocation::GetQueueName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointLocation, - GetQueueName); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -403,7 +380,7 @@ const char *SBBreakpointLocation::GetQueueName() const { } bool SBBreakpointLocation::IsResolved() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, IsResolved); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -422,9 +399,7 @@ void SBBreakpointLocation::SetLocation( bool SBBreakpointLocation::GetDescription(SBStream &description, DescriptionLevel level) { - LLDB_RECORD_METHOD(bool, SBBreakpointLocation, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - level); + LLDB_INSTRUMENT_VA(this, description, level); Stream &strm = description.ref(); BreakpointLocationSP loc_sp = GetSP(); @@ -441,7 +416,7 @@ bool SBBreakpointLocation::GetDescription(SBStream &description, } break_id_t SBBreakpointLocation::GetID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::break_id_t, SBBreakpointLocation, GetID); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); if (loc_sp) { @@ -453,8 +428,7 @@ break_id_t SBBreakpointLocation::GetID() { } SBBreakpoint SBBreakpointLocation::GetBreakpoint() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBreakpoint, SBBreakpointLocation, - GetBreakpoint); + LLDB_INSTRUMENT_VA(this); BreakpointLocationSP loc_sp = GetSP(); diff --git a/lldb/source/API/SBBreakpointName.cpp b/lldb/source/API/SBBreakpointName.cpp index 2937cb9e03d8f..796229d04ce49 100644 --- a/lldb/source/API/SBBreakpointName.cpp +++ b/lldb/source/API/SBBreakpointName.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include 
"lldb/API/SBBreakpointName.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBError.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBTarget.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Breakpoint/BreakpointName.h" #include "lldb/Breakpoint/StoppointCallbackContext.h" @@ -107,13 +107,10 @@ lldb_private::BreakpointName *SBBreakpointNameImpl::GetBreakpointName() const { } // namespace lldb -SBBreakpointName::SBBreakpointName() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpointName); -} +SBBreakpointName::SBBreakpointName() { LLDB_INSTRUMENT_VA(this); } SBBreakpointName::SBBreakpointName(SBTarget &sb_target, const char *name) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointName, (lldb::SBTarget &, const char *), - sb_target, name); + LLDB_INSTRUMENT_VA(this, sb_target, name); m_impl_up = std::make_unique(sb_target, name); // Call FindBreakpointName here to make sure the name is valid, reset if not: @@ -123,8 +120,7 @@ SBBreakpointName::SBBreakpointName(SBTarget &sb_target, const char *name) { } SBBreakpointName::SBBreakpointName(SBBreakpoint &sb_bkpt, const char *name) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointName, - (lldb::SBBreakpoint &, const char *), sb_bkpt, name); + LLDB_INSTRUMENT_VA(this, sb_bkpt, name); if (!sb_bkpt.IsValid()) { m_impl_up.reset(); @@ -149,8 +145,7 @@ SBBreakpointName::SBBreakpointName(SBBreakpoint &sb_bkpt, const char *name) { } SBBreakpointName::SBBreakpointName(const SBBreakpointName &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBBreakpointName, (const lldb::SBBreakpointName &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!rhs.m_impl_up) return; @@ -163,9 +158,7 @@ SBBreakpointName::~SBBreakpointName() = default; const SBBreakpointName &SBBreakpointName:: operator=(const SBBreakpointName &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBBreakpointName &, - SBBreakpointName, operator=,(const lldb::SBBreakpointName 
&), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!rhs.m_impl_up) { m_impl_up.reset(); @@ -178,25 +171,23 @@ operator=(const SBBreakpointName &rhs) { } bool SBBreakpointName::operator==(const lldb::SBBreakpointName &rhs) { - LLDB_RECORD_METHOD( - bool, SBBreakpointName, operator==,(const lldb::SBBreakpointName &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return *m_impl_up == *rhs.m_impl_up; } bool SBBreakpointName::operator!=(const lldb::SBBreakpointName &rhs) { - LLDB_RECORD_METHOD( - bool, SBBreakpointName, operator!=,(const lldb::SBBreakpointName &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return *m_impl_up != *rhs.m_impl_up; } bool SBBreakpointName::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBBreakpointName::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, operator bool); + LLDB_INSTRUMENT_VA(this); if (!m_impl_up) return false; @@ -204,7 +195,7 @@ SBBreakpointName::operator bool() const { } const char *SBBreakpointName::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName, GetName); + LLDB_INSTRUMENT_VA(this); if (!m_impl_up) return ""; @@ -212,7 +203,7 @@ const char *SBBreakpointName::GetName() const { } void SBBreakpointName::SetEnabled(bool enable) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetEnabled, (bool), enable); + LLDB_INSTRUMENT_VA(this, enable); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -236,7 +227,7 @@ void SBBreakpointName::UpdateName(BreakpointName &bp_name) { } bool SBBreakpointName::IsEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, IsEnabled); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -249,7 +240,7 @@ bool SBBreakpointName::IsEnabled() { } void SBBreakpointName::SetOneShot(bool one_shot) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetOneShot, (bool), one_shot); + LLDB_INSTRUMENT_VA(this, 
one_shot); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -263,7 +254,7 @@ void SBBreakpointName::SetOneShot(bool one_shot) { } bool SBBreakpointName::IsOneShot() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, IsOneShot); + LLDB_INSTRUMENT_VA(this); const BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -276,7 +267,7 @@ bool SBBreakpointName::IsOneShot() const { } void SBBreakpointName::SetIgnoreCount(uint32_t count) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetIgnoreCount, (uint32_t), count); + LLDB_INSTRUMENT_VA(this, count); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -290,7 +281,7 @@ void SBBreakpointName::SetIgnoreCount(uint32_t count) { } uint32_t SBBreakpointName::GetIgnoreCount() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointName, GetIgnoreCount); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -303,8 +294,7 @@ uint32_t SBBreakpointName::GetIgnoreCount() const { } void SBBreakpointName::SetCondition(const char *condition) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetCondition, (const char *), - condition); + LLDB_INSTRUMENT_VA(this, condition); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -318,7 +308,7 @@ void SBBreakpointName::SetCondition(const char *condition) { } const char *SBBreakpointName::GetCondition() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpointName, GetCondition); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -331,8 +321,7 @@ const char *SBBreakpointName::GetCondition() { } void SBBreakpointName::SetAutoContinue(bool auto_continue) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetAutoContinue, (bool), - auto_continue); + LLDB_INSTRUMENT_VA(this, auto_continue); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -346,7 +335,7 @@ void SBBreakpointName::SetAutoContinue(bool auto_continue) { } bool 
SBBreakpointName::GetAutoContinue() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAutoContinue); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -359,7 +348,7 @@ bool SBBreakpointName::GetAutoContinue() { } void SBBreakpointName::SetThreadID(tid_t tid) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadID, (lldb::tid_t), tid); + LLDB_INSTRUMENT_VA(this, tid); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -373,7 +362,7 @@ void SBBreakpointName::SetThreadID(tid_t tid) { } tid_t SBBreakpointName::GetThreadID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpointName, GetThreadID); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -386,7 +375,7 @@ tid_t SBBreakpointName::GetThreadID() { } void SBBreakpointName::SetThreadIndex(uint32_t index) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -400,7 +389,7 @@ void SBBreakpointName::SetThreadIndex(uint32_t index) { } uint32_t SBBreakpointName::GetThreadIndex() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointName, GetThreadIndex); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -413,8 +402,7 @@ uint32_t SBBreakpointName::GetThreadIndex() const { } void SBBreakpointName::SetThreadName(const char *thread_name) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadName, (const char *), - thread_name); + LLDB_INSTRUMENT_VA(this, thread_name); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -428,8 +416,7 @@ void SBBreakpointName::SetThreadName(const char *thread_name) { } const char *SBBreakpointName::GetThreadName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName, - GetThreadName); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ 
-442,8 +429,7 @@ const char *SBBreakpointName::GetThreadName() const { } void SBBreakpointName::SetQueueName(const char *queue_name) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetQueueName, (const char *), - queue_name); + LLDB_INSTRUMENT_VA(this, queue_name); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -457,8 +443,7 @@ void SBBreakpointName::SetQueueName(const char *queue_name) { } const char *SBBreakpointName::GetQueueName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName, - GetQueueName); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -471,8 +456,7 @@ const char *SBBreakpointName::GetQueueName() const { } void SBBreakpointName::SetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -491,8 +475,7 @@ void SBBreakpointName::SetCommandLineCommands(SBStringList &commands) { } bool SBBreakpointName::GetCommandLineCommands(SBStringList &commands) { - LLDB_RECORD_METHOD(bool, SBBreakpointName, GetCommandLineCommands, - (lldb::SBStringList &), commands); + LLDB_INSTRUMENT_VA(this, commands); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -507,8 +490,7 @@ bool SBBreakpointName::GetCommandLineCommands(SBStringList &commands) { } const char *SBBreakpointName::GetHelpString() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName, - GetHelpString); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -518,8 +500,7 @@ const char *SBBreakpointName::GetHelpString() const { } void SBBreakpointName::SetHelpString(const char *help_string) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetHelpString, (const char *), - help_string); + LLDB_INSTRUMENT_VA(this, help_string); BreakpointName *bp_name = 
GetBreakpointName(); if (!bp_name) @@ -532,8 +513,7 @@ void SBBreakpointName::SetHelpString(const char *help_string) { } bool SBBreakpointName::GetDescription(SBStream &s) { - LLDB_RECORD_METHOD(bool, SBBreakpointName, GetDescription, (lldb::SBStream &), - s); + LLDB_INSTRUMENT_VA(this, s); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -550,8 +530,7 @@ bool SBBreakpointName::GetDescription(SBStream &s) { void SBBreakpointName::SetCallback(SBBreakpointHitCallback callback, void *baton) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetCallback, - (lldb::SBBreakpointHitCallback, void *), callback, baton); + LLDB_INSTRUMENT_VA(this, callback, baton); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -569,8 +548,7 @@ void SBBreakpointName::SetCallback(SBBreakpointHitCallback callback, void SBBreakpointName::SetScriptCallbackFunction( const char *callback_function_name) { -LLDB_RECORD_METHOD(void, SBBreakpointName, SetScriptCallbackFunction, - (const char *), callback_function_name); + LLDB_INSTRUMENT_VA(this, callback_function_name); SBStructuredData empty_args; SetScriptCallbackFunction(callback_function_name, empty_args); } @@ -578,9 +556,7 @@ LLDB_RECORD_METHOD(void, SBBreakpointName, SetScriptCallbackFunction, SBError SBBreakpointName::SetScriptCallbackFunction( const char *callback_function_name, SBStructuredData &extra_args) { - LLDB_RECORD_METHOD(SBError, SBBreakpointName, SetScriptCallbackFunction, - (const char *, SBStructuredData &), - callback_function_name, extra_args); + LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args); SBError sb_error; BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) { @@ -606,8 +582,7 @@ SBError SBBreakpointName::SetScriptCallbackFunction( SBError SBBreakpointName::SetScriptCallbackBody(const char *callback_body_text) { - LLDB_RECORD_METHOD(lldb::SBError, SBBreakpointName, SetScriptCallbackBody, - (const char *), callback_body_text); + LLDB_INSTRUMENT_VA(this, callback_body_text); 
SBError sb_error; BreakpointName *bp_name = GetBreakpointName(); @@ -631,7 +606,7 @@ SBBreakpointName::SetScriptCallbackBody(const char *callback_body_text) { } bool SBBreakpointName::GetAllowList() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, GetAllowList); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -640,8 +615,7 @@ bool SBBreakpointName::GetAllowList() const { } void SBBreakpointName::SetAllowList(bool value) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowList, (bool), value); - + LLDB_INSTRUMENT_VA(this, value); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -650,7 +624,7 @@ void SBBreakpointName::SetAllowList(bool value) { } bool SBBreakpointName::GetAllowDelete() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAllowDelete); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -659,8 +633,7 @@ bool SBBreakpointName::GetAllowDelete() { } void SBBreakpointName::SetAllowDelete(bool value) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowDelete, (bool), value); - + LLDB_INSTRUMENT_VA(this, value); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -669,7 +642,7 @@ void SBBreakpointName::SetAllowDelete(bool value) { } bool SBBreakpointName::GetAllowDisable() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAllowDisable); + LLDB_INSTRUMENT_VA(this); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) @@ -678,7 +651,7 @@ bool SBBreakpointName::GetAllowDisable() { } void SBBreakpointName::SetAllowDisable(bool value) { - LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowDisable, (bool), value); + LLDB_INSTRUMENT_VA(this, value); BreakpointName *bp_name = GetBreakpointName(); if (!bp_name) diff --git a/lldb/source/API/SBBreakpointOptionCommon.cpp b/lldb/source/API/SBBreakpointOptionCommon.cpp index 2ee47ff7795c6..685ed172c820a 100644 --- a/lldb/source/API/SBBreakpointOptionCommon.cpp +++ 
b/lldb/source/API/SBBreakpointOptionCommon.cpp @@ -26,6 +26,7 @@ #include "lldb/Target/Target.h" #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadSpec.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Stream.h" @@ -38,19 +39,18 @@ using namespace lldb; using namespace lldb_private; -SBBreakpointCallbackBaton::SBBreakpointCallbackBaton(SBBreakpointHitCallback - callback, - void *baton) - : TypedBaton(std::make_unique()) { - getItem()->callback = callback; - getItem()->callback_baton = baton; - } +SBBreakpointCallbackBaton::SBBreakpointCallbackBaton( + SBBreakpointHitCallback callback, void *baton) + : TypedBaton(std::make_unique()) { + LLDB_INSTRUMENT_VA(this, callback, baton); + getItem()->callback = callback; + getItem()->callback_baton = baton; +} - bool SBBreakpointCallbackBaton::PrivateBreakpointHitCallback(void *baton, - StoppointCallbackContext *ctx, - lldb::user_id_t break_id, - lldb::user_id_t break_loc_id) -{ +bool SBBreakpointCallbackBaton::PrivateBreakpointHitCallback( + void *baton, StoppointCallbackContext *ctx, lldb::user_id_t break_id, + lldb::user_id_t break_loc_id) { + LLDB_INSTRUMENT_VA(baton, ctx, break_id, break_loc_id); ExecutionContext exe_ctx(ctx->exe_ctx_ref); BreakpointSP bp_sp( exe_ctx.GetTargetRef().GetBreakpointList().FindBreakpointByID(break_id)); diff --git a/lldb/source/API/SBBroadcaster.cpp b/lldb/source/API/SBBroadcaster.cpp index 4fa11cab5d9ff..f145bc6e99d8e 100644 --- a/lldb/source/API/SBBroadcaster.cpp +++ b/lldb/source/API/SBBroadcaster.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/Utility/Broadcaster.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBEvent.h" @@ -16,13 +16,11 @@ using namespace lldb; using namespace lldb_private; -SBBroadcaster::SBBroadcaster() { - 
LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBroadcaster); -} +SBBroadcaster::SBBroadcaster() { LLDB_INSTRUMENT_VA(this); } SBBroadcaster::SBBroadcaster(const char *name) : m_opaque_sp(new Broadcaster(nullptr, name)), m_opaque_ptr(nullptr) { - LLDB_RECORD_CONSTRUCTOR(SBBroadcaster, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); m_opaque_ptr = m_opaque_sp.get(); } @@ -32,13 +30,11 @@ SBBroadcaster::SBBroadcaster(lldb_private::Broadcaster *broadcaster, bool owns) SBBroadcaster::SBBroadcaster(const SBBroadcaster &rhs) : m_opaque_sp(rhs.m_opaque_sp), m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBBroadcaster, (const lldb::SBBroadcaster &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBBroadcaster &SBBroadcaster::operator=(const SBBroadcaster &rhs) { - LLDB_RECORD_METHOD(const lldb::SBBroadcaster &, - SBBroadcaster, operator=,(const lldb::SBBroadcaster &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -50,8 +46,7 @@ const SBBroadcaster &SBBroadcaster::operator=(const SBBroadcaster &rhs) { SBBroadcaster::~SBBroadcaster() { reset(nullptr, false); } void SBBroadcaster::BroadcastEventByType(uint32_t event_type, bool unique) { - LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEventByType, - (uint32_t, bool), event_type, unique); + LLDB_INSTRUMENT_VA(this, event_type, unique); if (m_opaque_ptr == nullptr) return; @@ -63,8 +58,7 @@ void SBBroadcaster::BroadcastEventByType(uint32_t event_type, bool unique) { } void SBBroadcaster::BroadcastEvent(const SBEvent &event, bool unique) { - LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEvent, - (const lldb::SBEvent &, bool), event, unique); + LLDB_INSTRUMENT_VA(this, event, unique); if (m_opaque_ptr == nullptr) return; @@ -78,9 +72,7 @@ void SBBroadcaster::BroadcastEvent(const SBEvent &event, bool unique) { void SBBroadcaster::AddInitialEventsToListener(const SBListener &listener, uint32_t requested_events) { - LLDB_RECORD_METHOD(void, SBBroadcaster, 
AddInitialEventsToListener, - (const lldb::SBListener &, uint32_t), listener, - requested_events); + LLDB_INSTRUMENT_VA(this, listener, requested_events); if (m_opaque_ptr) m_opaque_ptr->AddInitialEventsToListener(listener.m_opaque_sp, @@ -89,9 +81,7 @@ void SBBroadcaster::AddInitialEventsToListener(const SBListener &listener, uint32_t SBBroadcaster::AddListener(const SBListener &listener, uint32_t event_mask) { - LLDB_RECORD_METHOD(uint32_t, SBBroadcaster, AddListener, - (const lldb::SBListener &, uint32_t), listener, - event_mask); + LLDB_INSTRUMENT_VA(this, listener, event_mask); if (m_opaque_ptr) return m_opaque_ptr->AddListener(listener.m_opaque_sp, event_mask); @@ -99,7 +89,7 @@ uint32_t SBBroadcaster::AddListener(const SBListener &listener, } const char *SBBroadcaster::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBroadcaster, GetName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetBroadcasterName().GetCString(); @@ -107,8 +97,7 @@ const char *SBBroadcaster::GetName() const { } bool SBBroadcaster::EventTypeHasListeners(uint32_t event_type) { - LLDB_RECORD_METHOD(bool, SBBroadcaster, EventTypeHasListeners, (uint32_t), - event_type); + LLDB_INSTRUMENT_VA(this, event_type); if (m_opaque_ptr) return m_opaque_ptr->EventTypeHasListeners(event_type); @@ -117,9 +106,7 @@ bool SBBroadcaster::EventTypeHasListeners(uint32_t event_type) { bool SBBroadcaster::RemoveListener(const SBListener &listener, uint32_t event_mask) { - LLDB_RECORD_METHOD(bool, SBBroadcaster, RemoveListener, - (const lldb::SBListener &, uint32_t), listener, - event_mask); + LLDB_INSTRUMENT_VA(this, listener, event_mask); if (m_opaque_ptr) return m_opaque_ptr->RemoveListener(listener.m_opaque_sp, event_mask); @@ -137,39 +124,36 @@ void SBBroadcaster::reset(Broadcaster *broadcaster, bool owns) { } bool SBBroadcaster::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBroadcaster, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator 
bool(); } SBBroadcaster::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBroadcaster, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } void SBBroadcaster::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBBroadcaster, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); m_opaque_ptr = nullptr; } bool SBBroadcaster::operator==(const SBBroadcaster &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBBroadcaster, operator==,(const lldb::SBBroadcaster &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr == rhs.m_opaque_ptr; } bool SBBroadcaster::operator!=(const SBBroadcaster &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBBroadcaster, operator!=,(const lldb::SBBroadcaster &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr != rhs.m_opaque_ptr; } bool SBBroadcaster::operator<(const SBBroadcaster &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBBroadcaster, operator<,(const lldb::SBBroadcaster &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr < rhs.m_opaque_ptr; } diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp index d543246edcb44..073c1a1b042c2 100644 --- a/lldb/source/API/SBCommandInterpreter.cpp +++ b/lldb/source/API/SBCommandInterpreter.cpp @@ -8,11 +8,11 @@ #include "lldb/lldb-types.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/Interpreter/CommandInterpreter.h" #include "lldb/Interpreter/CommandObjectMultiword.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Listener.h" #include "lldb/API/SBBroadcaster.h" @@ -78,71 +78,62 @@ class CommandPluginInterfaceImplementation : public CommandObjectParsed { SBCommandInterpreter::SBCommandInterpreter(CommandInterpreter *interpreter) : m_opaque_ptr(interpreter) { - LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreter, - (lldb_private::CommandInterpreter *), interpreter); - + 
LLDB_INSTRUMENT_VA(this, interpreter); } SBCommandInterpreter::SBCommandInterpreter(const SBCommandInterpreter &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreter, - (const lldb::SBCommandInterpreter &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBCommandInterpreter::~SBCommandInterpreter() = default; const SBCommandInterpreter &SBCommandInterpreter:: operator=(const SBCommandInterpreter &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBCommandInterpreter &, - SBCommandInterpreter, operator=,(const lldb::SBCommandInterpreter &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_ptr = rhs.m_opaque_ptr; return *this; } bool SBCommandInterpreter::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBCommandInterpreter::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } bool SBCommandInterpreter::CommandExists(const char *cmd) { - LLDB_RECORD_METHOD(bool, SBCommandInterpreter, CommandExists, (const char *), - cmd); + LLDB_INSTRUMENT_VA(this, cmd); return (((cmd != nullptr) && IsValid()) ? m_opaque_ptr->CommandExists(cmd) : false); } bool SBCommandInterpreter::AliasExists(const char *cmd) { - LLDB_RECORD_METHOD(bool, SBCommandInterpreter, AliasExists, (const char *), - cmd); + LLDB_INSTRUMENT_VA(this, cmd); return (((cmd != nullptr) && IsValid()) ? m_opaque_ptr->AliasExists(cmd) : false); } bool SBCommandInterpreter::IsActive() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, IsActive); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? m_opaque_ptr->IsActive() : false); } bool SBCommandInterpreter::WasInterrupted() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, WasInterrupted); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? 
m_opaque_ptr->WasInterrupted() : false); } const char *SBCommandInterpreter::GetIOHandlerControlSequence(char ch) { - LLDB_RECORD_METHOD(const char *, SBCommandInterpreter, - GetIOHandlerControlSequence, (char), ch); + LLDB_INSTRUMENT_VA(this, ch); return (IsValid() ? m_opaque_ptr->GetDebugger() @@ -155,9 +146,7 @@ lldb::ReturnStatus SBCommandInterpreter::HandleCommand(const char *command_line, SBCommandReturnObject &result, bool add_to_history) { - LLDB_RECORD_METHOD(lldb::ReturnStatus, SBCommandInterpreter, HandleCommand, - (const char *, lldb::SBCommandReturnObject &, bool), - command_line, result, add_to_history); + LLDB_INSTRUMENT_VA(this, command_line, result, add_to_history); SBExecutionContext sb_exe_ctx; return HandleCommand(command_line, sb_exe_ctx, result, add_to_history); @@ -166,10 +155,8 @@ SBCommandInterpreter::HandleCommand(const char *command_line, lldb::ReturnStatus SBCommandInterpreter::HandleCommand( const char *command_line, SBExecutionContext &override_context, SBCommandReturnObject &result, bool add_to_history) { - LLDB_RECORD_METHOD(lldb::ReturnStatus, SBCommandInterpreter, HandleCommand, - (const char *, lldb::SBExecutionContext &, - lldb::SBCommandReturnObject &, bool), - command_line, override_context, result, add_to_history); + LLDB_INSTRUMENT_VA(this, command_line, override_context, result, + add_to_history); result.Clear(); if (command_line && IsValid()) { @@ -194,11 +181,7 @@ void SBCommandInterpreter::HandleCommandsFromFile( lldb::SBFileSpec &file, lldb::SBExecutionContext &override_context, lldb::SBCommandInterpreterRunOptions &options, lldb::SBCommandReturnObject result) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, HandleCommandsFromFile, - (lldb::SBFileSpec &, lldb::SBExecutionContext &, - lldb::SBCommandInterpreterRunOptions &, - lldb::SBCommandReturnObject), - file, override_context, options, result); + LLDB_INSTRUMENT_VA(this, file, override_context, options, result); if (!IsValid()) { 
result->AppendError("SBCommandInterpreter is not valid."); @@ -225,10 +208,7 @@ void SBCommandInterpreter::HandleCommandsFromFile( int SBCommandInterpreter::HandleCompletion( const char *current_line, const char *cursor, const char *last_char, int match_start_point, int max_return_elements, SBStringList &matches) { - LLDB_RECORD_METHOD(int, SBCommandInterpreter, HandleCompletion, - (const char *, const char *, const char *, int, int, - lldb::SBStringList &), - current_line, cursor, last_char, match_start_point, + LLDB_INSTRUMENT_VA(this, current_line, cursor, last_char, match_start_point, max_return_elements, matches); SBStringList dummy_descriptions; @@ -241,11 +221,7 @@ int SBCommandInterpreter::HandleCompletionWithDescriptions( const char *current_line, const char *cursor, const char *last_char, int match_start_point, int max_return_elements, SBStringList &matches, SBStringList &descriptions) { - LLDB_RECORD_METHOD(int, SBCommandInterpreter, - HandleCompletionWithDescriptions, - (const char *, const char *, const char *, int, int, - lldb::SBStringList &, lldb::SBStringList &), - current_line, cursor, last_char, match_start_point, + LLDB_INSTRUMENT_VA(this, current_line, cursor, last_char, match_start_point, max_return_elements, matches, descriptions); // Sanity check the arguments that are passed in: cursor & last_char have to @@ -311,11 +287,7 @@ int SBCommandInterpreter::HandleCompletionWithDescriptions( const char *current_line, uint32_t cursor_pos, int match_start_point, int max_return_elements, SBStringList &matches, SBStringList &descriptions) { - LLDB_RECORD_METHOD(int, SBCommandInterpreter, - HandleCompletionWithDescriptions, - (const char *, uint32_t, int, int, lldb::SBStringList &, - lldb::SBStringList &), - current_line, cursor_pos, match_start_point, + LLDB_INSTRUMENT_VA(this, current_line, cursor_pos, match_start_point, max_return_elements, matches, descriptions); const char *cursor = current_line + cursor_pos; @@ -330,9 +302,7 @@ int 
SBCommandInterpreter::HandleCompletion(const char *current_line, int match_start_point, int max_return_elements, lldb::SBStringList &matches) { - LLDB_RECORD_METHOD(int, SBCommandInterpreter, HandleCompletion, - (const char *, uint32_t, int, int, lldb::SBStringList &), - current_line, cursor_pos, match_start_point, + LLDB_INSTRUMENT_VA(this, current_line, cursor_pos, match_start_point, max_return_elements, matches); const char *cursor = current_line + cursor_pos; @@ -342,25 +312,25 @@ int SBCommandInterpreter::HandleCompletion(const char *current_line, } bool SBCommandInterpreter::HasCommands() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasCommands); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? m_opaque_ptr->HasCommands() : false); } bool SBCommandInterpreter::HasAliases() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasAliases); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? m_opaque_ptr->HasAliases() : false); } bool SBCommandInterpreter::HasAliasOptions() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasAliasOptions); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? m_opaque_ptr->HasAliasOptions() : false); } SBProcess SBCommandInterpreter::GetProcess() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBCommandInterpreter, GetProcess); + LLDB_INSTRUMENT_VA(this); SBProcess sb_process; ProcessSP process_sp; @@ -377,8 +347,7 @@ SBProcess SBCommandInterpreter::GetProcess() { } SBDebugger SBCommandInterpreter::GetDebugger() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDebugger, SBCommandInterpreter, - GetDebugger); + LLDB_INSTRUMENT_VA(this); SBDebugger sb_debugger; if (IsValid()) @@ -388,28 +357,27 @@ SBDebugger SBCommandInterpreter::GetDebugger() { } bool SBCommandInterpreter::GetPromptOnQuit() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, GetPromptOnQuit); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? 
m_opaque_ptr->GetPromptOnQuit() : false); } void SBCommandInterpreter::SetPromptOnQuit(bool b) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, SetPromptOnQuit, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (IsValid()) m_opaque_ptr->SetPromptOnQuit(b); } void SBCommandInterpreter::AllowExitCodeOnQuit(bool allow) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, AllowExitCodeOnQuit, (bool), - allow); + LLDB_INSTRUMENT_VA(this, allow); if (m_opaque_ptr) m_opaque_ptr->AllowExitCodeOnQuit(allow); } bool SBCommandInterpreter::HasCustomQuitExitCode() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasCustomQuitExitCode); + LLDB_INSTRUMENT_VA(this); bool exited = false; if (m_opaque_ptr) @@ -418,7 +386,7 @@ bool SBCommandInterpreter::HasCustomQuitExitCode() { } int SBCommandInterpreter::GetQuitStatus() { - LLDB_RECORD_METHOD_NO_ARGS(int, SBCommandInterpreter, GetQuitStatus); + LLDB_INSTRUMENT_VA(this); bool exited = false; return (m_opaque_ptr ? m_opaque_ptr->GetQuitExitCode(exited) : 0); @@ -426,9 +394,7 @@ int SBCommandInterpreter::GetQuitStatus() { void SBCommandInterpreter::ResolveCommand(const char *command_line, SBCommandReturnObject &result) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, ResolveCommand, - (const char *, lldb::SBCommandReturnObject &), - command_line, result); + LLDB_INSTRUMENT_VA(this, command_line, result); result.Clear(); if (command_line && IsValid()) { @@ -453,8 +419,7 @@ void SBCommandInterpreter::reset( void SBCommandInterpreter::SourceInitFileInHomeDirectory( SBCommandReturnObject &result) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, SourceInitFileInHomeDirectory, - (lldb::SBCommandReturnObject &), result); + LLDB_INSTRUMENT_VA(this, result); result.Clear(); if (IsValid()) { @@ -470,8 +435,7 @@ void SBCommandInterpreter::SourceInitFileInHomeDirectory( void SBCommandInterpreter::SourceInitFileInHomeDirectory( SBCommandReturnObject &result, bool is_repl) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, 
SourceInitFileInHomeDirectory, - (lldb::SBCommandReturnObject &, bool), result, is_repl); + LLDB_INSTRUMENT_VA(this, result, is_repl); result.Clear(); if (IsValid()) { @@ -487,9 +451,7 @@ void SBCommandInterpreter::SourceInitFileInHomeDirectory( void SBCommandInterpreter::SourceInitFileInCurrentWorkingDirectory( SBCommandReturnObject &result) { - LLDB_RECORD_METHOD(void, SBCommandInterpreter, - SourceInitFileInCurrentWorkingDirectory, - (lldb::SBCommandReturnObject &), result); + LLDB_INSTRUMENT_VA(this, result); result.Clear(); if (IsValid()) { @@ -504,9 +466,7 @@ void SBCommandInterpreter::SourceInitFileInCurrentWorkingDirectory( } SBBroadcaster SBCommandInterpreter::GetBroadcaster() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBCommandInterpreter, - GetBroadcaster); - + LLDB_INSTRUMENT_VA(this); SBBroadcaster broadcaster(m_opaque_ptr, false); @@ -514,35 +474,28 @@ SBBroadcaster SBCommandInterpreter::GetBroadcaster() { } const char *SBCommandInterpreter::GetBroadcasterClass() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBCommandInterpreter, - GetBroadcasterClass); + LLDB_INSTRUMENT(); return CommandInterpreter::GetStaticBroadcasterClass().AsCString(); } const char *SBCommandInterpreter::GetArgumentTypeAsCString( const lldb::CommandArgumentType arg_type) { - LLDB_RECORD_STATIC_METHOD(const char *, SBCommandInterpreter, - GetArgumentTypeAsCString, - (const lldb::CommandArgumentType), arg_type); + LLDB_INSTRUMENT_VA(arg_type); return CommandObject::GetArgumentTypeAsCString(arg_type); } const char *SBCommandInterpreter::GetArgumentDescriptionAsCString( const lldb::CommandArgumentType arg_type) { - LLDB_RECORD_STATIC_METHOD(const char *, SBCommandInterpreter, - GetArgumentDescriptionAsCString, - (const lldb::CommandArgumentType), arg_type); + LLDB_INSTRUMENT_VA(arg_type); return CommandObject::GetArgumentDescriptionAsCString(arg_type); } bool SBCommandInterpreter::EventIsCommandInterpreterEvent( const lldb::SBEvent &event) { - 
LLDB_RECORD_STATIC_METHOD(bool, SBCommandInterpreter, - EventIsCommandInterpreterEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return event.GetBroadcasterClass() == SBCommandInterpreter::GetBroadcasterClass(); @@ -551,9 +504,7 @@ bool SBCommandInterpreter::EventIsCommandInterpreterEvent( bool SBCommandInterpreter::SetCommandOverrideCallback( const char *command_name, lldb::CommandOverrideCallback callback, void *baton) { - LLDB_RECORD_METHOD(bool, SBCommandInterpreter, SetCommandOverrideCallback, - (const char *, lldb::CommandOverrideCallback, void *), - command_name, callback, baton); + LLDB_INSTRUMENT_VA(this, command_name, callback, baton); if (command_name && command_name[0] && IsValid()) { llvm::StringRef command_name_str = command_name; @@ -570,8 +521,7 @@ bool SBCommandInterpreter::SetCommandOverrideCallback( lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name, const char *help) { - LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddMultiwordCommand, - (const char *, const char *), name, help); + LLDB_INSTRUMENT_VA(this, name, help); lldb::CommandObjectSP new_command_sp( new CommandObjectMultiword(*m_opaque_ptr, name, help)); @@ -584,10 +534,7 @@ lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name, lldb::SBCommand SBCommandInterpreter::AddCommand( const char *name, lldb::SBCommandPluginInterface *impl, const char *help) { - LLDB_RECORD_METHOD( - lldb::SBCommand, SBCommandInterpreter, AddCommand, - (const char *, lldb::SBCommandPluginInterface *, const char *), name, - impl, help); + LLDB_INSTRUMENT_VA(this, name, impl, help); return AddCommand(name, impl, help, /*syntax=*/nullptr, /*auto_repeat_command=*/""); @@ -597,20 +544,14 @@ lldb::SBCommand SBCommandInterpreter::AddCommand(const char *name, lldb::SBCommandPluginInterface *impl, const char *help, const char *syntax) { - LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddCommand, - (const char *, 
lldb::SBCommandPluginInterface *, - const char *, const char *), - name, impl, help, syntax); + LLDB_INSTRUMENT_VA(this, name, impl, help, syntax); return AddCommand(name, impl, help, syntax, /*auto_repeat_command=*/""); } lldb::SBCommand SBCommandInterpreter::AddCommand( const char *name, lldb::SBCommandPluginInterface *impl, const char *help, const char *syntax, const char *auto_repeat_command) { - LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddCommand, - (const char *, lldb::SBCommandPluginInterface *, - const char *, const char *, const char *), - name, impl, help, syntax, auto_repeat_command); + LLDB_INSTRUMENT_VA(this, name, impl, help, syntax, auto_repeat_command); lldb::CommandObjectSP new_command_sp; new_command_sp = std::make_shared( @@ -623,49 +564,49 @@ lldb::SBCommand SBCommandInterpreter::AddCommand( return lldb::SBCommand(); } -SBCommand::SBCommand() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommand); } +SBCommand::SBCommand() { LLDB_INSTRUMENT_VA(this); } SBCommand::SBCommand(lldb::CommandObjectSP cmd_sp) : m_opaque_sp(cmd_sp) {} bool SBCommand::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommand, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBCommand::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommand, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } const char *SBCommand::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetName); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? ConstString(m_opaque_sp->GetCommandName()).AsCString() : nullptr); } const char *SBCommand::GetHelp() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetHelp); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? ConstString(m_opaque_sp->GetHelp()).AsCString() : nullptr); } const char *SBCommand::GetHelpLong() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetHelpLong); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? 
ConstString(m_opaque_sp->GetHelpLong()).AsCString() : nullptr); } void SBCommand::SetHelp(const char *help) { - LLDB_RECORD_METHOD(void, SBCommand, SetHelp, (const char *), help); + LLDB_INSTRUMENT_VA(this, help); if (IsValid()) m_opaque_sp->SetHelp(help); } void SBCommand::SetHelpLong(const char *help) { - LLDB_RECORD_METHOD(void, SBCommand, SetHelpLong, (const char *), help); + LLDB_INSTRUMENT_VA(this, help); if (IsValid()) m_opaque_sp->SetHelpLong(help); @@ -673,8 +614,7 @@ void SBCommand::SetHelpLong(const char *help) { lldb::SBCommand SBCommand::AddMultiwordCommand(const char *name, const char *help) { - LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddMultiwordCommand, - (const char *, const char *), name, help); + LLDB_INSTRUMENT_VA(this, name, help); if (!IsValid()) return lldb::SBCommand(); @@ -692,10 +632,7 @@ lldb::SBCommand SBCommand::AddMultiwordCommand(const char *name, lldb::SBCommand SBCommand::AddCommand(const char *name, lldb::SBCommandPluginInterface *impl, const char *help) { - LLDB_RECORD_METHOD( - lldb::SBCommand, SBCommand, AddCommand, - (const char *, lldb::SBCommandPluginInterface *, const char *), name, - impl, help); + LLDB_INSTRUMENT_VA(this, name, impl, help); return AddCommand(name, impl, help, /*syntax=*/nullptr, /*auto_repeat_command=*/""); } @@ -703,10 +640,7 @@ lldb::SBCommand SBCommand::AddCommand(const char *name, lldb::SBCommand SBCommand::AddCommand(const char *name, lldb::SBCommandPluginInterface *impl, const char *help, const char *syntax) { - LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddCommand, - (const char *, lldb::SBCommandPluginInterface *, - const char *, const char *), - name, impl, help, syntax); + LLDB_INSTRUMENT_VA(this, name, impl, help, syntax); return AddCommand(name, impl, help, syntax, /*auto_repeat_command=*/""); } @@ -714,10 +648,7 @@ lldb::SBCommand SBCommand::AddCommand(const char *name, lldb::SBCommandPluginInterface *impl, const char *help, const char *syntax, const char *auto_repeat_command) { - 
LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddCommand, - (const char *, lldb::SBCommandPluginInterface *, - const char *, const char *, const char *), - name, impl, help, syntax, auto_repeat_command); + LLDB_INSTRUMENT_VA(this, name, impl, help, syntax, auto_repeat_command); if (!IsValid()) return lldb::SBCommand(); @@ -733,13 +664,13 @@ lldb::SBCommand SBCommand::AddCommand(const char *name, } uint32_t SBCommand::GetFlags() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBCommand, GetFlags); + LLDB_INSTRUMENT_VA(this); return (IsValid() ? m_opaque_sp->GetFlags().Get() : 0); } void SBCommand::SetFlags(uint32_t flags) { - LLDB_RECORD_METHOD(void, SBCommand, SetFlags, (uint32_t), flags); + LLDB_INSTRUMENT_VA(this, flags); if (IsValid()) m_opaque_sp->GetFlags().Set(flags); diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp index eb1a4885f2079..6c6b2aa15a792 100644 --- a/lldb/source/API/SBCommandInterpreterRunOptions.cpp +++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp @@ -8,7 +8,7 @@ #include "lldb/lldb-types.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBCommandInterpreterRunOptions.h" #include "lldb/Interpreter/CommandInterpreter.h" @@ -19,15 +19,14 @@ using namespace lldb; using namespace lldb_private; SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunOptions); + LLDB_INSTRUMENT_VA(this); m_opaque_up = std::make_unique(); } SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions( const SBCommandInterpreterRunOptions &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunOptions, - (const lldb::SBCommandInterpreterRunOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = std::make_unique(rhs.ref()); } @@ -36,9 +35,7 @@ SBCommandInterpreterRunOptions::~SBCommandInterpreterRunOptions() = default; SBCommandInterpreterRunOptions 
&SBCommandInterpreterRunOptions::operator=( const SBCommandInterpreterRunOptions &rhs) { - LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunOptions &, - SBCommandInterpreterRunOptions, operator=, - (const lldb::SBCommandInterpreterRunOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this == &rhs) return *this; @@ -47,142 +44,122 @@ SBCommandInterpreterRunOptions &SBCommandInterpreterRunOptions::operator=( } bool SBCommandInterpreterRunOptions::GetStopOnContinue() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnContinue); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetStopOnContinue(); } void SBCommandInterpreterRunOptions::SetStopOnContinue(bool stop_on_continue) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue, - (bool), stop_on_continue); + LLDB_INSTRUMENT_VA(this, stop_on_continue); m_opaque_up->SetStopOnContinue(stop_on_continue); } bool SBCommandInterpreterRunOptions::GetStopOnError() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnError); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetStopOnError(); } void SBCommandInterpreterRunOptions::SetStopOnError(bool stop_on_error) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError, - (bool), stop_on_error); + LLDB_INSTRUMENT_VA(this, stop_on_error); m_opaque_up->SetStopOnError(stop_on_error); } bool SBCommandInterpreterRunOptions::GetStopOnCrash() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetStopOnCrash); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetStopOnCrash(); } void SBCommandInterpreterRunOptions::SetStopOnCrash(bool stop_on_crash) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash, - (bool), stop_on_crash); + LLDB_INSTRUMENT_VA(this, stop_on_crash); m_opaque_up->SetStopOnCrash(stop_on_crash); } bool SBCommandInterpreterRunOptions::GetEchoCommands() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetEchoCommands); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetEchoCommands(); } void SBCommandInterpreterRunOptions::SetEchoCommands(bool echo_commands) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands, - (bool), echo_commands); + LLDB_INSTRUMENT_VA(this, echo_commands); m_opaque_up->SetEchoCommands(echo_commands); } bool SBCommandInterpreterRunOptions::GetEchoCommentCommands() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetEchoCommentCommands); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetEchoCommentCommands(); } void SBCommandInterpreterRunOptions::SetEchoCommentCommands(bool echo) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, - SetEchoCommentCommands, (bool), echo); + LLDB_INSTRUMENT_VA(this, echo); m_opaque_up->SetEchoCommentCommands(echo); } bool SBCommandInterpreterRunOptions::GetPrintResults() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetPrintResults); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetPrintResults(); } void SBCommandInterpreterRunOptions::SetPrintResults(bool print_results) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults, - (bool), print_results); + LLDB_INSTRUMENT_VA(this, print_results); m_opaque_up->SetPrintResults(print_results); } bool SBCommandInterpreterRunOptions::GetPrintErrors() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetPrintErrors); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetPrintErrors(); } void SBCommandInterpreterRunOptions::SetPrintErrors(bool print_errors) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintErrors, - (bool), print_errors); + LLDB_INSTRUMENT_VA(this, print_errors); m_opaque_up->SetPrintErrors(print_errors); } bool SBCommandInterpreterRunOptions::GetAddToHistory() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetAddToHistory); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetAddToHistory(); } void SBCommandInterpreterRunOptions::SetAddToHistory(bool add_to_history) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory, - (bool), add_to_history); + LLDB_INSTRUMENT_VA(this, add_to_history); m_opaque_up->SetAddToHistory(add_to_history); } bool SBCommandInterpreterRunOptions::GetAutoHandleEvents() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetAutoHandleEvents); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetAutoHandleEvents(); } void SBCommandInterpreterRunOptions::SetAutoHandleEvents( bool auto_handle_events) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAutoHandleEvents, - (bool), auto_handle_events); + LLDB_INSTRUMENT_VA(this, auto_handle_events); m_opaque_up->SetAutoHandleEvents(auto_handle_events); } bool SBCommandInterpreterRunOptions::GetSpawnThread() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions, - GetSpawnThread); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSpawnThread(); } void SBCommandInterpreterRunOptions::SetSpawnThread(bool spawn_thread) { - LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetSpawnThread, - (bool), spawn_thread); + LLDB_INSTRUMENT_VA(this, spawn_thread); m_opaque_up->SetSpawnThread(spawn_thread); } @@ -201,14 +178,13 @@ SBCommandInterpreterRunResult::SBCommandInterpreterRunResult() : m_opaque_up(new CommandInterpreterRunResult()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunResult); + LLDB_INSTRUMENT_VA(this); } SBCommandInterpreterRunResult::SBCommandInterpreterRunResult( const SBCommandInterpreterRunResult &rhs) : m_opaque_up(new CommandInterpreterRunResult()) { - LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunResult, - (const lldb::SBCommandInterpreterRunResult &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); *m_opaque_up = 
*rhs.m_opaque_up; } @@ -222,9 +198,7 @@ SBCommandInterpreterRunResult::~SBCommandInterpreterRunResult() = default; SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=( const SBCommandInterpreterRunResult &rhs) { - LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult &, - SBCommandInterpreterRunResult, operator=, - (const lldb::SBCommandInterpreterRunResult &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this == &rhs) return *this; @@ -233,16 +207,14 @@ SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=( } int SBCommandInterpreterRunResult::GetNumberOfErrors() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(int, SBCommandInterpreterRunResult, - GetNumberOfErrors); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetNumErrors(); } lldb::CommandInterpreterResult SBCommandInterpreterRunResult::GetResult() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::CommandInterpreterResult, - SBCommandInterpreterRunResult, GetResult); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetResult(); } diff --git a/lldb/source/API/SBCommandReturnObject.cpp b/lldb/source/API/SBCommandReturnObject.cpp index 505145d75ce8c..7d2c102b3d8c1 100644 --- a/lldb/source/API/SBCommandReturnObject.cpp +++ b/lldb/source/API/SBCommandReturnObject.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBCommandReturnObject.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFile.h" #include "lldb/API/SBStream.h" #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Status.h" using namespace lldb; @@ -46,28 +46,23 @@ class lldb_private::SBCommandReturnObjectImpl { SBCommandReturnObject::SBCommandReturnObject() : m_opaque_up(new SBCommandReturnObjectImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandReturnObject); + 
LLDB_INSTRUMENT_VA(this); } SBCommandReturnObject::SBCommandReturnObject(CommandReturnObject &ref) : m_opaque_up(new SBCommandReturnObjectImpl(ref)) { - LLDB_RECORD_CONSTRUCTOR(SBCommandReturnObject, - (lldb_private::CommandReturnObject &), ref); + LLDB_INSTRUMENT_VA(this, ref); } SBCommandReturnObject::SBCommandReturnObject(const SBCommandReturnObject &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBCommandReturnObject, - (const lldb::SBCommandReturnObject &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } SBCommandReturnObject &SBCommandReturnObject:: operator=(const SBCommandReturnObject &rhs) { - LLDB_RECORD_METHOD( - lldb::SBCommandReturnObject &, - SBCommandReturnObject, operator=,(const lldb::SBCommandReturnObject &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -77,44 +72,44 @@ operator=(const SBCommandReturnObject &rhs) { SBCommandReturnObject::~SBCommandReturnObject() = default; bool SBCommandReturnObject::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandReturnObject, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBCommandReturnObject::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandReturnObject, operator bool); + LLDB_INSTRUMENT_VA(this); // This method is not useful but it needs to stay to keep SB API stable. 
return true; } const char *SBCommandReturnObject::GetOutput() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommandReturnObject, GetOutput); + LLDB_INSTRUMENT_VA(this); ConstString output(ref().GetOutputData()); return output.AsCString(/*value_if_empty*/ ""); } const char *SBCommandReturnObject::GetError() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommandReturnObject, GetError); + LLDB_INSTRUMENT_VA(this); ConstString output(ref().GetErrorData()); return output.AsCString(/*value_if_empty*/ ""); } size_t SBCommandReturnObject::GetOutputSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBCommandReturnObject, GetOutputSize); + LLDB_INSTRUMENT_VA(this); return ref().GetOutputData().size(); } size_t SBCommandReturnObject::GetErrorSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBCommandReturnObject, GetErrorSize); + LLDB_INSTRUMENT_VA(this); return ref().GetErrorData().size(); } size_t SBCommandReturnObject::PutOutput(FILE *fh) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FILE *), fh); + LLDB_INSTRUMENT_VA(this, fh); if (fh) { size_t num_bytes = GetOutputSize(); if (num_bytes) @@ -124,22 +119,21 @@ size_t SBCommandReturnObject::PutOutput(FILE *fh) { } size_t SBCommandReturnObject::PutOutput(FileSP file_sp) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FileSP), - file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); if (!file_sp) return 0; return file_sp->Printf("%s", GetOutput()); } size_t SBCommandReturnObject::PutOutput(SBFile file) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (SBFile), file); + LLDB_INSTRUMENT_VA(this, file); if (!file.m_opaque_sp) return 0; return file.m_opaque_sp->Printf("%s", GetOutput()); } size_t SBCommandReturnObject::PutError(FILE *fh) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FILE *), fh); + LLDB_INSTRUMENT_VA(this, fh); if (fh) { size_t num_bytes = GetErrorSize(); if (num_bytes) @@ -149,62 +143,57 @@ size_t SBCommandReturnObject::PutError(FILE *fh) { } size_t 
SBCommandReturnObject::PutError(FileSP file_sp) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FileSP), - file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); if (!file_sp) return 0; return file_sp->Printf("%s", GetError()); } size_t SBCommandReturnObject::PutError(SBFile file) { - LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (SBFile), file); + LLDB_INSTRUMENT_VA(this, file); if (!file.m_opaque_sp) return 0; return file.m_opaque_sp->Printf("%s", GetError()); } void SBCommandReturnObject::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBCommandReturnObject, Clear); + LLDB_INSTRUMENT_VA(this); ref().Clear(); } lldb::ReturnStatus SBCommandReturnObject::GetStatus() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::ReturnStatus, SBCommandReturnObject, - GetStatus); + LLDB_INSTRUMENT_VA(this); return ref().GetStatus(); } void SBCommandReturnObject::SetStatus(lldb::ReturnStatus status) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetStatus, - (lldb::ReturnStatus), status); + LLDB_INSTRUMENT_VA(this, status); ref().SetStatus(status); } bool SBCommandReturnObject::Succeeded() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandReturnObject, Succeeded); + LLDB_INSTRUMENT_VA(this); return ref().Succeeded(); } bool SBCommandReturnObject::HasResult() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandReturnObject, HasResult); + LLDB_INSTRUMENT_VA(this); return ref().HasResult(); } void SBCommandReturnObject::AppendMessage(const char *message) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, AppendMessage, (const char *), - message); + LLDB_INSTRUMENT_VA(this, message); ref().AppendMessage(message); } void SBCommandReturnObject::AppendWarning(const char *message) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, AppendWarning, (const char *), - message); + LLDB_INSTRUMENT_VA(this, message); ref().AppendWarning(message); } @@ -226,8 +215,7 @@ CommandReturnObject &SBCommandReturnObject::ref() const { } bool SBCommandReturnObject::GetDescription(SBStream &description) 
{ - LLDB_RECORD_METHOD(bool, SBCommandReturnObject, GetDescription, - (lldb::SBStream &), description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -252,62 +240,53 @@ bool SBCommandReturnObject::GetDescription(SBStream &description) { } void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, - (FILE *), fh); + LLDB_INSTRUMENT_VA(this, fh); SetImmediateOutputFile(fh, false); } void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, - (FILE *), fh); + LLDB_INSTRUMENT_VA(this, fh); SetImmediateErrorFile(fh, false); } void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh, bool transfer_ownership) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, - (FILE *, bool), fh, transfer_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); FileSP file = std::make_shared(fh, transfer_ownership); ref().SetImmediateOutputFile(file); } void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh, bool transfer_ownership) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, - (FILE *, bool), fh, transfer_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); FileSP file = std::make_shared(fh, transfer_ownership); ref().SetImmediateErrorFile(file); } void SBCommandReturnObject::SetImmediateOutputFile(SBFile file) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile, - (SBFile), file); + LLDB_INSTRUMENT_VA(this, file); ref().SetImmediateOutputFile(file.m_opaque_sp); } void SBCommandReturnObject::SetImmediateErrorFile(SBFile file) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, - (SBFile), file); + LLDB_INSTRUMENT_VA(this, file); ref().SetImmediateErrorFile(file.m_opaque_sp); } void SBCommandReturnObject::SetImmediateOutputFile(FileSP file_sp) { - LLDB_RECORD_METHOD(void, 
SBCommandReturnObject, SetImmediateOutputFile, - (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); SetImmediateOutputFile(SBFile(file_sp)); } void SBCommandReturnObject::SetImmediateErrorFile(FileSP file_sp) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile, - (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); SetImmediateErrorFile(SBFile(file_sp)); } void SBCommandReturnObject::PutCString(const char *string, int len) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, PutCString, - (const char *, int), string, len); + LLDB_INSTRUMENT_VA(this, string, len); if (len == 0 || string == nullptr || *string == 0) { return; @@ -319,8 +298,7 @@ void SBCommandReturnObject::PutCString(const char *string, int len) { } const char *SBCommandReturnObject::GetOutput(bool only_if_no_immediate) { - LLDB_RECORD_METHOD(const char *, SBCommandReturnObject, GetOutput, (bool), - only_if_no_immediate); + LLDB_INSTRUMENT_VA(this, only_if_no_immediate); if (!only_if_no_immediate || ref().GetImmediateOutputStream().get() == nullptr) @@ -329,8 +307,7 @@ const char *SBCommandReturnObject::GetOutput(bool only_if_no_immediate) { } const char *SBCommandReturnObject::GetError(bool only_if_no_immediate) { - LLDB_RECORD_METHOD(const char *, SBCommandReturnObject, GetError, (bool), - only_if_no_immediate); + LLDB_INSTRUMENT_VA(this, only_if_no_immediate); if (!only_if_no_immediate || ref().GetImmediateErrorStream().get() == nullptr) return GetError(); @@ -347,9 +324,7 @@ size_t SBCommandReturnObject::Printf(const char *format, ...) 
{ void SBCommandReturnObject::SetError(lldb::SBError &error, const char *fallback_error_cstr) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetError, - (lldb::SBError &, const char *), error, - fallback_error_cstr); + LLDB_INSTRUMENT_VA(this, error, fallback_error_cstr); if (error.IsValid()) ref().SetError(error.ref(), fallback_error_cstr); @@ -358,8 +333,7 @@ void SBCommandReturnObject::SetError(lldb::SBError &error, } void SBCommandReturnObject::SetError(const char *error_cstr) { - LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetError, (const char *), - error_cstr); + LLDB_INSTRUMENT_VA(this, error_cstr); if (error_cstr) ref().AppendError(error_cstr); diff --git a/lldb/source/API/SBCommunication.cpp b/lldb/source/API/SBCommunication.cpp index 79a95c9dad626..0a1dad1e2e8fb 100644 --- a/lldb/source/API/SBCommunication.cpp +++ b/lldb/source/API/SBCommunication.cpp @@ -7,22 +7,20 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBCommunication.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/Core/Communication.h" #include "lldb/Host/ConnectionFileDescriptor.h" #include "lldb/Host/Host.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; -SBCommunication::SBCommunication() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommunication); -} +SBCommunication::SBCommunication() { LLDB_INSTRUMENT_VA(this); } SBCommunication::SBCommunication(const char *broadcaster_name) : m_opaque(new Communication(broadcaster_name)), m_opaque_owned(true) { - LLDB_RECORD_CONSTRUCTOR(SBCommunication, (const char *), broadcaster_name); + LLDB_INSTRUMENT_VA(this, broadcaster_name); } SBCommunication::~SBCommunication() { @@ -33,17 +31,17 @@ SBCommunication::~SBCommunication() { } bool SBCommunication::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator 
bool(); } SBCommunication::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque != nullptr; } bool SBCommunication::GetCloseOnEOF() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, GetCloseOnEOF); + LLDB_INSTRUMENT_VA(this); if (m_opaque) return m_opaque->GetCloseOnEOF(); @@ -51,15 +49,14 @@ bool SBCommunication::GetCloseOnEOF() { } void SBCommunication::SetCloseOnEOF(bool b) { - LLDB_RECORD_METHOD(void, SBCommunication, SetCloseOnEOF, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (m_opaque) m_opaque->SetCloseOnEOF(b); } ConnectionStatus SBCommunication::Connect(const char *url) { - LLDB_RECORD_METHOD(lldb::ConnectionStatus, SBCommunication, Connect, - (const char *), url); + LLDB_INSTRUMENT_VA(this, url); if (m_opaque) { if (!m_opaque->HasConnection()) @@ -70,8 +67,7 @@ ConnectionStatus SBCommunication::Connect(const char *url) { } ConnectionStatus SBCommunication::AdoptFileDesriptor(int fd, bool owns_fd) { - LLDB_RECORD_METHOD(lldb::ConnectionStatus, SBCommunication, - AdoptFileDesriptor, (int, bool), fd, owns_fd); + LLDB_INSTRUMENT_VA(this, fd, owns_fd); ConnectionStatus status = eConnectionStatusNoConnection; if (m_opaque) { @@ -90,8 +86,7 @@ ConnectionStatus SBCommunication::AdoptFileDesriptor(int fd, bool owns_fd) { } ConnectionStatus SBCommunication::Disconnect() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::ConnectionStatus, SBCommunication, - Disconnect); + LLDB_INSTRUMENT_VA(this); ConnectionStatus status = eConnectionStatusNoConnection; if (m_opaque) @@ -100,16 +95,14 @@ ConnectionStatus SBCommunication::Disconnect() { } bool SBCommunication::IsConnected() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, IsConnected); + LLDB_INSTRUMENT_VA(this); return m_opaque ? 
m_opaque->IsConnected() : false; } size_t SBCommunication::Read(void *dst, size_t dst_len, uint32_t timeout_usec, ConnectionStatus &status) { - LLDB_RECORD_METHOD(size_t, SBCommunication, Read, - (void *, size_t, uint32_t, lldb::ConnectionStatus &), dst, - dst_len, timeout_usec, status); + LLDB_INSTRUMENT_VA(this, dst, dst_len, timeout_usec, status); size_t bytes_read = 0; Timeout timeout = timeout_usec == UINT32_MAX @@ -125,9 +118,7 @@ size_t SBCommunication::Read(void *dst, size_t dst_len, uint32_t timeout_usec, size_t SBCommunication::Write(const void *src, size_t src_len, ConnectionStatus &status) { - LLDB_RECORD_METHOD(size_t, SBCommunication, Write, - (const void *, size_t, lldb::ConnectionStatus &), src, - src_len, status); + LLDB_INSTRUMENT_VA(this, src, src_len, status); size_t bytes_written = 0; if (m_opaque) @@ -139,28 +130,26 @@ size_t SBCommunication::Write(const void *src, size_t src_len, } bool SBCommunication::ReadThreadStart() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadStart); + LLDB_INSTRUMENT_VA(this); return m_opaque ? m_opaque->StartReadThread() : false; } bool SBCommunication::ReadThreadStop() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadStop); + LLDB_INSTRUMENT_VA(this); return m_opaque ? m_opaque->StopReadThread() : false; } bool SBCommunication::ReadThreadIsRunning() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadIsRunning); + LLDB_INSTRUMENT_VA(this); return m_opaque ? 
m_opaque->ReadThreadIsRunning() : false; } bool SBCommunication::SetReadThreadBytesReceivedCallback( ReadThreadBytesReceived callback, void *callback_baton) { - LLDB_RECORD_METHOD(bool, SBCommunication, SetReadThreadBytesReceivedCallback, - (lldb::SBCommunication::ReadThreadBytesReceived, void *), - callback, callback_baton); + LLDB_INSTRUMENT_VA(this, callback, callback_baton); bool result = false; if (m_opaque) { @@ -171,16 +160,14 @@ bool SBCommunication::SetReadThreadBytesReceivedCallback( } SBBroadcaster SBCommunication::GetBroadcaster() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBCommunication, - GetBroadcaster); + LLDB_INSTRUMENT_VA(this); SBBroadcaster broadcaster(m_opaque, false); return broadcaster; } const char *SBCommunication::GetBroadcasterClass() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBCommunication, - GetBroadcasterClass); + LLDB_INSTRUMENT(); return Communication::GetStaticBroadcasterClass().AsCString(); } diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp index f3f3246ef2b5e..46a319c6b7a3a 100644 --- a/lldb/source/API/SBCompileUnit.cpp +++ b/lldb/source/API/SBCompileUnit.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBCompileUnit.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBLineEntry.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Module.h" @@ -17,26 +16,23 @@ #include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeList.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; -SBCompileUnit::SBCompileUnit() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCompileUnit); -} +SBCompileUnit::SBCompileUnit() { LLDB_INSTRUMENT_VA(this); } SBCompileUnit::SBCompileUnit(lldb_private::CompileUnit *lldb_object_ptr) : m_opaque_ptr(lldb_object_ptr) {} SBCompileUnit::SBCompileUnit(const SBCompileUnit &rhs) : 
m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBCompileUnit, (const lldb::SBCompileUnit &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBCompileUnit &SBCompileUnit::operator=(const SBCompileUnit &rhs) { - LLDB_RECORD_METHOD(const lldb::SBCompileUnit &, - SBCompileUnit, operator=,(const lldb::SBCompileUnit &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_ptr = rhs.m_opaque_ptr; return *this; @@ -45,8 +41,7 @@ const SBCompileUnit &SBCompileUnit::operator=(const SBCompileUnit &rhs) { SBCompileUnit::~SBCompileUnit() { m_opaque_ptr = nullptr; } SBFileSpec SBCompileUnit::GetFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBCompileUnit, - GetFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec file_spec; if (m_opaque_ptr) @@ -55,7 +50,7 @@ SBFileSpec SBCompileUnit::GetFileSpec() const { } uint32_t SBCompileUnit::GetNumLineEntries() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBCompileUnit, GetNumLineEntries); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { LineTable *line_table = m_opaque_ptr->GetLineTable(); @@ -67,8 +62,7 @@ uint32_t SBCompileUnit::GetNumLineEntries() const { } SBLineEntry SBCompileUnit::GetLineEntryAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBLineEntry, SBCompileUnit, - GetLineEntryAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBLineEntry sb_line_entry; if (m_opaque_ptr) { @@ -85,9 +79,7 @@ SBLineEntry SBCompileUnit::GetLineEntryAtIndex(uint32_t idx) const { uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line, SBFileSpec *inline_file_spec) const { - LLDB_RECORD_METHOD_CONST(uint32_t, SBCompileUnit, FindLineEntryIndex, - (uint32_t, uint32_t, lldb::SBFileSpec *), start_idx, - line, inline_file_spec); + LLDB_INSTRUMENT_VA(this, start_idx, line, inline_file_spec); const bool exact = true; return FindLineEntryIndex(start_idx, line, inline_file_spec, exact); @@ -96,9 +88,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t 
line, uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line, SBFileSpec *inline_file_spec, bool exact) const { - LLDB_RECORD_METHOD_CONST(uint32_t, SBCompileUnit, FindLineEntryIndex, - (uint32_t, uint32_t, lldb::SBFileSpec *, bool), - start_idx, line, inline_file_spec, exact); + LLDB_INSTRUMENT_VA(this, start_idx, line, inline_file_spec, exact); uint32_t index = UINT32_MAX; if (m_opaque_ptr) { @@ -118,7 +108,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line, } uint32_t SBCompileUnit::GetNumSupportFiles() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBCompileUnit, GetNumSupportFiles); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetSupportFiles().GetSize(); @@ -127,8 +117,7 @@ uint32_t SBCompileUnit::GetNumSupportFiles() const { } lldb::SBTypeList SBCompileUnit::GetTypes(uint32_t type_mask) { - LLDB_RECORD_METHOD(lldb::SBTypeList, SBCompileUnit, GetTypes, (uint32_t), - type_mask); + LLDB_INSTRUMENT_VA(this, type_mask); SBTypeList sb_type_list; @@ -151,8 +140,7 @@ lldb::SBTypeList SBCompileUnit::GetTypes(uint32_t type_mask) { } SBFileSpec SBCompileUnit::GetSupportFileAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBFileSpec, SBCompileUnit, - GetSupportFileAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBFileSpec sb_file_spec; if (m_opaque_ptr) { @@ -166,9 +154,7 @@ SBFileSpec SBCompileUnit::GetSupportFileAtIndex(uint32_t idx) const { uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx, const SBFileSpec &sb_file, bool full) { - LLDB_RECORD_METHOD(uint32_t, SBCompileUnit, FindSupportFileIndex, - (uint32_t, const lldb::SBFileSpec &, bool), start_idx, - sb_file, full); + LLDB_INSTRUMENT_VA(this, start_idx, sb_file, full); if (m_opaque_ptr) { const FileSpecList &support_files = m_opaque_ptr->GetSupportFiles(); @@ -178,7 +164,7 @@ uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx, } lldb::LanguageType SBCompileUnit::GetLanguage() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBCompileUnit, GetLanguage); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetLanguage(); @@ -186,25 +172,23 @@ lldb::LanguageType SBCompileUnit::GetLanguage() { } bool SBCompileUnit::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCompileUnit, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBCompileUnit::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCompileUnit, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } bool SBCompileUnit::operator==(const SBCompileUnit &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBCompileUnit, operator==,(const lldb::SBCompileUnit &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr == rhs.m_opaque_ptr; } bool SBCompileUnit::operator!=(const SBCompileUnit &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBCompileUnit, operator!=,(const lldb::SBCompileUnit &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr != rhs.m_opaque_ptr; } @@ -224,8 +208,7 @@ void SBCompileUnit::reset(lldb_private::CompileUnit *lldb_object_ptr) { } bool SBCompileUnit::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBCompileUnit, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBData.cpp b/lldb/source/API/SBData.cpp index e58f032a4f9cb..5232bdde1dedd 100644 --- a/lldb/source/API/SBData.cpp +++ b/lldb/source/API/SBData.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBData.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBError.h" #include "lldb/API/SBStream.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Core/DumpDataExtractor.h" #include "lldb/Utility/DataBufferHeap.h" @@ -23,18 +23,17 @@ using namespace lldb; using namespace lldb_private; 
SBData::SBData() : m_opaque_sp(new DataExtractor()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBData); + LLDB_INSTRUMENT_VA(this); } SBData::SBData(const lldb::DataExtractorSP &data_sp) : m_opaque_sp(data_sp) {} SBData::SBData(const SBData &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBData, (const lldb::SBData &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBData &SBData::operator=(const SBData &rhs) { - LLDB_RECORD_METHOD(const lldb::SBData &, - SBData, operator=,(const lldb::SBData &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -58,17 +57,17 @@ lldb::DataExtractorSP &SBData::operator*() { return m_opaque_sp; } const lldb::DataExtractorSP &SBData::operator*() const { return m_opaque_sp; } bool SBData::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBData, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBData::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBData, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } uint8_t SBData::GetAddressByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint8_t, SBData, GetAddressByteSize); + LLDB_INSTRUMENT_VA(this); uint8_t value = 0; if (m_opaque_sp.get()) @@ -77,22 +76,21 @@ uint8_t SBData::GetAddressByteSize() { } void SBData::SetAddressByteSize(uint8_t addr_byte_size) { - LLDB_RECORD_METHOD(void, SBData, SetAddressByteSize, (uint8_t), - addr_byte_size); + LLDB_INSTRUMENT_VA(this, addr_byte_size); if (m_opaque_sp.get()) m_opaque_sp->SetAddressByteSize(addr_byte_size); } void SBData::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBData, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp.get()) m_opaque_sp->Clear(); } size_t SBData::GetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBData, GetByteSize); + LLDB_INSTRUMENT_VA(this); size_t value = 0; if (m_opaque_sp.get()) @@ -101,7 +99,7 @@ size_t SBData::GetByteSize() { } lldb::ByteOrder SBData::GetByteOrder() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBData, GetByteOrder); + LLDB_INSTRUMENT_VA(this); lldb::ByteOrder value = eByteOrderInvalid; if (m_opaque_sp.get()) @@ -110,15 +108,14 @@ lldb::ByteOrder SBData::GetByteOrder() { } void SBData::SetByteOrder(lldb::ByteOrder endian) { - LLDB_RECORD_METHOD(void, SBData, SetByteOrder, (lldb::ByteOrder), endian); + LLDB_INSTRUMENT_VA(this, endian); if (m_opaque_sp.get()) m_opaque_sp->SetByteOrder(endian); } float SBData::GetFloat(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(float, SBData, GetFloat, (lldb::SBError &, lldb::offset_t), - error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); float value = 0; if (!m_opaque_sp.get()) { @@ -133,8 +130,7 @@ float SBData::GetFloat(lldb::SBError &error, lldb::offset_t offset) { } double SBData::GetDouble(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(double, SBData, GetDouble, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); double value = 0; if (!m_opaque_sp.get()) { @@ -149,8 +145,7 @@ double SBData::GetDouble(lldb::SBError &error, lldb::offset_t offset) { } long double SBData::GetLongDouble(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(long double, SBData, GetLongDouble, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); long double value = 0; if (!m_opaque_sp.get()) { @@ -165,8 +160,7 @@ long double SBData::GetLongDouble(lldb::SBError &error, lldb::offset_t offset) { } lldb::addr_t SBData::GetAddress(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(lldb::addr_t, SBData, GetAddress, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); lldb::addr_t value = 0; if (!m_opaque_sp.get()) { @@ -181,8 +175,7 @@ lldb::addr_t SBData::GetAddress(lldb::SBError &error, lldb::offset_t offset) { } uint8_t SBData::GetUnsignedInt8(lldb::SBError &error, lldb::offset_t offset) { - 
LLDB_RECORD_METHOD(uint8_t, SBData, GetUnsignedInt8, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); uint8_t value = 0; if (!m_opaque_sp.get()) { @@ -197,8 +190,7 @@ uint8_t SBData::GetUnsignedInt8(lldb::SBError &error, lldb::offset_t offset) { } uint16_t SBData::GetUnsignedInt16(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(uint16_t, SBData, GetUnsignedInt16, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); uint16_t value = 0; if (!m_opaque_sp.get()) { @@ -213,8 +205,7 @@ uint16_t SBData::GetUnsignedInt16(lldb::SBError &error, lldb::offset_t offset) { } uint32_t SBData::GetUnsignedInt32(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(uint32_t, SBData, GetUnsignedInt32, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); uint32_t value = 0; if (!m_opaque_sp.get()) { @@ -229,8 +220,7 @@ uint32_t SBData::GetUnsignedInt32(lldb::SBError &error, lldb::offset_t offset) { } uint64_t SBData::GetUnsignedInt64(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(uint64_t, SBData, GetUnsignedInt64, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); uint64_t value = 0; if (!m_opaque_sp.get()) { @@ -245,8 +235,7 @@ uint64_t SBData::GetUnsignedInt64(lldb::SBError &error, lldb::offset_t offset) { } int8_t SBData::GetSignedInt8(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(int8_t, SBData, GetSignedInt8, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); int8_t value = 0; if (!m_opaque_sp.get()) { @@ -261,8 +250,7 @@ int8_t SBData::GetSignedInt8(lldb::SBError &error, lldb::offset_t offset) { } int16_t SBData::GetSignedInt16(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(int16_t, SBData, GetSignedInt16, - (lldb::SBError &, lldb::offset_t), error, offset); + 
LLDB_INSTRUMENT_VA(this, error, offset); int16_t value = 0; if (!m_opaque_sp.get()) { @@ -277,8 +265,7 @@ int16_t SBData::GetSignedInt16(lldb::SBError &error, lldb::offset_t offset) { } int32_t SBData::GetSignedInt32(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(int32_t, SBData, GetSignedInt32, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); int32_t value = 0; if (!m_opaque_sp.get()) { @@ -293,8 +280,7 @@ int32_t SBData::GetSignedInt32(lldb::SBError &error, lldb::offset_t offset) { } int64_t SBData::GetSignedInt64(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(int64_t, SBData, GetSignedInt64, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); int64_t value = 0; if (!m_opaque_sp.get()) { @@ -309,8 +295,7 @@ int64_t SBData::GetSignedInt64(lldb::SBError &error, lldb::offset_t offset) { } const char *SBData::GetString(lldb::SBError &error, lldb::offset_t offset) { - LLDB_RECORD_METHOD(const char *, SBData, GetString, - (lldb::SBError &, lldb::offset_t), error, offset); + LLDB_INSTRUMENT_VA(this, error, offset); const char *value = nullptr; if (!m_opaque_sp.get()) { @@ -326,8 +311,7 @@ const char *SBData::GetString(lldb::SBError &error, lldb::offset_t offset) { bool SBData::GetDescription(lldb::SBStream &description, lldb::addr_t base_addr) { - LLDB_RECORD_METHOD(bool, SBData, GetDescription, - (lldb::SBStream &, lldb::addr_t), description, base_addr); + LLDB_INSTRUMENT_VA(this, description, base_addr); Stream &strm = description.ref(); @@ -342,9 +326,7 @@ bool SBData::GetDescription(lldb::SBStream &description, size_t SBData::ReadRawData(lldb::SBError &error, lldb::offset_t offset, void *buf, size_t size) { - LLDB_RECORD_METHOD(size_t, SBData, ReadRawData, - (lldb::SBError &, lldb::offset_t, void *, size_t), error, - offset, buf, size); + LLDB_INSTRUMENT_VA(this, error, offset, buf, size); void *ok = nullptr; if (!m_opaque_sp.get()) { 
@@ -360,10 +342,7 @@ size_t SBData::ReadRawData(lldb::SBError &error, lldb::offset_t offset, void SBData::SetData(lldb::SBError &error, const void *buf, size_t size, lldb::ByteOrder endian, uint8_t addr_size) { - LLDB_RECORD_METHOD( - void, SBData, SetData, - (lldb::SBError &, const void *, size_t, lldb::ByteOrder, uint8_t), error, - buf, size, endian, addr_size); + LLDB_INSTRUMENT_VA(this, error, buf, size, endian, addr_size); if (!m_opaque_sp.get()) m_opaque_sp = std::make_shared(buf, size, endian, addr_size); @@ -377,10 +356,7 @@ void SBData::SetData(lldb::SBError &error, const void *buf, size_t size, void SBData::SetDataWithOwnership(lldb::SBError &error, const void *buf, size_t size, lldb::ByteOrder endian, uint8_t addr_size) { - LLDB_RECORD_METHOD( - void, SBData, SetData, - (lldb::SBError &, const void *, size_t, lldb::ByteOrder, uint8_t, bool), - error, buf, size, endian, addr_size); + LLDB_INSTRUMENT_VA(this, error, buf, size, endian, addr_size); lldb::DataBufferSP buffer_sp = std::make_shared(buf, size); @@ -394,7 +370,7 @@ void SBData::SetDataWithOwnership(lldb::SBError &error, const void *buf, } bool SBData::Append(const SBData &rhs) { - LLDB_RECORD_METHOD(bool, SBData, Append, (const lldb::SBData &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); bool value = false; if (m_opaque_sp.get() && rhs.m_opaque_sp.get()) @@ -405,9 +381,7 @@ bool SBData::Append(const SBData &rhs) { lldb::SBData SBData::CreateDataFromCString(lldb::ByteOrder endian, uint32_t addr_byte_size, const char *data) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromCString, - (lldb::ByteOrder, uint32_t, const char *), endian, - addr_byte_size, data); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, data); if (!data || !data[0]) return SBData(); @@ -427,9 +401,7 @@ lldb::SBData SBData::CreateDataFromUInt64Array(lldb::ByteOrder endian, uint32_t addr_byte_size, uint64_t *array, size_t array_len) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromUInt64Array, - 
(lldb::ByteOrder, uint32_t, uint64_t *, size_t), - endian, addr_byte_size, array, array_len); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len); if (!array || array_len == 0) return SBData(); @@ -449,9 +421,7 @@ lldb::SBData SBData::CreateDataFromUInt32Array(lldb::ByteOrder endian, uint32_t addr_byte_size, uint32_t *array, size_t array_len) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromUInt32Array, - (lldb::ByteOrder, uint32_t, uint32_t *, size_t), - endian, addr_byte_size, array, array_len); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len); if (!array || array_len == 0) return SBData(); @@ -471,9 +441,7 @@ lldb::SBData SBData::CreateDataFromSInt64Array(lldb::ByteOrder endian, uint32_t addr_byte_size, int64_t *array, size_t array_len) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromSInt64Array, - (lldb::ByteOrder, uint32_t, int64_t *, size_t), - endian, addr_byte_size, array, array_len); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len); if (!array || array_len == 0) return SBData(); @@ -493,9 +461,7 @@ lldb::SBData SBData::CreateDataFromSInt32Array(lldb::ByteOrder endian, uint32_t addr_byte_size, int32_t *array, size_t array_len) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromSInt32Array, - (lldb::ByteOrder, uint32_t, int32_t *, size_t), - endian, addr_byte_size, array, array_len); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len); if (!array || array_len == 0) return SBData(); @@ -515,9 +481,7 @@ lldb::SBData SBData::CreateDataFromDoubleArray(lldb::ByteOrder endian, uint32_t addr_byte_size, double *array, size_t array_len) { - LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromDoubleArray, - (lldb::ByteOrder, uint32_t, double *, size_t), - endian, addr_byte_size, array, array_len); + LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len); if (!array || array_len == 0) return SBData(); @@ -534,8 +498,7 @@ lldb::SBData 
SBData::CreateDataFromDoubleArray(lldb::ByteOrder endian, } bool SBData::SetDataFromCString(const char *data) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromCString, (const char *), data); - + LLDB_INSTRUMENT_VA(this, data); if (!data) { return false; @@ -556,9 +519,7 @@ bool SBData::SetDataFromCString(const char *data) { } bool SBData::SetDataFromUInt64Array(uint64_t *array, size_t array_len) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromUInt64Array, (uint64_t *, size_t), - array, array_len); - + LLDB_INSTRUMENT_VA(this, array, array_len); if (!array || array_len == 0) { return false; @@ -579,9 +540,7 @@ bool SBData::SetDataFromUInt64Array(uint64_t *array, size_t array_len) { } bool SBData::SetDataFromUInt32Array(uint32_t *array, size_t array_len) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromUInt32Array, (uint32_t *, size_t), - array, array_len); - + LLDB_INSTRUMENT_VA(this, array, array_len); if (!array || array_len == 0) { return false; @@ -601,9 +560,7 @@ bool SBData::SetDataFromUInt32Array(uint32_t *array, size_t array_len) { } bool SBData::SetDataFromSInt64Array(int64_t *array, size_t array_len) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromSInt64Array, (int64_t *, size_t), - array, array_len); - + LLDB_INSTRUMENT_VA(this, array, array_len); if (!array || array_len == 0) { return false; @@ -623,9 +580,7 @@ bool SBData::SetDataFromSInt64Array(int64_t *array, size_t array_len) { } bool SBData::SetDataFromSInt32Array(int32_t *array, size_t array_len) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromSInt32Array, (int32_t *, size_t), - array, array_len); - + LLDB_INSTRUMENT_VA(this, array, array_len); if (!array || array_len == 0) { return false; @@ -645,9 +600,7 @@ bool SBData::SetDataFromSInt32Array(int32_t *array, size_t array_len) { } bool SBData::SetDataFromDoubleArray(double *array, size_t array_len) { - LLDB_RECORD_METHOD(bool, SBData, SetDataFromDoubleArray, (double *, size_t), - array, array_len); - + LLDB_INSTRUMENT_VA(this, array, array_len); if 
(!array || array_len == 0) { return false; diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index e8d723810fb4b..8b09d6a8e435c 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" #include "SystemInitializerFull.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBDebugger.h" @@ -105,43 +105,34 @@ SBError SBInputReader::Initialize( unsigned long), void *a, lldb::InputReaderGranularity b, char const *c, char const *d, bool e) { - LLDB_RECORD_METHOD( - lldb::SBError, SBInputReader, Initialize, - (lldb::SBDebugger &, - unsigned long (*)(void *, lldb::SBInputReader *, lldb::InputReaderAction, - const char *, unsigned long), - void *, lldb::InputReaderGranularity, const char *, const char *, bool), - sb_debugger, callback, a, b, c, d, e); + LLDB_INSTRUMENT_VA(this, sb_debugger, callback, a, b, c, d, e); return SBError(); } -void SBInputReader::SetIsDone(bool b) { - LLDB_RECORD_METHOD(void, SBInputReader, SetIsDone, (bool), b); -} +void SBInputReader::SetIsDone(bool b) { LLDB_INSTRUMENT_VA(this, b); } bool SBInputReader::IsActive() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInputReader, IsActive); + LLDB_INSTRUMENT_VA(this); return false; } -SBDebugger::SBDebugger() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBDebugger); } +SBDebugger::SBDebugger() { LLDB_INSTRUMENT_VA(this); } SBDebugger::SBDebugger(const lldb::DebuggerSP &debugger_sp) : m_opaque_sp(debugger_sp) { - LLDB_RECORD_CONSTRUCTOR(SBDebugger, (const lldb::DebuggerSP &), debugger_sp); + LLDB_INSTRUMENT_VA(this, debugger_sp); } SBDebugger::SBDebugger(const SBDebugger &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBDebugger, (const lldb::SBDebugger &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBDebugger::~SBDebugger() = default; SBDebugger &SBDebugger::operator=(const 
SBDebugger &rhs) { - LLDB_RECORD_METHOD(lldb::SBDebugger &, - SBDebugger, operator=,(const lldb::SBDebugger &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -150,8 +141,7 @@ SBDebugger &SBDebugger::operator=(const SBDebugger &rhs) { } const char *SBDebugger::GetBroadcasterClass() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBDebugger, - GetBroadcasterClass); + LLDB_INSTRUMENT(); return Debugger::GetStaticBroadcasterClass().AsCString(); } @@ -161,6 +151,8 @@ const char *SBDebugger::GetProgressFromEvent(const lldb::SBEvent &event, uint64_t &completed, uint64_t &total, bool &is_debugger_specific) { + LLDB_INSTRUMENT_VA(event, progress_id, completed, total, + is_debugger_specific); const Debugger::ProgressEventData *progress_data = Debugger::ProgressEventData::GetEventDataFromEvent(event.get()); if (progress_data == nullptr) @@ -169,29 +161,22 @@ const char *SBDebugger::GetProgressFromEvent(const lldb::SBEvent &event, completed = progress_data->GetCompleted(); total = progress_data->GetTotal(); is_debugger_specific = progress_data->IsDebuggerSpecific(); - // We must record the static method _after_ the out parameters have been - // filled in. 
- LLDB_RECORD_STATIC_METHOD( - const char *, SBDebugger, GetProgressFromEvent, - (const lldb::SBEvent &, uint64_t &, uint64_t &, uint64_t &, bool &), - event, progress_id, completed, total, is_debugger_specific); return progress_data->GetMessage().c_str(); } SBBroadcaster SBDebugger::GetBroadcaster() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBDebugger, GetBroadcaster); + LLDB_INSTRUMENT_VA(this); SBBroadcaster broadcaster(&m_opaque_sp->GetBroadcaster(), false); return broadcaster; } void SBDebugger::Initialize() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, Initialize); + LLDB_INSTRUMENT(); SBError ignored = SBDebugger::InitializeWithErrorHandling(); } lldb::SBError SBDebugger::InitializeWithErrorHandling() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBError, SBDebugger, - InitializeWithErrorHandling); + LLDB_INSTRUMENT(); SBError error; if (auto e = g_debugger_lifetime->Initialize( @@ -202,13 +187,13 @@ lldb::SBError SBDebugger::InitializeWithErrorHandling() { } void SBDebugger::Terminate() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, Terminate); + LLDB_INSTRUMENT(); g_debugger_lifetime->Terminate(); } void SBDebugger::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->ClearIOHandlers(); @@ -217,14 +202,13 @@ void SBDebugger::Clear() { } SBDebugger SBDebugger::Create() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBDebugger, SBDebugger, Create); + LLDB_INSTRUMENT(); return SBDebugger::Create(false, nullptr, nullptr); } SBDebugger SBDebugger::Create(bool source_init_files) { - LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, Create, (bool), - source_init_files); + LLDB_INSTRUMENT_VA(source_init_files); return SBDebugger::Create(source_init_files, nullptr, nullptr); } @@ -233,9 +217,7 @@ SBDebugger SBDebugger::Create(bool source_init_files, lldb::LogOutputCallback callback, void *baton) { - LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, Create, - 
(bool, lldb::LogOutputCallback, void *), - source_init_files, callback, baton); + LLDB_INSTRUMENT_VA(source_init_files, callback, baton); SBDebugger debugger; @@ -263,8 +245,7 @@ SBDebugger SBDebugger::Create(bool source_init_files, } void SBDebugger::Destroy(SBDebugger &debugger) { - LLDB_RECORD_STATIC_METHOD(void, SBDebugger, Destroy, (lldb::SBDebugger &), - debugger); + LLDB_INSTRUMENT_VA(debugger); Debugger::Destroy(debugger.m_opaque_sp); @@ -273,7 +254,7 @@ void SBDebugger::Destroy(SBDebugger &debugger) { } void SBDebugger::MemoryPressureDetected() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, MemoryPressureDetected); + LLDB_INSTRUMENT(); // Since this function can be call asynchronously, we allow it to be non- // mandatory. We have seen deadlocks with this function when called so we @@ -286,52 +267,51 @@ void SBDebugger::MemoryPressureDetected() { } bool SBDebugger::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBDebugger::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } void SBDebugger::SetAsync(bool b) { - LLDB_RECORD_METHOD(void, SBDebugger, SetAsync, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (m_opaque_sp) m_opaque_sp->SetAsyncExecution(b); } bool SBDebugger::GetAsync() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBDebugger, GetAsync); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetAsyncExecution() : false); } void SBDebugger::SkipLLDBInitFiles(bool b) { - LLDB_RECORD_METHOD(void, SBDebugger, SkipLLDBInitFiles, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (m_opaque_sp) m_opaque_sp->GetCommandInterpreter().SkipLLDBInitFiles(b); } void SBDebugger::SkipAppInitFiles(bool b) { - LLDB_RECORD_METHOD(void, SBDebugger, SkipAppInitFiles, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (m_opaque_sp) m_opaque_sp->GetCommandInterpreter().SkipAppInitFiles(b); } void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) { - LLDB_RECORD_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool), fh, - transfer_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); if (m_opaque_sp) m_opaque_sp->SetInputFile( (FileSP)std::make_shared(fh, transfer_ownership)); } SBError SBDebugger::SetInputString(const char *data) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputString, (const char *), data); + LLDB_INSTRUMENT_VA(this, data); SBError sb_error; if (data == nullptr) { sb_error.SetErrorString("String data is null"); @@ -357,7 +337,7 @@ SBError SBDebugger::SetInputString(const char *data) { // of problems; don't want users trying to switch modes in the middle of a // debugging session. 
SBError SBDebugger::SetInputFile(SBFile file) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (SBFile), file); + LLDB_INSTRUMENT_VA(this, file); SBError error; if (!m_opaque_sp) { @@ -369,23 +349,22 @@ SBError SBDebugger::SetInputFile(SBFile file) { } SBError SBDebugger::SetInputFile(FileSP file_sp) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); return SetInputFile(SBFile(file_sp)); } SBError SBDebugger::SetOutputFile(FileSP file_sp) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); return SetOutputFile(SBFile(file_sp)); } void SBDebugger::SetOutputFileHandle(FILE *fh, bool transfer_ownership) { - LLDB_RECORD_METHOD(void, SBDebugger, SetOutputFileHandle, (FILE *, bool), fh, - transfer_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); SetOutputFile((FileSP)std::make_shared(fh, transfer_ownership)); } SBError SBDebugger::SetOutputFile(SBFile file) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (SBFile file), file); + LLDB_INSTRUMENT_VA(this, file); SBError error; if (!m_opaque_sp) { error.ref().SetErrorString("invalid debugger"); @@ -400,18 +379,17 @@ SBError SBDebugger::SetOutputFile(SBFile file) { } void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) { - LLDB_RECORD_METHOD(void, SBDebugger, SetErrorFileHandle, (FILE *, bool), fh, - transfer_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_ownership); SetErrorFile((FileSP)std::make_shared(fh, transfer_ownership)); } SBError SBDebugger::SetErrorFile(FileSP file_sp) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); return SetErrorFile(SBFile(file_sp)); } SBError SBDebugger::SetErrorFile(SBFile file) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (SBFile file), file); + LLDB_INSTRUMENT_VA(this, file); SBError error; if (!m_opaque_sp) { 
error.ref().SetErrorString("invalid debugger"); @@ -426,7 +404,7 @@ SBError SBDebugger::SetErrorFile(SBFile file) { } FILE *SBDebugger::GetInputFileHandle() { - LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetInputFileHandle); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { File &file_sp = m_opaque_sp->GetInputFile(); return file_sp.GetStream(); @@ -435,7 +413,7 @@ FILE *SBDebugger::GetInputFileHandle() { } SBFile SBDebugger::GetInputFile() { - LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetInputFile); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { return SBFile(m_opaque_sp->GetInputFileSP()); } @@ -443,7 +421,7 @@ SBFile SBDebugger::GetInputFile() { } FILE *SBDebugger::GetOutputFileHandle() { - LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetOutputFileHandle); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { StreamFile &stream_file = m_opaque_sp->GetOutputStream(); return stream_file.GetFile().GetStream(); @@ -452,7 +430,7 @@ FILE *SBDebugger::GetOutputFileHandle() { } SBFile SBDebugger::GetOutputFile() { - LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetOutputFile); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { SBFile file(m_opaque_sp->GetOutputStream().GetFileSP()); return file; @@ -461,7 +439,7 @@ SBFile SBDebugger::GetOutputFile() { } FILE *SBDebugger::GetErrorFileHandle() { - LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetErrorFileHandle); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { StreamFile &stream_file = m_opaque_sp->GetErrorStream(); @@ -471,7 +449,7 @@ FILE *SBDebugger::GetErrorFileHandle() { } SBFile SBDebugger::GetErrorFile() { - LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetErrorFile); + LLDB_INSTRUMENT_VA(this); SBFile file; if (m_opaque_sp) { SBFile file(m_opaque_sp->GetErrorStream().GetFileSP()); @@ -481,21 +459,20 @@ SBFile SBDebugger::GetErrorFile() { } void SBDebugger::SaveInputTerminalState() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, SaveInputTerminalState); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) 
m_opaque_sp->SaveInputTerminalState(); } void SBDebugger::RestoreInputTerminalState() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, RestoreInputTerminalState); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->RestoreInputTerminalState(); } SBCommandInterpreter SBDebugger::GetCommandInterpreter() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCommandInterpreter, SBDebugger, - GetCommandInterpreter); + LLDB_INSTRUMENT_VA(this); SBCommandInterpreter sb_interpreter; if (m_opaque_sp) @@ -505,7 +482,7 @@ SBCommandInterpreter SBDebugger::GetCommandInterpreter() { } void SBDebugger::HandleCommand(const char *command) { - LLDB_RECORD_METHOD(void, SBDebugger, HandleCommand, (const char *), command); + LLDB_INSTRUMENT_VA(this, command); if (m_opaque_sp) { TargetSP target_sp(m_opaque_sp->GetSelectedTarget()); @@ -538,7 +515,7 @@ void SBDebugger::HandleCommand(const char *command) { } SBListener SBDebugger::GetListener() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBDebugger, GetListener); + LLDB_INSTRUMENT_VA(this); SBListener sb_listener; if (m_opaque_sp) @@ -550,10 +527,7 @@ SBListener SBDebugger::GetListener() { void SBDebugger::HandleProcessEvent(const SBProcess &process, const SBEvent &event, SBFile out, SBFile err) { - LLDB_RECORD_METHOD( - void, SBDebugger, HandleProcessEvent, - (const lldb::SBProcess &, const lldb::SBEvent &, SBFile, SBFile), process, - event, out, err); + LLDB_INSTRUMENT_VA(this, process, event, out, err); return HandleProcessEvent(process, event, out.m_opaque_sp, err.m_opaque_sp); } @@ -561,10 +535,7 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process, void SBDebugger::HandleProcessEvent(const SBProcess &process, const SBEvent &event, FILE *out, FILE *err) { - LLDB_RECORD_METHOD( - void, SBDebugger, HandleProcessEvent, - (const lldb::SBProcess &, const lldb::SBEvent &, FILE *, FILE *), process, - event, out, err); + LLDB_INSTRUMENT_VA(this, process, event, out, err); FileSP outfile = std::make_shared(out, false); FileSP errfile = 
std::make_shared(err, false); @@ -575,10 +546,7 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process, const SBEvent &event, FileSP out_sp, FileSP err_sp) { - LLDB_RECORD_METHOD( - void, SBDebugger, HandleProcessEvent, - (const lldb::SBProcess &, const lldb::SBEvent &, FileSP, FileSP), process, - event, out_sp, err_sp); + LLDB_INSTRUMENT_VA(this, process, event, out_sp, err_sp); if (!process.IsValid()) return; @@ -622,16 +590,14 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process, } SBSourceManager SBDebugger::GetSourceManager() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSourceManager, SBDebugger, - GetSourceManager); + LLDB_INSTRUMENT_VA(this); SBSourceManager sb_source_manager(*this); return sb_source_manager; } bool SBDebugger::GetDefaultArchitecture(char *arch_name, size_t arch_name_len) { - LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, GetDefaultArchitecture, - (char *, size_t), arch_name, "", arch_name_len); + LLDB_INSTRUMENT_VA(arch_name, arch_name_len); if (arch_name && arch_name_len) { ArchSpec default_arch = Target::GetDefaultArchitecture(); @@ -652,8 +618,7 @@ bool SBDebugger::GetDefaultArchitecture(char *arch_name, size_t arch_name_len) { } bool SBDebugger::SetDefaultArchitecture(const char *arch_name) { - LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, SetDefaultArchitecture, - (const char *), arch_name); + LLDB_INSTRUMENT_VA(arch_name); if (arch_name) { ArchSpec arch(arch_name); @@ -667,8 +632,7 @@ bool SBDebugger::SetDefaultArchitecture(const char *arch_name) { ScriptLanguage SBDebugger::GetScriptingLanguage(const char *script_language_name) { - LLDB_RECORD_METHOD(lldb::ScriptLanguage, SBDebugger, GetScriptingLanguage, - (const char *), script_language_name); + LLDB_INSTRUMENT_VA(this, script_language_name); if (!script_language_name) return eScriptLanguageDefault; @@ -678,8 +642,7 @@ SBDebugger::GetScriptingLanguage(const char *script_language_name) { SBStructuredData SBDebugger::GetScriptInterpreterInfo(lldb::ScriptLanguage language) { 
- LLDB_RECORD_METHOD(SBStructuredData, SBDebugger, GetScriptInterpreterInfo, - (lldb::ScriptLanguage), language); + LLDB_INSTRUMENT_VA(this, language); SBStructuredData data; if (m_opaque_sp) { lldb_private::ScriptInterpreter *interp = @@ -692,14 +655,13 @@ SBDebugger::GetScriptInterpreterInfo(lldb::ScriptLanguage language) { } const char *SBDebugger::GetVersionString() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBDebugger, GetVersionString); + LLDB_INSTRUMENT(); return lldb_private::GetVersion(); } const char *SBDebugger::StateAsCString(StateType state) { - LLDB_RECORD_STATIC_METHOD(const char *, SBDebugger, StateAsCString, - (lldb::StateType), state); + LLDB_INSTRUMENT_VA(state); return lldb_private::StateAsCString(state); } @@ -725,8 +687,7 @@ static void AddLLVMTargets(StructuredData::Dictionary &dict) { } SBStructuredData SBDebugger::GetBuildConfiguration() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBStructuredData, SBDebugger, - GetBuildConfiguration); + LLDB_INSTRUMENT(); auto config_up = std::make_unique(); AddBoolConfigEntry( @@ -758,8 +719,7 @@ SBStructuredData SBDebugger::GetBuildConfiguration() { } bool SBDebugger::StateIsRunningState(StateType state) { - LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, StateIsRunningState, - (lldb::StateType), state); + LLDB_INSTRUMENT_VA(state); const bool result = lldb_private::StateIsRunningState(state); @@ -767,8 +727,7 @@ bool SBDebugger::StateIsRunningState(StateType state) { } bool SBDebugger::StateIsStoppedState(StateType state) { - LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, StateIsStoppedState, - (lldb::StateType), state); + LLDB_INSTRUMENT_VA(state); const bool result = lldb_private::StateIsStoppedState(state, false); @@ -780,10 +739,8 @@ lldb::SBTarget SBDebugger::CreateTarget(const char *filename, const char *platform_name, bool add_dependent_modules, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD( - lldb::SBTarget, SBDebugger, CreateTarget, - (const char *, const char *, const char *, bool, 
lldb::SBError &), - filename, target_triple, platform_name, add_dependent_modules, sb_error); + LLDB_INSTRUMENT_VA(this, filename, target_triple, platform_name, + add_dependent_modules, sb_error); SBTarget sb_target; TargetSP target_sp; @@ -818,9 +775,7 @@ lldb::SBTarget SBDebugger::CreateTarget(const char *filename, SBTarget SBDebugger::CreateTargetWithFileAndTargetTriple(const char *filename, const char *target_triple) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, - CreateTargetWithFileAndTargetTriple, - (const char *, const char *), filename, target_triple); + LLDB_INSTRUMENT_VA(this, filename, target_triple); SBTarget sb_target; TargetSP target_sp; @@ -845,8 +800,7 @@ SBDebugger::CreateTargetWithFileAndTargetTriple(const char *filename, SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename, const char *arch_cstr) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, CreateTargetWithFileAndArch, - (const char *, const char *), filename, arch_cstr); + LLDB_INSTRUMENT_VA(this, filename, arch_cstr); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -858,16 +812,18 @@ SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename, // The version of CreateTarget that takes an ArchSpec won't accept an // empty ArchSpec, so when the arch hasn't been specified, we need to // call the target triple version. 
- error = m_opaque_sp->GetTargetList().CreateTarget(*m_opaque_sp, filename, - arch_cstr, eLoadDependentsYes, nullptr, target_sp); + error = m_opaque_sp->GetTargetList().CreateTarget( + *m_opaque_sp, filename, arch_cstr, eLoadDependentsYes, nullptr, + target_sp); } else { - PlatformSP platform_sp = m_opaque_sp->GetPlatformList() - .GetSelectedPlatform(); - ArchSpec arch = Platform::GetAugmentedArchSpec(platform_sp.get(), - arch_cstr); + PlatformSP platform_sp = + m_opaque_sp->GetPlatformList().GetSelectedPlatform(); + ArchSpec arch = + Platform::GetAugmentedArchSpec(platform_sp.get(), arch_cstr); if (arch.IsValid()) - error = m_opaque_sp->GetTargetList().CreateTarget(*m_opaque_sp, filename, - arch, eLoadDependentsYes, platform_sp, target_sp); + error = m_opaque_sp->GetTargetList().CreateTarget( + *m_opaque_sp, filename, arch, eLoadDependentsYes, platform_sp, + target_sp); else error.SetErrorStringWithFormat("invalid arch_cstr: %s", arch_cstr); } @@ -887,8 +843,7 @@ SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename, } SBTarget SBDebugger::CreateTarget(const char *filename) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, CreateTarget, (const char *), - filename); + LLDB_INSTRUMENT_VA(this, filename); SBTarget sb_target; TargetSP target_sp; @@ -912,7 +867,7 @@ SBTarget SBDebugger::CreateTarget(const char *filename) { } SBTarget SBDebugger::GetDummyTarget() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBDebugger, GetDummyTarget); + LLDB_INSTRUMENT_VA(this); SBTarget sb_target; if (m_opaque_sp) { @@ -926,8 +881,7 @@ SBTarget SBDebugger::GetDummyTarget() { } bool SBDebugger::DeleteTarget(lldb::SBTarget &target) { - LLDB_RECORD_METHOD(bool, SBDebugger, DeleteTarget, (lldb::SBTarget &), - target); + LLDB_INSTRUMENT_VA(this, target); bool result = false; if (m_opaque_sp) { @@ -949,8 +903,7 @@ bool SBDebugger::DeleteTarget(lldb::SBTarget &target) { } SBTarget SBDebugger::GetTargetAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, 
GetTargetAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBTarget sb_target; if (m_opaque_sp) { @@ -961,8 +914,7 @@ SBTarget SBDebugger::GetTargetAtIndex(uint32_t idx) { } uint32_t SBDebugger::GetIndexOfTarget(lldb::SBTarget target) { - LLDB_RECORD_METHOD(uint32_t, SBDebugger, GetIndexOfTarget, (lldb::SBTarget), - target); + LLDB_INSTRUMENT_VA(this, target); lldb::TargetSP target_sp = target.GetSP(); if (!target_sp) @@ -975,8 +927,7 @@ uint32_t SBDebugger::GetIndexOfTarget(lldb::SBTarget target) { } SBTarget SBDebugger::FindTargetWithProcessID(lldb::pid_t pid) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, FindTargetWithProcessID, - (lldb::pid_t), pid); + LLDB_INSTRUMENT_VA(this, pid); SBTarget sb_target; if (m_opaque_sp) { @@ -988,8 +939,7 @@ SBTarget SBDebugger::FindTargetWithProcessID(lldb::pid_t pid) { SBTarget SBDebugger::FindTargetWithFileAndArch(const char *filename, const char *arch_name) { - LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, FindTargetWithFileAndArch, - (const char *, const char *), filename, arch_name); + LLDB_INSTRUMENT_VA(this, filename, arch_name); SBTarget sb_target; if (m_opaque_sp && filename && filename[0]) { @@ -1015,7 +965,7 @@ SBTarget SBDebugger::FindTargetWithLLDBProcess(const ProcessSP &process_sp) { } uint32_t SBDebugger::GetNumTargets() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumTargets); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { // No need to lock, the target list is thread safe @@ -1025,7 +975,7 @@ uint32_t SBDebugger::GetNumTargets() { } SBTarget SBDebugger::GetSelectedTarget() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBDebugger, GetSelectedTarget); + LLDB_INSTRUMENT_VA(this); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -1049,8 +999,7 @@ SBTarget SBDebugger::GetSelectedTarget() { } void SBDebugger::SetSelectedTarget(SBTarget &sb_target) { - LLDB_RECORD_METHOD(void, SBDebugger, SetSelectedTarget, (lldb::SBTarget &), - sb_target); + LLDB_INSTRUMENT_VA(this, sb_target); 
Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -1068,7 +1017,7 @@ void SBDebugger::SetSelectedTarget(SBTarget &sb_target) { } SBPlatform SBDebugger::GetSelectedPlatform() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBPlatform, SBDebugger, GetSelectedPlatform); + LLDB_INSTRUMENT_VA(this); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -1085,8 +1034,7 @@ SBPlatform SBDebugger::GetSelectedPlatform() { } void SBDebugger::SetSelectedPlatform(SBPlatform &sb_platform) { - LLDB_RECORD_METHOD(void, SBDebugger, SetSelectedPlatform, - (lldb::SBPlatform &), sb_platform); + LLDB_INSTRUMENT_VA(this, sb_platform); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -1102,7 +1050,7 @@ void SBDebugger::SetSelectedPlatform(SBPlatform &sb_platform) { } uint32_t SBDebugger::GetNumPlatforms() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumPlatforms); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { // No need to lock, the platform list is thread safe @@ -1112,8 +1060,7 @@ uint32_t SBDebugger::GetNumPlatforms() { } SBPlatform SBDebugger::GetPlatformAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBPlatform, SBDebugger, GetPlatformAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBPlatform sb_platform; if (m_opaque_sp) { @@ -1124,7 +1071,7 @@ SBPlatform SBDebugger::GetPlatformAtIndex(uint32_t idx) { } uint32_t SBDebugger::GetNumAvailablePlatforms() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumAvailablePlatforms); + LLDB_INSTRUMENT_VA(this); uint32_t idx = 0; while (true) { @@ -1138,8 +1085,7 @@ uint32_t SBDebugger::GetNumAvailablePlatforms() { } SBStructuredData SBDebugger::GetAvailablePlatformInfoAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBStructuredData, SBDebugger, - GetAvailablePlatformInfoAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBStructuredData data; auto platform_dict = std::make_unique(); @@ -1169,15 +1115,13 @@ SBStructuredData SBDebugger::GetAvailablePlatformInfoAtIndex(uint32_t idx) { } void 
SBDebugger::DispatchInput(void *baton, const void *data, size_t data_len) { - LLDB_RECORD_METHOD(void, SBDebugger, DispatchInput, - (void *, const void *, size_t), baton, data, data_len); + LLDB_INSTRUMENT_VA(this, baton, data, data_len); DispatchInput(data, data_len); } void SBDebugger::DispatchInput(const void *data, size_t data_len) { - LLDB_RECORD_METHOD(void, SBDebugger, DispatchInput, (const void *, size_t), - data, data_len); + LLDB_INSTRUMENT_VA(this, data, data_len); // Log *log(GetLogIfAllCategoriesSet (LIBLLDB_LOG_API)); // @@ -1194,28 +1138,26 @@ void SBDebugger::DispatchInput(const void *data, size_t data_len) { } void SBDebugger::DispatchInputInterrupt() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, DispatchInputInterrupt); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->DispatchInputInterrupt(); } void SBDebugger::DispatchInputEndOfFile() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, DispatchInputEndOfFile); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->DispatchInputEndOfFile(); } void SBDebugger::PushInputReader(SBInputReader &reader) { - LLDB_RECORD_METHOD(void, SBDebugger, PushInputReader, (lldb::SBInputReader &), - reader); + LLDB_INSTRUMENT_VA(this, reader); } void SBDebugger::RunCommandInterpreter(bool auto_handle_events, bool spawn_thread) { - LLDB_RECORD_METHOD(void, SBDebugger, RunCommandInterpreter, (bool, bool), - auto_handle_events, spawn_thread); + LLDB_INSTRUMENT_VA(this, auto_handle_events, spawn_thread); if (m_opaque_sp) { CommandInterpreterRunOptions options; @@ -1232,11 +1174,8 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events, bool &stopped_for_crash) { - LLDB_RECORD_METHOD(void, SBDebugger, RunCommandInterpreter, - (bool, bool, lldb::SBCommandInterpreterRunOptions &, int &, - bool &, bool &), - auto_handle_events, spawn_thread, options, num_errors, - quit_requested, stopped_for_crash); + LLDB_INSTRUMENT_VA(this, auto_handle_events, spawn_thread, options, + num_errors, quit_requested, 
stopped_for_crash); if (m_opaque_sp) { options.SetAutoHandleEvents(auto_handle_events); @@ -1254,9 +1193,7 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events, SBCommandInterpreterRunResult SBDebugger::RunCommandInterpreter( const SBCommandInterpreterRunOptions &options) { - LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult, SBDebugger, - RunCommandInterpreter, - (const lldb::SBCommandInterpreterRunOptions &), options); + LLDB_INSTRUMENT_VA(this, options); if (!m_opaque_sp) return SBCommandInterpreterRunResult(); @@ -1270,9 +1207,7 @@ SBCommandInterpreterRunResult SBDebugger::RunCommandInterpreter( SBError SBDebugger::RunREPL(lldb::LanguageType language, const char *repl_options) { - LLDB_RECORD_METHOD(lldb::SBError, SBDebugger, RunREPL, - (lldb::LanguageType, const char *), language, - repl_options); + LLDB_INSTRUMENT_VA(this, language, repl_options); SBError error; if (m_opaque_sp) @@ -1296,8 +1231,7 @@ Debugger &SBDebugger::ref() const { const lldb::DebuggerSP &SBDebugger::get_sp() const { return m_opaque_sp; } SBDebugger SBDebugger::FindDebuggerWithID(int id) { - LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, FindDebuggerWithID, - (int), id); + LLDB_INSTRUMENT_VA(id); // No need to lock, the debugger list is thread safe SBDebugger sb_debugger; @@ -1308,16 +1242,14 @@ SBDebugger SBDebugger::FindDebuggerWithID(int id) { } const char *SBDebugger::GetInstanceName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBDebugger, GetInstanceName); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetInstanceName().AsCString() : nullptr); } SBError SBDebugger::SetInternalVariable(const char *var_name, const char *value, const char *debugger_instance_name) { - LLDB_RECORD_STATIC_METHOD(lldb::SBError, SBDebugger, SetInternalVariable, - (const char *, const char *, const char *), - var_name, value, debugger_instance_name); + LLDB_INSTRUMENT_VA(var_name, value, debugger_instance_name); SBError sb_error; DebuggerSP debugger_sp(Debugger::FindDebuggerWithInstanceName( @@ -1340,9 +1272,7 @@ SBError SBDebugger::SetInternalVariable(const char *var_name, const char *value, SBStringList SBDebugger::GetInternalVariableValue(const char *var_name, const char *debugger_instance_name) { - LLDB_RECORD_STATIC_METHOD( - lldb::SBStringList, SBDebugger, GetInternalVariableValue, - (const char *, const char *), var_name, debugger_instance_name); + LLDB_INSTRUMENT_VA(var_name, debugger_instance_name); DebuggerSP debugger_sp(Debugger::FindDebuggerWithInstanceName( ConstString(debugger_instance_name))); @@ -1367,21 +1297,20 @@ SBDebugger::GetInternalVariableValue(const char *var_name, } uint32_t SBDebugger::GetTerminalWidth() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDebugger, GetTerminalWidth); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetTerminalWidth() : 0); } void SBDebugger::SetTerminalWidth(uint32_t term_width) { - LLDB_RECORD_METHOD(void, SBDebugger, SetTerminalWidth, (uint32_t), - term_width); + LLDB_INSTRUMENT_VA(this, term_width); if (m_opaque_sp) m_opaque_sp->SetTerminalWidth(term_width); } const char *SBDebugger::GetPrompt() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBDebugger, GetPrompt); + LLDB_INSTRUMENT_VA(this); Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); @@ -1394,14 +1323,14 @@ const char *SBDebugger::GetPrompt() const { } void SBDebugger::SetPrompt(const char *prompt) { - LLDB_RECORD_METHOD(void, SBDebugger, SetPrompt, (const char *), prompt); + LLDB_INSTRUMENT_VA(this, prompt); if (m_opaque_sp) m_opaque_sp->SetPrompt(llvm::StringRef(prompt)); } const char *SBDebugger::GetReproducerPath() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBDebugger, GetReproducerPath); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? ConstString(m_opaque_sp->GetReproducerPath()).GetCString() @@ -1409,15 +1338,13 @@ const char *SBDebugger::GetReproducerPath() const { } ScriptLanguage SBDebugger::GetScriptLanguage() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ScriptLanguage, SBDebugger, - GetScriptLanguage); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? m_opaque_sp->GetScriptLanguage() : eScriptLanguageNone); } void SBDebugger::SetScriptLanguage(ScriptLanguage script_lang) { - LLDB_RECORD_METHOD(void, SBDebugger, SetScriptLanguage, - (lldb::ScriptLanguage), script_lang); + LLDB_INSTRUMENT_VA(this, script_lang); if (m_opaque_sp) { m_opaque_sp->SetScriptLanguage(script_lang); @@ -1425,15 +1352,13 @@ void SBDebugger::SetScriptLanguage(ScriptLanguage script_lang) { } LanguageType SBDebugger::GetREPLLanguage() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::LanguageType, SBDebugger, - GetREPLLanguage); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetREPLLanguage() : eLanguageTypeUnknown); } void SBDebugger::SetREPLLanguage(LanguageType repl_lang) { - LLDB_RECORD_METHOD(void, SBDebugger, SetREPLLanguage, (lldb::LanguageType), - repl_lang); + LLDB_INSTRUMENT_VA(this, repl_lang); if (m_opaque_sp) { m_opaque_sp->SetREPLLanguage(repl_lang); @@ -1441,44 +1366,43 @@ void SBDebugger::SetREPLLanguage(LanguageType repl_lang) { } bool SBDebugger::SetUseExternalEditor(bool value) { - LLDB_RECORD_METHOD(bool, SBDebugger, SetUseExternalEditor, (bool), value); + LLDB_INSTRUMENT_VA(this, value); return (m_opaque_sp ? m_opaque_sp->SetUseExternalEditor(value) : false); } bool SBDebugger::GetUseExternalEditor() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBDebugger, GetUseExternalEditor); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? m_opaque_sp->GetUseExternalEditor() : false); } bool SBDebugger::SetUseColor(bool value) { - LLDB_RECORD_METHOD(bool, SBDebugger, SetUseColor, (bool), value); + LLDB_INSTRUMENT_VA(this, value); return (m_opaque_sp ? m_opaque_sp->SetUseColor(value) : false); } bool SBDebugger::GetUseColor() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetUseColor); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? m_opaque_sp->GetUseColor() : false); } bool SBDebugger::SetUseSourceCache(bool value) { - LLDB_RECORD_METHOD(bool, SBDebugger, SetUseSourceCache, (bool), value); + LLDB_INSTRUMENT_VA(this, value); return (m_opaque_sp ? m_opaque_sp->SetUseSourceCache(value) : false); } bool SBDebugger::GetUseSourceCache() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetUseSourceCache); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetUseSourceCache() : false); } bool SBDebugger::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBDebugger, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -1493,14 +1417,13 @@ bool SBDebugger::GetDescription(SBStream &description) { } user_id_t SBDebugger::GetID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::user_id_t, SBDebugger, GetID); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? m_opaque_sp->GetID() : LLDB_INVALID_UID); } SBError SBDebugger::SetCurrentPlatform(const char *platform_name_cstr) { - LLDB_RECORD_METHOD(lldb::SBError, SBDebugger, SetCurrentPlatform, - (const char *), platform_name_cstr); + LLDB_INSTRUMENT_VA(this, platform_name_cstr); SBError sb_error; if (m_opaque_sp) { @@ -1530,8 +1453,7 @@ SBError SBDebugger::SetCurrentPlatform(const char *platform_name_cstr) { } bool SBDebugger::SetCurrentPlatformSDKRoot(const char *sysroot) { - LLDB_RECORD_METHOD(bool, SBDebugger, SetCurrentPlatformSDKRoot, - (const char *), sysroot); + LLDB_INSTRUMENT_VA(this, sysroot); if (SBPlatform platform = GetSelectedPlatform()) { platform.SetSDKRoot(sysroot); @@ -1541,21 +1463,20 @@ bool SBDebugger::SetCurrentPlatformSDKRoot(const char *sysroot) { } bool SBDebugger::GetCloseInputOnEOF() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetCloseInputOnEOF); + LLDB_INSTRUMENT_VA(this); return (m_opaque_sp ? 
m_opaque_sp->GetCloseInputOnEOF() : false); } void SBDebugger::SetCloseInputOnEOF(bool b) { - LLDB_RECORD_METHOD(void, SBDebugger, SetCloseInputOnEOF, (bool), b); + LLDB_INSTRUMENT_VA(this, b); if (m_opaque_sp) m_opaque_sp->SetCloseInputOnEOF(b); } SBTypeCategory SBDebugger::GetCategory(const char *category_name) { - LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategory, - (const char *), category_name); + LLDB_INSTRUMENT_VA(this, category_name); if (!category_name || *category_name == 0) return SBTypeCategory(); @@ -1571,8 +1492,7 @@ SBTypeCategory SBDebugger::GetCategory(const char *category_name) { } SBTypeCategory SBDebugger::GetCategory(lldb::LanguageType lang_type) { - LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategory, - (lldb::LanguageType), lang_type); + LLDB_INSTRUMENT_VA(this, lang_type); TypeCategoryImplSP category_sp; if (DataVisualization::Categories::GetCategory(lang_type, category_sp)) { @@ -1583,8 +1503,7 @@ SBTypeCategory SBDebugger::GetCategory(lldb::LanguageType lang_type) { } SBTypeCategory SBDebugger::CreateCategory(const char *category_name) { - LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, CreateCategory, - (const char *), category_name); + LLDB_INSTRUMENT_VA(this, category_name); if (!category_name || *category_name == 0) return SBTypeCategory(); @@ -1600,8 +1519,7 @@ SBTypeCategory SBDebugger::CreateCategory(const char *category_name) { } bool SBDebugger::DeleteCategory(const char *category_name) { - LLDB_RECORD_METHOD(bool, SBDebugger, DeleteCategory, (const char *), - category_name); + LLDB_INSTRUMENT_VA(this, category_name); if (!category_name || *category_name == 0) return false; @@ -1610,29 +1528,26 @@ bool SBDebugger::DeleteCategory(const char *category_name) { } uint32_t SBDebugger::GetNumCategories() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumCategories); + LLDB_INSTRUMENT_VA(this); return DataVisualization::Categories::GetCount(); } SBTypeCategory 
SBDebugger::GetCategoryAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategoryAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); return SBTypeCategory( DataVisualization::Categories::GetCategoryAtIndex(index)); } SBTypeCategory SBDebugger::GetDefaultCategory() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeCategory, SBDebugger, - GetDefaultCategory); + LLDB_INSTRUMENT_VA(this); return GetCategory("default"); } SBTypeFormat SBDebugger::GetFormatForType(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBDebugger, GetFormatForType, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); SBTypeCategory default_category_sb = GetDefaultCategory(); if (default_category_sb.GetEnabled()) @@ -1641,8 +1556,7 @@ SBTypeFormat SBDebugger::GetFormatForType(SBTypeNameSpecifier type_name) { } SBTypeSummary SBDebugger::GetSummaryForType(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBDebugger, GetSummaryForType, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!type_name.IsValid()) return SBTypeSummary(); @@ -1650,8 +1564,7 @@ SBTypeSummary SBDebugger::GetSummaryForType(SBTypeNameSpecifier type_name) { } SBTypeFilter SBDebugger::GetFilterForType(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBDebugger, GetFilterForType, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!type_name.IsValid()) return SBTypeFilter(); @@ -1659,8 +1572,7 @@ SBTypeFilter SBDebugger::GetFilterForType(SBTypeNameSpecifier type_name) { } SBTypeSynthetic SBDebugger::GetSyntheticForType(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBDebugger, GetSyntheticForType, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!type_name.IsValid()) return SBTypeSynthetic(); @@ -1678,8 +1590,7 @@ static llvm::ArrayRef 
GetCategoryArray(const char **categories) { } bool SBDebugger::EnableLog(const char *channel, const char **categories) { - LLDB_RECORD_METHOD(bool, SBDebugger, EnableLog, (const char *, const char **), - channel, categories); + LLDB_INSTRUMENT_VA(this, channel, categories); if (m_opaque_sp) { uint32_t log_options = @@ -1694,8 +1605,7 @@ bool SBDebugger::EnableLog(const char *channel, const char **categories) { void SBDebugger::SetLoggingCallback(lldb::LogOutputCallback log_callback, void *baton) { - LLDB_RECORD_METHOD(void, SBDebugger, SetLoggingCallback, - (lldb::LogOutputCallback, void *), log_callback, baton); + LLDB_INSTRUMENT_VA(this, log_callback, baton); if (m_opaque_sp) { return m_opaque_sp->SetLoggingCallback(log_callback, baton); diff --git a/lldb/source/API/SBDeclaration.cpp b/lldb/source/API/SBDeclaration.cpp index f0273b081bb86..5b7def09b5ccf 100644 --- a/lldb/source/API/SBDeclaration.cpp +++ b/lldb/source/API/SBDeclaration.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBDeclaration.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Declaration.h" #include "lldb/Host/PosixApi.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include @@ -19,12 +19,10 @@ using namespace lldb; using namespace lldb_private; -SBDeclaration::SBDeclaration() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBDeclaration); -} +SBDeclaration::SBDeclaration() { LLDB_INSTRUMENT_VA(this); } SBDeclaration::SBDeclaration(const SBDeclaration &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBDeclaration, (const lldb::SBDeclaration &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -35,9 +33,7 @@ SBDeclaration::SBDeclaration(const lldb_private::Declaration *lldb_object_ptr) { } const SBDeclaration &SBDeclaration::operator=(const SBDeclaration &rhs) { - LLDB_RECORD_METHOD(const lldb::SBDeclaration &, 
- SBDeclaration, operator=,(const lldb::SBDeclaration &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -52,19 +48,17 @@ void SBDeclaration::SetDeclaration( SBDeclaration::~SBDeclaration() = default; bool SBDeclaration::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDeclaration, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBDeclaration::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDeclaration, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up.get() && m_opaque_up->IsValid(); } SBFileSpec SBDeclaration::GetFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBDeclaration, - GetFileSpec); - + LLDB_INSTRUMENT_VA(this); SBFileSpec sb_file_spec; if (m_opaque_up.get() && m_opaque_up->GetFile()) @@ -74,8 +68,7 @@ SBFileSpec SBDeclaration::GetFileSpec() const { } uint32_t SBDeclaration::GetLine() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDeclaration, GetLine); - + LLDB_INSTRUMENT_VA(this); uint32_t line = 0; if (m_opaque_up) @@ -86,7 +79,7 @@ uint32_t SBDeclaration::GetLine() const { } uint32_t SBDeclaration::GetColumn() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDeclaration, GetColumn); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetColumn(); @@ -94,8 +87,7 @@ uint32_t SBDeclaration::GetColumn() const { } void SBDeclaration::SetFileSpec(lldb::SBFileSpec filespec) { - LLDB_RECORD_METHOD(void, SBDeclaration, SetFileSpec, (lldb::SBFileSpec), - filespec); + LLDB_INSTRUMENT_VA(this, filespec); if (filespec.IsValid()) ref().SetFile(filespec.ref()); @@ -103,20 +95,19 @@ void SBDeclaration::SetFileSpec(lldb::SBFileSpec filespec) { ref().SetFile(FileSpec()); } void SBDeclaration::SetLine(uint32_t line) { - LLDB_RECORD_METHOD(void, SBDeclaration, SetLine, (uint32_t), line); + LLDB_INSTRUMENT_VA(this, line); ref().SetLine(line); } void SBDeclaration::SetColumn(uint32_t column) { - 
LLDB_RECORD_METHOD(void, SBDeclaration, SetColumn, (uint32_t), column); + LLDB_INSTRUMENT_VA(this, column); ref().SetColumn(column); } bool SBDeclaration::operator==(const SBDeclaration &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBDeclaration, operator==,(const lldb::SBDeclaration &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); lldb_private::Declaration *lhs_ptr = m_opaque_up.get(); lldb_private::Declaration *rhs_ptr = rhs.m_opaque_up.get(); @@ -128,8 +119,7 @@ bool SBDeclaration::operator==(const SBDeclaration &rhs) const { } bool SBDeclaration::operator!=(const SBDeclaration &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBDeclaration, operator!=,(const lldb::SBDeclaration &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); lldb_private::Declaration *lhs_ptr = m_opaque_up.get(); lldb_private::Declaration *rhs_ptr = rhs.m_opaque_up.get(); @@ -155,8 +145,7 @@ const lldb_private::Declaration &SBDeclaration::ref() const { } bool SBDeclaration::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBDeclaration, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBEnvironment.cpp b/lldb/source/API/SBEnvironment.cpp index d1df3ad123450..5fafabe02e014 100644 --- a/lldb/source/API/SBEnvironment.cpp +++ b/lldb/source/API/SBEnvironment.cpp @@ -7,22 +7,22 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBEnvironment.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStringList.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Environment.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; SBEnvironment::SBEnvironment() : m_opaque_up(new Environment()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEnvironment); + LLDB_INSTRUMENT_VA(this); } SBEnvironment::SBEnvironment(const SBEnvironment &rhs) : 
m_opaque_up(clone(rhs.m_opaque_up)) { - LLDB_RECORD_CONSTRUCTOR(SBEnvironment, (const lldb::SBEnvironment &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBEnvironment::SBEnvironment(Environment rhs) @@ -31,9 +31,7 @@ SBEnvironment::SBEnvironment(Environment rhs) SBEnvironment::~SBEnvironment() = default; const SBEnvironment &SBEnvironment::operator=(const SBEnvironment &rhs) { - LLDB_RECORD_METHOD(const lldb::SBEnvironment &, - SBEnvironment, operator=,(const lldb::SBEnvironment &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -41,13 +39,13 @@ const SBEnvironment &SBEnvironment::operator=(const SBEnvironment &rhs) { } size_t SBEnvironment::GetNumValues() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBEnvironment, GetNumValues); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->size(); } const char *SBEnvironment::Get(const char *name) { - LLDB_RECORD_METHOD(const char *, SBEnvironment, Get, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); auto entry = m_opaque_up->find(name); if (entry == m_opaque_up->end()) { @@ -57,8 +55,7 @@ const char *SBEnvironment::Get(const char *name) { } const char *SBEnvironment::GetNameAtIndex(size_t index) { - LLDB_RECORD_METHOD(const char *, SBEnvironment, GetNameAtIndex, (size_t), - index); + LLDB_INSTRUMENT_VA(this, index); if (index >= GetNumValues()) return nullptr; @@ -67,8 +64,7 @@ const char *SBEnvironment::GetNameAtIndex(size_t index) { } const char *SBEnvironment::GetValueAtIndex(size_t index) { - LLDB_RECORD_METHOD(const char *, SBEnvironment, GetValueAtIndex, (size_t), - index); + LLDB_INSTRUMENT_VA(this, index); if (index >= GetNumValues()) return nullptr; @@ -77,9 +73,7 @@ const char *SBEnvironment::GetValueAtIndex(size_t index) { } bool SBEnvironment::Set(const char *name, const char *value, bool overwrite) { - LLDB_RECORD_METHOD(bool, SBEnvironment, Set, - (const char *, const char *, bool), name, value, - overwrite); + LLDB_INSTRUMENT_VA(this, name, value, overwrite); 
if (overwrite) { m_opaque_up->insert_or_assign(name, std::string(value)); @@ -89,13 +83,13 @@ bool SBEnvironment::Set(const char *name, const char *value, bool overwrite) { } bool SBEnvironment::Unset(const char *name) { - LLDB_RECORD_METHOD(bool, SBEnvironment, Unset, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); return m_opaque_up->erase(name); } SBStringList SBEnvironment::GetEntries() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStringList, SBEnvironment, GetEntries); + LLDB_INSTRUMENT_VA(this); SBStringList entries; for (const auto &KV : *m_opaque_up) { @@ -105,16 +99,14 @@ SBStringList SBEnvironment::GetEntries() { } void SBEnvironment::PutEntry(const char *name_and_value) { - LLDB_RECORD_METHOD(void, SBEnvironment, PutEntry, (const char *), - name_and_value); + LLDB_INSTRUMENT_VA(this, name_and_value); auto split = llvm::StringRef(name_and_value).split('='); m_opaque_up->insert_or_assign(split.first.str(), split.second.str()); } void SBEnvironment::SetEntries(const SBStringList &entries, bool append) { - LLDB_RECORD_METHOD(void, SBEnvironment, SetEntries, - (const lldb::SBStringList &, bool), entries, append); + LLDB_INSTRUMENT_VA(this, entries, append); if (!append) m_opaque_up->clear(); @@ -124,7 +116,7 @@ void SBEnvironment::SetEntries(const SBStringList &entries, bool append) { } void SBEnvironment::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBEnvironment, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up->clear(); } diff --git a/lldb/source/API/SBError.cpp b/lldb/source/API/SBError.cpp index 793ebb7df5eb9..ef4f7266f083b 100644 --- a/lldb/source/API/SBError.cpp +++ b/lldb/source/API/SBError.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBError.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Status.h" #include @@ -17,10 +17,10 @@ using namespace lldb; 
using namespace lldb_private; -SBError::SBError() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBError); } +SBError::SBError() { LLDB_INSTRUMENT_VA(this); } SBError::SBError(const SBError &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBError, (const lldb::SBError &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -28,8 +28,7 @@ SBError::SBError(const SBError &rhs) { SBError::~SBError() = default; const SBError &SBError::operator=(const SBError &rhs) { - LLDB_RECORD_METHOD(const lldb::SBError &, - SBError, operator=,(const lldb::SBError &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -37,7 +36,7 @@ const SBError &SBError::operator=(const SBError &rhs) { } const char *SBError::GetCString() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBError, GetCString); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->AsCString(); @@ -45,14 +44,14 @@ const char *SBError::GetCString() const { } void SBError::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBError, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) m_opaque_up->Clear(); } bool SBError::Fail() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, Fail); + LLDB_INSTRUMENT_VA(this); bool ret_value = false; if (m_opaque_up) @@ -63,7 +62,7 @@ bool SBError::Fail() const { } bool SBError::Success() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, Success); + LLDB_INSTRUMENT_VA(this); bool ret_value = true; if (m_opaque_up) @@ -73,8 +72,7 @@ bool SBError::Success() const { } uint32_t SBError::GetError() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBError, GetError); - + LLDB_INSTRUMENT_VA(this); uint32_t err = 0; if (m_opaque_up) @@ -85,7 +83,7 @@ uint32_t SBError::GetError() const { } ErrorType SBError::GetType() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ErrorType, SBError, GetType); + LLDB_INSTRUMENT_VA(this); ErrorType err_type = eErrorTypeInvalid; if (m_opaque_up) @@ -95,8 +93,7 @@ ErrorType 
SBError::GetType() const { } void SBError::SetError(uint32_t err, ErrorType type) { - LLDB_RECORD_METHOD(void, SBError, SetError, (uint32_t, lldb::ErrorType), err, - type); + LLDB_INSTRUMENT_VA(this, err, type); CreateIfNeeded(); m_opaque_up->SetError(err, type); @@ -108,21 +105,21 @@ void SBError::SetError(const Status &lldb_error) { } void SBError::SetErrorToErrno() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBError, SetErrorToErrno); + LLDB_INSTRUMENT_VA(this); CreateIfNeeded(); m_opaque_up->SetErrorToErrno(); } void SBError::SetErrorToGenericError() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBError, SetErrorToGenericError); + LLDB_INSTRUMENT_VA(this); CreateIfNeeded(); m_opaque_up->SetErrorToGenericError(); } void SBError::SetErrorString(const char *err_str) { - LLDB_RECORD_METHOD(void, SBError, SetErrorString, (const char *), err_str); + LLDB_INSTRUMENT_VA(this, err_str); CreateIfNeeded(); m_opaque_up->SetErrorString(err_str); @@ -138,11 +135,11 @@ int SBError::SetErrorStringWithFormat(const char *format, ...) 
{ } bool SBError::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBError::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr; } @@ -167,8 +164,7 @@ const lldb_private::Status &SBError::operator*() const { } bool SBError::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBError, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); if (m_opaque_up) { if (m_opaque_up->Success()) diff --git a/lldb/source/API/SBEvent.cpp b/lldb/source/API/SBEvent.cpp index 710ef2cf81188..536680bd1c5ed 100644 --- a/lldb/source/API/SBEvent.cpp +++ b/lldb/source/API/SBEvent.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBEvent.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBStream.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Breakpoint/Breakpoint.h" #include "lldb/Core/StreamFile.h" @@ -22,32 +22,30 @@ using namespace lldb; using namespace lldb_private; -SBEvent::SBEvent() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEvent); } +SBEvent::SBEvent() { LLDB_INSTRUMENT_VA(this); } SBEvent::SBEvent(uint32_t event_type, const char *cstr, uint32_t cstr_len) : m_event_sp(new Event(event_type, new EventDataBytes(cstr, cstr_len))), m_opaque_ptr(m_event_sp.get()) { - LLDB_RECORD_CONSTRUCTOR(SBEvent, (uint32_t, const char *, uint32_t), - event_type, cstr, cstr_len); + LLDB_INSTRUMENT_VA(this, event_type, cstr, cstr_len); } SBEvent::SBEvent(EventSP &event_sp) : m_event_sp(event_sp), m_opaque_ptr(event_sp.get()) { - LLDB_RECORD_CONSTRUCTOR(SBEvent, (lldb::EventSP &), event_sp); + LLDB_INSTRUMENT_VA(this, event_sp); } SBEvent::SBEvent(Event *event_ptr) : m_opaque_ptr(event_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBEvent, 
(lldb_private::Event *), event_ptr); + LLDB_INSTRUMENT_VA(this, event_ptr); } SBEvent::SBEvent(const SBEvent &rhs) : m_event_sp(rhs.m_event_sp), m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBEvent, (const lldb::SBEvent &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBEvent &SBEvent::operator=(const SBEvent &rhs) { - LLDB_RECORD_METHOD(const lldb::SBEvent &, - SBEvent, operator=,(const lldb::SBEvent &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_event_sp = rhs.m_event_sp; @@ -59,7 +57,7 @@ const SBEvent &SBEvent::operator=(const SBEvent &rhs) { SBEvent::~SBEvent() = default; const char *SBEvent::GetDataFlavor() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBEvent, GetDataFlavor); + LLDB_INSTRUMENT_VA(this); Event *lldb_event = get(); if (lldb_event) { @@ -71,8 +69,7 @@ const char *SBEvent::GetDataFlavor() { } uint32_t SBEvent::GetType() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBEvent, GetType); - + LLDB_INSTRUMENT_VA(this); const Event *lldb_event = get(); uint32_t event_type = 0; @@ -84,8 +81,7 @@ uint32_t SBEvent::GetType() const { } SBBroadcaster SBEvent::GetBroadcaster() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBEvent, - GetBroadcaster); + LLDB_INSTRUMENT_VA(this); SBBroadcaster broadcaster; const Event *lldb_event = get(); @@ -95,7 +91,7 @@ SBBroadcaster SBEvent::GetBroadcaster() const { } const char *SBEvent::GetBroadcasterClass() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBEvent, GetBroadcasterClass); + LLDB_INSTRUMENT_VA(this); const Event *lldb_event = get(); if (lldb_event) @@ -105,8 +101,7 @@ const char *SBEvent::GetBroadcasterClass() const { } bool SBEvent::BroadcasterMatchesPtr(const SBBroadcaster *broadcaster) { - LLDB_RECORD_METHOD(bool, SBEvent, BroadcasterMatchesPtr, - (const lldb::SBBroadcaster *), broadcaster); + LLDB_INSTRUMENT_VA(this, broadcaster); if (broadcaster) return BroadcasterMatchesRef(*broadcaster); @@ -114,8 +109,7 @@ bool 
SBEvent::BroadcasterMatchesPtr(const SBBroadcaster *broadcaster) { } bool SBEvent::BroadcasterMatchesRef(const SBBroadcaster &broadcaster) { - LLDB_RECORD_METHOD(bool, SBEvent, BroadcasterMatchesRef, - (const lldb::SBBroadcaster &), broadcaster); + LLDB_INSTRUMENT_VA(this, broadcaster); Event *lldb_event = get(); bool success = false; @@ -127,7 +121,7 @@ bool SBEvent::BroadcasterMatchesRef(const SBBroadcaster &broadcaster) { } void SBEvent::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBEvent, Clear); + LLDB_INSTRUMENT_VA(this); Event *lldb_event = get(); if (lldb_event) @@ -158,11 +152,11 @@ void SBEvent::reset(Event *event_ptr) { } bool SBEvent::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBEvent, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBEvent::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBEvent, operator bool); + LLDB_INSTRUMENT_VA(this); // Do NOT use m_opaque_ptr directly!!! Must use the SBEvent::get() accessor. // See comments in SBEvent::get().... 
@@ -170,16 +164,14 @@ SBEvent::operator bool() const { } const char *SBEvent::GetCStringFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(const char *, SBEvent, GetCStringFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return static_cast( EventDataBytes::GetBytesFromEvent(event.get())); } bool SBEvent::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBEvent, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -192,8 +184,7 @@ bool SBEvent::GetDescription(SBStream &description) { } bool SBEvent::GetDescription(SBStream &description) const { - LLDB_RECORD_METHOD_CONST(bool, SBEvent, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBExecutionContext.cpp b/lldb/source/API/SBExecutionContext.cpp index 1f53e84ac6e64..a0b68e6efe384 100644 --- a/lldb/source/API/SBExecutionContext.cpp +++ b/lldb/source/API/SBExecutionContext.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBExecutionContext.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBFrame.h" #include "lldb/API/SBProcess.h" @@ -19,48 +19,43 @@ using namespace lldb; using namespace lldb_private; -SBExecutionContext::SBExecutionContext() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExecutionContext); -} +SBExecutionContext::SBExecutionContext() { LLDB_INSTRUMENT_VA(this); } SBExecutionContext::SBExecutionContext(const lldb::SBExecutionContext &rhs) : m_exe_ctx_sp(rhs.m_exe_ctx_sp) { - LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, - (const lldb::SBExecutionContext &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBExecutionContext::SBExecutionContext( lldb::ExecutionContextRefSP exe_ctx_ref_sp) : m_exe_ctx_sp(exe_ctx_ref_sp) { - 
LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (lldb::ExecutionContextRefSP), - exe_ctx_ref_sp); + LLDB_INSTRUMENT_VA(this, exe_ctx_ref_sp); } SBExecutionContext::SBExecutionContext(const lldb::SBTarget &target) : m_exe_ctx_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBTarget &), target); + LLDB_INSTRUMENT_VA(this, target); m_exe_ctx_sp->SetTargetSP(target.GetSP()); } SBExecutionContext::SBExecutionContext(const lldb::SBProcess &process) : m_exe_ctx_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBProcess &), - process); + LLDB_INSTRUMENT_VA(this, process); m_exe_ctx_sp->SetProcessSP(process.GetSP()); } SBExecutionContext::SBExecutionContext(lldb::SBThread thread) : m_exe_ctx_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (lldb::SBThread), thread); + LLDB_INSTRUMENT_VA(this, thread); m_exe_ctx_sp->SetThreadPtr(thread.get()); } SBExecutionContext::SBExecutionContext(const lldb::SBFrame &frame) : m_exe_ctx_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBFrame &), frame); + LLDB_INSTRUMENT_VA(this, frame); m_exe_ctx_sp->SetFrameSP(frame.GetFrameSP()); } @@ -69,9 +64,7 @@ SBExecutionContext::~SBExecutionContext() = default; const SBExecutionContext &SBExecutionContext:: operator=(const lldb::SBExecutionContext &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBExecutionContext &, - SBExecutionContext, operator=,(const lldb::SBExecutionContext &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_exe_ctx_sp = rhs.m_exe_ctx_sp; return *this; @@ -82,8 +75,7 @@ ExecutionContextRef *SBExecutionContext::get() const { } SBTarget SBExecutionContext::GetTarget() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBExecutionContext, - GetTarget); + LLDB_INSTRUMENT_VA(this); SBTarget sb_target; if (m_exe_ctx_sp) { @@ -95,8 +87,7 @@ SBTarget SBExecutionContext::GetTarget() const { } SBProcess SBExecutionContext::GetProcess() 
const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBProcess, SBExecutionContext, - GetProcess); + LLDB_INSTRUMENT_VA(this); SBProcess sb_process; if (m_exe_ctx_sp) { @@ -108,8 +99,7 @@ SBProcess SBExecutionContext::GetProcess() const { } SBThread SBExecutionContext::GetThread() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBExecutionContext, - GetThread); + LLDB_INSTRUMENT_VA(this); SBThread sb_thread; if (m_exe_ctx_sp) { @@ -121,7 +111,7 @@ SBThread SBExecutionContext::GetThread() const { } SBFrame SBExecutionContext::GetFrame() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFrame, SBExecutionContext, GetFrame); + LLDB_INSTRUMENT_VA(this); SBFrame sb_frame; if (m_exe_ctx_sp) { diff --git a/lldb/source/API/SBExpressionOptions.cpp b/lldb/source/API/SBExpressionOptions.cpp index 6dfff7ee46036..191e38fe5cfc4 100644 --- a/lldb/source/API/SBExpressionOptions.cpp +++ b/lldb/source/API/SBExpressionOptions.cpp @@ -7,31 +7,28 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBExpressionOptions.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; SBExpressionOptions::SBExpressionOptions() : m_opaque_up(new EvaluateExpressionOptions()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExpressionOptions); + LLDB_INSTRUMENT_VA(this); } SBExpressionOptions::SBExpressionOptions(const SBExpressionOptions &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBExpressionOptions, - (const lldb::SBExpressionOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } const SBExpressionOptions &SBExpressionOptions:: operator=(const SBExpressionOptions &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBExpressionOptions &, - SBExpressionOptions, operator=,(const lldb::SBExpressionOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != 
&rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -41,78 +38,68 @@ operator=(const SBExpressionOptions &rhs) { SBExpressionOptions::~SBExpressionOptions() = default; bool SBExpressionOptions::GetCoerceResultToId() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, - GetCoerceResultToId); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->DoesCoerceToId(); } void SBExpressionOptions::SetCoerceResultToId(bool coerce) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetCoerceResultToId, (bool), - coerce); + LLDB_INSTRUMENT_VA(this, coerce); m_opaque_up->SetCoerceToId(coerce); } bool SBExpressionOptions::GetUnwindOnError() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetUnwindOnError); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->DoesUnwindOnError(); } void SBExpressionOptions::SetUnwindOnError(bool unwind) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetUnwindOnError, (bool), - unwind); + LLDB_INSTRUMENT_VA(this, unwind); m_opaque_up->SetUnwindOnError(unwind); } bool SBExpressionOptions::GetIgnoreBreakpoints() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, - GetIgnoreBreakpoints); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->DoesIgnoreBreakpoints(); } void SBExpressionOptions::SetIgnoreBreakpoints(bool ignore) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetIgnoreBreakpoints, (bool), - ignore); + LLDB_INSTRUMENT_VA(this, ignore); m_opaque_up->SetIgnoreBreakpoints(ignore); } lldb::DynamicValueType SBExpressionOptions::GetFetchDynamicValue() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::DynamicValueType, SBExpressionOptions, - GetFetchDynamicValue); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetUseDynamic(); } void SBExpressionOptions::SetFetchDynamicValue(lldb::DynamicValueType dynamic) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetFetchDynamicValue, - (lldb::DynamicValueType), dynamic); + LLDB_INSTRUMENT_VA(this, dynamic); m_opaque_up->SetUseDynamic(dynamic); } uint32_t 
SBExpressionOptions::GetTimeoutInMicroSeconds() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBExpressionOptions, - GetTimeoutInMicroSeconds); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetTimeout() ? m_opaque_up->GetTimeout()->count() : 0; } void SBExpressionOptions::SetTimeoutInMicroSeconds(uint32_t timeout) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTimeoutInMicroSeconds, - (uint32_t), timeout); + LLDB_INSTRUMENT_VA(this, timeout); m_opaque_up->SetTimeout(timeout == 0 ? Timeout(llvm::None) : std::chrono::microseconds(timeout)); } uint32_t SBExpressionOptions::GetOneThreadTimeoutInMicroSeconds() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBExpressionOptions, - GetOneThreadTimeoutInMicroSeconds); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetOneThreadTimeout() ? m_opaque_up->GetOneThreadTimeout()->count() @@ -120,8 +107,7 @@ uint32_t SBExpressionOptions::GetOneThreadTimeoutInMicroSeconds() const { } void SBExpressionOptions::SetOneThreadTimeoutInMicroSeconds(uint32_t timeout) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, - SetOneThreadTimeoutInMicroSeconds, (uint32_t), timeout); + LLDB_INSTRUMENT_VA(this, timeout); m_opaque_up->SetOneThreadTimeout(timeout == 0 ? 
Timeout(llvm::None) @@ -129,148 +115,135 @@ void SBExpressionOptions::SetOneThreadTimeoutInMicroSeconds(uint32_t timeout) { } bool SBExpressionOptions::GetTryAllThreads() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetTryAllThreads); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetTryAllThreads(); } void SBExpressionOptions::SetTryAllThreads(bool run_others) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTryAllThreads, (bool), - run_others); + LLDB_INSTRUMENT_VA(this, run_others); m_opaque_up->SetTryAllThreads(run_others); } bool SBExpressionOptions::GetStopOthers() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetStopOthers); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetStopOthers(); } void SBExpressionOptions::SetStopOthers(bool run_others) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetStopOthers, (bool), - run_others); + LLDB_INSTRUMENT_VA(this, run_others); m_opaque_up->SetStopOthers(run_others); } bool SBExpressionOptions::GetTrapExceptions() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, - GetTrapExceptions); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetTrapExceptions(); } void SBExpressionOptions::SetTrapExceptions(bool trap_exceptions) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTrapExceptions, (bool), - trap_exceptions); + LLDB_INSTRUMENT_VA(this, trap_exceptions); m_opaque_up->SetTrapExceptions(trap_exceptions); } void SBExpressionOptions::SetLanguage(lldb::LanguageType language) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetLanguage, - (lldb::LanguageType), language); + LLDB_INSTRUMENT_VA(this, language); m_opaque_up->SetLanguage(language); } void SBExpressionOptions::SetCancelCallback( lldb::ExpressionCancelCallback callback, void *baton) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetCancelCallback, - (lldb::ExpressionCancelCallback, void *), callback, baton); + LLDB_INSTRUMENT_VA(this, callback, baton); 
m_opaque_up->SetCancelCallback(callback, baton); } bool SBExpressionOptions::GetGenerateDebugInfo() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetGenerateDebugInfo); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetGenerateDebugInfo(); } void SBExpressionOptions::SetGenerateDebugInfo(bool b) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetGenerateDebugInfo, (bool), - b); + LLDB_INSTRUMENT_VA(this, b); return m_opaque_up->SetGenerateDebugInfo(b); } bool SBExpressionOptions::GetSuppressPersistentResult() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, - GetSuppressPersistentResult); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetResultIsInternal(); } void SBExpressionOptions::SetSuppressPersistentResult(bool b) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetSuppressPersistentResult, - (bool), b); + LLDB_INSTRUMENT_VA(this, b); return m_opaque_up->SetResultIsInternal(b); } const char *SBExpressionOptions::GetPrefix() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBExpressionOptions, - GetPrefix); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetPrefix(); } void SBExpressionOptions::SetPrefix(const char *prefix) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetPrefix, (const char *), - prefix); + LLDB_INSTRUMENT_VA(this, prefix); return m_opaque_up->SetPrefix(prefix); } bool SBExpressionOptions::GetAutoApplyFixIts() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetAutoApplyFixIts); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetAutoApplyFixIts(); } void SBExpressionOptions::SetAutoApplyFixIts(bool b) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetAutoApplyFixIts, (bool), b); + LLDB_INSTRUMENT_VA(this, b); return m_opaque_up->SetAutoApplyFixIts(b); } uint64_t SBExpressionOptions::GetRetriesWithFixIts() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBExpressionOptions, - GetRetriesWithFixIts); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetRetriesWithFixIts(); } void 
SBExpressionOptions::SetRetriesWithFixIts(uint64_t retries) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetRetriesWithFixIts, - (uint64_t), retries); + LLDB_INSTRUMENT_VA(this, retries); return m_opaque_up->SetRetriesWithFixIts(retries); } bool SBExpressionOptions::GetTopLevel() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetTopLevel); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetExecutionPolicy() == eExecutionPolicyTopLevel; } void SBExpressionOptions::SetTopLevel(bool b) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTopLevel, (bool), b); + LLDB_INSTRUMENT_VA(this, b); m_opaque_up->SetExecutionPolicy(b ? eExecutionPolicyTopLevel : m_opaque_up->default_execution_policy); } bool SBExpressionOptions::GetAllowJIT() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetAllowJIT); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetExecutionPolicy() != eExecutionPolicyNever; } void SBExpressionOptions::SetAllowJIT(bool allow) { - LLDB_RECORD_METHOD(void, SBExpressionOptions, SetAllowJIT, (bool), allow); + LLDB_INSTRUMENT_VA(this, allow); m_opaque_up->SetExecutionPolicy(allow ? m_opaque_up->default_execution_policy : eExecutionPolicyNever); diff --git a/lldb/source/API/SBFile.cpp b/lldb/source/API/SBFile.cpp index 4fae46ea10cb4..0db859c3b7468 100644 --- a/lldb/source/API/SBFile.cpp +++ b/lldb/source/API/SBFile.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFile.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBError.h" #include "lldb/Host/File.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; @@ -19,33 +19,31 @@ SBFile::~SBFile() = default; SBFile::SBFile(FileSP file_sp) : m_opaque_sp(file_sp) { // We have no way to capture the incoming FileSP as the class isn't // instrumented, so pretend that it's always null. 
- LLDB_RECORD_CONSTRUCTOR(SBFile, (lldb::FileSP), nullptr); + LLDB_INSTRUMENT_VA(this, file_sp); } SBFile::SBFile(const SBFile &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBFile, (const lldb::SBFile&), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBFile &SBFile ::operator=(const SBFile &rhs) { - LLDB_RECORD_METHOD(lldb::SBFile &, - SBFile, operator=,(const lldb::SBFile &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; return *this; } -SBFile::SBFile() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFile); } +SBFile::SBFile() { LLDB_INSTRUMENT_VA(this); } SBFile::SBFile(FILE *file, bool transfer_ownership) { - LLDB_RECORD_CONSTRUCTOR(SBFile, (FILE *, bool), file, transfer_ownership); + LLDB_INSTRUMENT_VA(this, file, transfer_ownership); m_opaque_sp = std::make_shared(file, transfer_ownership); } SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) { - LLDB_RECORD_CONSTRUCTOR(SBFile, (int, const char *, bool), fd, mode, - transfer_owndership); + LLDB_INSTRUMENT_VA(this, fd, mode, transfer_owndership); auto options = File::GetOptionsFromMode(mode); if (!options) { @@ -57,8 +55,7 @@ SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) { } SBError SBFile::Read(uint8_t *buf, size_t num_bytes, size_t *bytes_read) { - LLDB_RECORD_METHOD(lldb::SBError, SBFile, Read, (uint8_t *, size_t, size_t *), - buf, num_bytes, bytes_read); + LLDB_INSTRUMENT_VA(this, buf, num_bytes, bytes_read); SBError error; if (!m_opaque_sp) { @@ -74,9 +71,7 @@ SBError SBFile::Read(uint8_t *buf, size_t num_bytes, size_t *bytes_read) { SBError SBFile::Write(const uint8_t *buf, size_t num_bytes, size_t *bytes_written) { - LLDB_RECORD_METHOD(lldb::SBError, SBFile, Write, - (const uint8_t *, size_t, size_t *), buf, num_bytes, - bytes_written); + LLDB_INSTRUMENT_VA(this, buf, num_bytes, bytes_written); SBError error; if (!m_opaque_sp) { @@ -91,7 +86,7 @@ SBError SBFile::Write(const uint8_t *buf, size_t num_bytes, } SBError 
SBFile::Flush() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBFile, Flush); + LLDB_INSTRUMENT_VA(this); SBError error; if (!m_opaque_sp) { @@ -104,12 +99,12 @@ SBError SBFile::Flush() { } bool SBFile::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, IsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp && m_opaque_sp->IsValid(); } SBError SBFile::Close() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBFile, Close); + LLDB_INSTRUMENT_VA(this); SBError error; if (m_opaque_sp) { Status status = m_opaque_sp->Close(); @@ -119,16 +114,16 @@ SBError SBFile::Close() { } SBFile::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator bool); + LLDB_INSTRUMENT_VA(this); return IsValid(); } bool SBFile::operator!() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator!); + LLDB_INSTRUMENT_VA(this); return !IsValid(); } FileSP SBFile::GetFile() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(FileSP, SBFile, GetFile); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp; } diff --git a/lldb/source/API/SBFileSpec.cpp b/lldb/source/API/SBFileSpec.cpp index b1b676af2d5c0..2bec9a7a1e770 100644 --- a/lldb/source/API/SBFileSpec.cpp +++ b/lldb/source/API/SBFileSpec.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFileSpec.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/PosixApi.h" #include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include "llvm/ADT/SmallString.h" @@ -24,11 +24,11 @@ using namespace lldb; using namespace lldb_private; SBFileSpec::SBFileSpec() : m_opaque_up(new lldb_private::FileSpec()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpec); + LLDB_INSTRUMENT_VA(this); } SBFileSpec::SBFileSpec(const SBFileSpec &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const lldb::SBFileSpec &), 
rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -38,14 +38,14 @@ SBFileSpec::SBFileSpec(const lldb_private::FileSpec &fspec) // Deprecated!!! SBFileSpec::SBFileSpec(const char *path) : m_opaque_up(new FileSpec(path)) { - LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const char *), path); + LLDB_INSTRUMENT_VA(this, path); FileSystem::Instance().Resolve(*m_opaque_up); } SBFileSpec::SBFileSpec(const char *path, bool resolve) : m_opaque_up(new FileSpec(path)) { - LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const char *, bool), path, resolve); + LLDB_INSTRUMENT_VA(this, path, resolve); if (resolve) FileSystem::Instance().Resolve(*m_opaque_up); @@ -54,8 +54,7 @@ SBFileSpec::SBFileSpec(const char *path, bool resolve) SBFileSpec::~SBFileSpec() = default; const SBFileSpec &SBFileSpec::operator=(const SBFileSpec &rhs) { - LLDB_RECORD_METHOD(const lldb::SBFileSpec &, - SBFileSpec, operator=,(const lldb::SBFileSpec &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -63,46 +62,42 @@ const SBFileSpec &SBFileSpec::operator=(const SBFileSpec &rhs) { } bool SBFileSpec::operator==(const SBFileSpec &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, operator==,(const SBFileSpec &rhs), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return ref() == rhs.ref(); } bool SBFileSpec::operator!=(const SBFileSpec &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, operator!=,(const SBFileSpec &rhs), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return !(*this == rhs); } bool SBFileSpec::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBFileSpec::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->operator bool(); } bool SBFileSpec::Exists() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, Exists); + LLDB_INSTRUMENT_VA(this); return 
FileSystem::Instance().Exists(*m_opaque_up); } bool SBFileSpec::ResolveExecutableLocation() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBFileSpec, ResolveExecutableLocation); + LLDB_INSTRUMENT_VA(this); return FileSystem::Instance().ResolveExecutableLocation(*m_opaque_up); } int SBFileSpec::ResolvePath(const char *src_path, char *dst_path, size_t dst_len) { - LLDB_RECORD_STATIC_METHOD(int, SBFileSpec, ResolvePath, - (const char *, char *, size_t), src_path, dst_path, - dst_len); + LLDB_INSTRUMENT_VA(src_path, dst_path, dst_len); llvm::SmallString<64> result(src_path); FileSystem::Instance().Resolve(result); @@ -111,13 +106,13 @@ int SBFileSpec::ResolvePath(const char *src_path, char *dst_path, } const char *SBFileSpec::GetFilename() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFileSpec, GetFilename); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetFilename().AsCString(); } const char *SBFileSpec::GetDirectory() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFileSpec, GetDirectory); + LLDB_INSTRUMENT_VA(this); FileSpec directory{*m_opaque_up}; directory.GetFilename().Clear(); @@ -125,7 +120,7 @@ const char *SBFileSpec::GetDirectory() const { } void SBFileSpec::SetFilename(const char *filename) { - LLDB_RECORD_METHOD(void, SBFileSpec, SetFilename, (const char *), filename); + LLDB_INSTRUMENT_VA(this, filename); if (filename && filename[0]) m_opaque_up->GetFilename().SetCString(filename); @@ -134,7 +129,7 @@ void SBFileSpec::SetFilename(const char *filename) { } void SBFileSpec::SetDirectory(const char *directory) { - LLDB_RECORD_METHOD(void, SBFileSpec, SetDirectory, (const char *), directory); + LLDB_INSTRUMENT_VA(this, directory); if (directory && directory[0]) m_opaque_up->GetDirectory().SetCString(directory); @@ -143,8 +138,7 @@ void SBFileSpec::SetDirectory(const char *directory) { } uint32_t SBFileSpec::GetPath(char *dst_path, size_t dst_len) const { - LLDB_RECORD_METHOD_CONST(uint32_t, SBFileSpec, GetPath, (char *, size_t), - dst_path, 
"", dst_len); + LLDB_INSTRUMENT_VA(this, dst_path, dst_len); uint32_t result = m_opaque_up->GetPath(dst_path, dst_len); @@ -172,8 +166,7 @@ void SBFileSpec::SetFileSpec(const lldb_private::FileSpec &fs) { } bool SBFileSpec::GetDescription(SBStream &description) const { - LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); char path[PATH_MAX]; @@ -183,7 +176,7 @@ bool SBFileSpec::GetDescription(SBStream &description) const { } void SBFileSpec::AppendPathComponent(const char *fn) { - LLDB_RECORD_METHOD(void, SBFileSpec, AppendPathComponent, (const char *), fn); + LLDB_INSTRUMENT_VA(this, fn); m_opaque_up->AppendPathComponent(fn); } diff --git a/lldb/source/API/SBFileSpecList.cpp b/lldb/source/API/SBFileSpecList.cpp index 6e85957cb088a..cf81c42340879 100644 --- a/lldb/source/API/SBFileSpecList.cpp +++ b/lldb/source/API/SBFileSpecList.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFileSpecList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBStream.h" #include "lldb/Core/FileSpecList.h" #include "lldb/Host/PosixApi.h" #include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include @@ -22,11 +22,11 @@ using namespace lldb; using namespace lldb_private; SBFileSpecList::SBFileSpecList() : m_opaque_up(new FileSpecList()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpecList); + LLDB_INSTRUMENT_VA(this); } SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBFileSpecList, (const lldb::SBFileSpecList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -34,9 +34,7 @@ SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) { SBFileSpecList::~SBFileSpecList() = default; const 
SBFileSpecList &SBFileSpecList::operator=(const SBFileSpecList &rhs) { - LLDB_RECORD_METHOD(const lldb::SBFileSpecList &, - SBFileSpecList, operator=,(const lldb::SBFileSpecList &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -44,43 +42,38 @@ const SBFileSpecList &SBFileSpecList::operator=(const SBFileSpecList &rhs) { } uint32_t SBFileSpecList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBFileSpecList, GetSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSize(); } void SBFileSpecList::Append(const SBFileSpec &sb_file) { - LLDB_RECORD_METHOD(void, SBFileSpecList, Append, (const lldb::SBFileSpec &), - sb_file); + LLDB_INSTRUMENT_VA(this, sb_file); m_opaque_up->Append(sb_file.ref()); } bool SBFileSpecList::AppendIfUnique(const SBFileSpec &sb_file) { - LLDB_RECORD_METHOD(bool, SBFileSpecList, AppendIfUnique, - (const lldb::SBFileSpec &), sb_file); + LLDB_INSTRUMENT_VA(this, sb_file); return m_opaque_up->AppendIfUnique(sb_file.ref()); } void SBFileSpecList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBFileSpecList, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up->Clear(); } uint32_t SBFileSpecList::FindFileIndex(uint32_t idx, const SBFileSpec &sb_file, bool full) { - LLDB_RECORD_METHOD(uint32_t, SBFileSpecList, FindFileIndex, - (uint32_t, const lldb::SBFileSpec &, bool), idx, sb_file, - full); + LLDB_INSTRUMENT_VA(this, idx, sb_file, full); return m_opaque_up->FindFileIndex(idx, sb_file.ref(), full); } const SBFileSpec SBFileSpecList::GetFileSpecAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(const lldb::SBFileSpec, SBFileSpecList, - GetFileSpecAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBFileSpec new_spec; new_spec.SetFileSpec(m_opaque_up->GetFileSpecAtIndex(idx)); @@ -104,8 +97,7 @@ const lldb_private::FileSpecList &SBFileSpecList::ref() const { } bool SBFileSpecList::GetDescription(SBStream &description) const { - LLDB_RECORD_METHOD_CONST(bool, 
SBFileSpecList, GetDescription, - (lldb::SBStream &), description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp index ab3886e24d6fb..ffbbed00f8e21 100644 --- a/lldb/source/API/SBFrame.cpp +++ b/lldb/source/API/SBFrame.cpp @@ -14,7 +14,6 @@ #include "lldb/lldb-types.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/Core/Address.h" #include "lldb/Core/StreamFile.h" @@ -38,6 +37,7 @@ #include "lldb/Target/Target.h" #include "lldb/Target/Thread.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include "lldb/API/SBAddress.h" @@ -55,17 +55,16 @@ using namespace lldb; using namespace lldb_private; SBFrame::SBFrame() : m_opaque_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFrame); + LLDB_INSTRUMENT_VA(this); } SBFrame::SBFrame(const StackFrameSP &lldb_object_sp) : m_opaque_sp(new ExecutionContextRef(lldb_object_sp)) { - LLDB_RECORD_CONSTRUCTOR(SBFrame, (const lldb::StackFrameSP &), - lldb_object_sp); + LLDB_INSTRUMENT_VA(this, lldb_object_sp); } SBFrame::SBFrame(const SBFrame &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBFrame, (const lldb::SBFrame &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = clone(rhs.m_opaque_sp); } @@ -73,8 +72,7 @@ SBFrame::SBFrame(const SBFrame &rhs) { SBFrame::~SBFrame() = default; const SBFrame &SBFrame::operator=(const SBFrame &rhs) { - LLDB_RECORD_METHOD(const lldb::SBFrame &, - SBFrame, operator=,(const lldb::SBFrame &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = clone(rhs.m_opaque_sp); @@ -90,11 +88,11 @@ void SBFrame::SetFrameSP(const StackFrameSP &lldb_object_sp) { } bool SBFrame::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBFrame::operator bool() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, operator bool); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -112,8 +110,7 @@ SBFrame::operator bool() const { } SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const { - LLDB_RECORD_METHOD_CONST(lldb::SBSymbolContext, SBFrame, GetSymbolContext, - (uint32_t), resolve_scope); + LLDB_INSTRUMENT_VA(this, resolve_scope); SBSymbolContext sb_sym_ctx; std::unique_lock lock; @@ -133,7 +130,7 @@ SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const { } SBModule SBFrame::GetModule() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBModule, SBFrame, GetModule); + LLDB_INSTRUMENT_VA(this); SBModule sb_module; ModuleSP module_sp; @@ -158,8 +155,7 @@ SBModule SBFrame::GetModule() const { } SBCompileUnit SBFrame::GetCompileUnit() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBCompileUnit, SBFrame, - GetCompileUnit); + LLDB_INSTRUMENT_VA(this); SBCompileUnit sb_comp_unit; std::unique_lock lock; @@ -183,7 +179,7 @@ SBCompileUnit SBFrame::GetCompileUnit() const { } SBFunction SBFrame::GetFunction() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFunction, SBFrame, GetFunction); + LLDB_INSTRUMENT_VA(this); SBFunction sb_function; std::unique_lock lock; @@ -207,7 +203,7 @@ SBFunction SBFrame::GetFunction() const { } SBSymbol SBFrame::GetSymbol() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBSymbol, SBFrame, GetSymbol); + LLDB_INSTRUMENT_VA(this); SBSymbol sb_symbol; std::unique_lock lock; @@ -230,7 +226,7 @@ SBSymbol SBFrame::GetSymbol() const { } SBBlock SBFrame::GetBlock() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBlock, SBFrame, GetBlock); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; std::unique_lock lock; @@ -251,7 +247,7 @@ SBBlock SBFrame::GetBlock() const { } SBBlock SBFrame::GetFrameBlock() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBlock, SBFrame, GetFrameBlock); + LLDB_INSTRUMENT_VA(this); SBBlock 
sb_block; std::unique_lock lock; @@ -272,7 +268,7 @@ SBBlock SBFrame::GetFrameBlock() const { } SBLineEntry SBFrame::GetLineEntry() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBLineEntry, SBFrame, GetLineEntry); + LLDB_INSTRUMENT_VA(this); SBLineEntry sb_line_entry; std::unique_lock lock; @@ -295,7 +291,7 @@ SBLineEntry SBFrame::GetLineEntry() const { } uint32_t SBFrame::GetFrameID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBFrame, GetFrameID); + LLDB_INSTRUMENT_VA(this); uint32_t frame_idx = UINT32_MAX; @@ -310,7 +306,7 @@ uint32_t SBFrame::GetFrameID() const { } lldb::addr_t SBFrame::GetCFA() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetCFA); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -322,7 +318,7 @@ lldb::addr_t SBFrame::GetCFA() const { } addr_t SBFrame::GetPC() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetPC); + LLDB_INSTRUMENT_VA(this); addr_t addr = LLDB_INVALID_ADDRESS; std::unique_lock lock; @@ -346,7 +342,7 @@ addr_t SBFrame::GetPC() const { } bool SBFrame::SetPC(addr_t new_pc) { - LLDB_RECORD_METHOD(bool, SBFrame, SetPC, (lldb::addr_t), new_pc); + LLDB_INSTRUMENT_VA(this, new_pc); bool ret_val = false; std::unique_lock lock; @@ -369,7 +365,7 @@ bool SBFrame::SetPC(addr_t new_pc) { } addr_t SBFrame::GetSP() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetSP); + LLDB_INSTRUMENT_VA(this); addr_t addr = LLDB_INVALID_ADDRESS; std::unique_lock lock; @@ -392,7 +388,7 @@ addr_t SBFrame::GetSP() const { } addr_t SBFrame::GetFP() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetFP); + LLDB_INSTRUMENT_VA(this); addr_t addr = LLDB_INVALID_ADDRESS; std::unique_lock lock; @@ -415,7 +411,7 @@ addr_t SBFrame::GetFP() const { } SBAddress SBFrame::GetPCAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBFrame, GetPCAddress); + LLDB_INSTRUMENT_VA(this); SBAddress sb_addr; 
std::unique_lock lock; @@ -436,14 +432,13 @@ SBAddress SBFrame::GetPCAddress() const { } void SBFrame::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBFrame, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp->Clear(); } lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, GetValueForVariablePath, - (const char *), var_path); + LLDB_INSTRUMENT_VA(this, var_path); SBValue sb_value; std::unique_lock lock; @@ -461,9 +456,7 @@ lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path) { lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path, DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, GetValueForVariablePath, - (const char *, lldb::DynamicValueType), var_path, - use_dynamic); + LLDB_INSTRUMENT_VA(this, var_path, use_dynamic); SBValue sb_value; if (var_path == nullptr || var_path[0] == '\0') { @@ -496,8 +489,7 @@ lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path, } SBValue SBFrame::FindVariable(const char *name) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindVariable, (const char *), - name); + LLDB_INSTRUMENT_VA(this, name); SBValue value; std::unique_lock lock; @@ -515,8 +507,7 @@ SBValue SBFrame::FindVariable(const char *name) { SBValue SBFrame::FindVariable(const char *name, lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindVariable, - (const char *, lldb::DynamicValueType), name, use_dynamic); + LLDB_INSTRUMENT_VA(this, name, use_dynamic); VariableSP var_sp; SBValue sb_value; @@ -549,8 +540,7 @@ SBValue SBFrame::FindVariable(const char *name, } SBValue SBFrame::FindValue(const char *name, ValueType value_type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindValue, - (const char *, lldb::ValueType), name, value_type); + LLDB_INSTRUMENT_VA(this, name, value_type); SBValue value; std::unique_lock lock; @@ -568,9 +558,7 @@ SBValue SBFrame::FindValue(const char *name, ValueType value_type) { 
SBValue SBFrame::FindValue(const char *name, ValueType value_type, lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindValue, - (const char *, lldb::ValueType, lldb::DynamicValueType), - name, value_type, use_dynamic); + LLDB_INSTRUMENT_VA(this, name, value_type, use_dynamic); SBValue sb_value; @@ -682,8 +670,7 @@ SBValue SBFrame::FindValue(const char *name, ValueType value_type, } bool SBFrame::IsEqual(const SBFrame &that) const { - LLDB_RECORD_METHOD_CONST(bool, SBFrame, IsEqual, (const lldb::SBFrame &), - that); + LLDB_INSTRUMENT_VA(this, that); lldb::StackFrameSP this_sp = GetFrameSP(); lldb::StackFrameSP that_sp = that.GetFrameSP(); @@ -691,21 +678,19 @@ bool SBFrame::IsEqual(const SBFrame &that) const { } bool SBFrame::operator==(const SBFrame &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBFrame, operator==,(const lldb::SBFrame &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return IsEqual(rhs); } bool SBFrame::operator!=(const SBFrame &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBFrame, operator!=,(const lldb::SBFrame &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return !IsEqual(rhs); } SBThread SBFrame::GetThread() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBFrame, GetThread); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -717,7 +702,7 @@ SBThread SBFrame::GetThread() const { } const char *SBFrame::Disassemble() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFrame, Disassemble); + LLDB_INSTRUMENT_VA(this); const char *disassembly = nullptr; std::unique_lock lock; @@ -741,9 +726,7 @@ const char *SBFrame::Disassemble() const { SBValueList SBFrame::GetVariables(bool arguments, bool locals, bool statics, bool in_scope_only) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables, - (bool, bool, bool, bool), arguments, locals, statics, - in_scope_only); + LLDB_INSTRUMENT_VA(this, arguments, locals, statics, in_scope_only); 
SBValueList value_list; std::unique_lock lock; @@ -773,9 +756,8 @@ SBValueList SBFrame::GetVariables(bool arguments, bool locals, bool statics, lldb::SBValueList SBFrame::GetVariables(bool arguments, bool locals, bool statics, bool in_scope_only, lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables, - (bool, bool, bool, bool, lldb::DynamicValueType), - arguments, locals, statics, in_scope_only, use_dynamic); + LLDB_INSTRUMENT_VA(this, arguments, locals, statics, in_scope_only, + use_dynamic); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -794,8 +776,7 @@ lldb::SBValueList SBFrame::GetVariables(bool arguments, bool locals, } SBValueList SBFrame::GetVariables(const lldb::SBVariablesOptions &options) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables, - (const lldb::SBVariablesOptions &), options); + LLDB_INSTRUMENT_VA(this, options); SBValueList value_list; std::unique_lock lock; @@ -895,7 +876,7 @@ SBValueList SBFrame::GetVariables(const lldb::SBVariablesOptions &options) { } SBValueList SBFrame::GetRegisters() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValueList, SBFrame, GetRegisters); + LLDB_INSTRUMENT_VA(this); SBValueList value_list; std::unique_lock lock; @@ -925,8 +906,7 @@ SBValueList SBFrame::GetRegisters() { } SBValue SBFrame::FindRegister(const char *name) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindRegister, (const char *), - name); + LLDB_INSTRUMENT_VA(this, name); SBValue result; ValueObjectSP value_sp; @@ -957,8 +937,7 @@ SBValue SBFrame::FindRegister(const char *name) { } bool SBFrame::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBFrame, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -984,8 +963,7 @@ bool SBFrame::GetDescription(SBStream &description) { } SBValue SBFrame::EvaluateExpression(const char *expr) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, 
EvaluateExpression, (const char *), - expr); + LLDB_INSTRUMENT_VA(this, expr); SBValue result; std::unique_lock lock; @@ -1012,9 +990,7 @@ SBValue SBFrame::EvaluateExpression(const char *expr) { SBValue SBFrame::EvaluateExpression(const char *expr, lldb::DynamicValueType fetch_dynamic_value) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression, - (const char *, lldb::DynamicValueType), expr, - fetch_dynamic_value); + LLDB_INSTRUMENT_VA(this, expr, fetch_dynamic_value); SBExpressionOptions options; options.SetFetchDynamicValue(fetch_dynamic_value); @@ -1035,9 +1011,7 @@ SBFrame::EvaluateExpression(const char *expr, SBValue SBFrame::EvaluateExpression(const char *expr, lldb::DynamicValueType fetch_dynamic_value, bool unwind_on_error) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression, - (const char *, lldb::DynamicValueType, bool), expr, - fetch_dynamic_value, unwind_on_error); + LLDB_INSTRUMENT_VA(this, expr, fetch_dynamic_value, unwind_on_error); SBExpressionOptions options; std::unique_lock lock; @@ -1057,9 +1031,7 @@ SBValue SBFrame::EvaluateExpression(const char *expr, lldb::SBValue SBFrame::EvaluateExpression(const char *expr, const SBExpressionOptions &options) { - LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression, - (const char *, const lldb::SBExpressionOptions &), expr, - options); + LLDB_INSTRUMENT_VA(this, expr, options); Log *expr_log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); @@ -1110,13 +1082,13 @@ lldb::SBValue SBFrame::EvaluateExpression(const char *expr, } bool SBFrame::IsInlined() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBFrame, IsInlined); + LLDB_INSTRUMENT_VA(this); return static_cast(this)->IsInlined(); } bool SBFrame::IsInlined() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsInlined); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1140,13 +1112,13 @@ bool SBFrame::IsInlined() const { } bool SBFrame::IsArtificial() { - 
LLDB_RECORD_METHOD_NO_ARGS(bool, SBFrame, IsArtificial); + LLDB_INSTRUMENT_VA(this); return static_cast(this)->IsArtificial(); } bool SBFrame::IsArtificial() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsArtificial); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1159,13 +1131,13 @@ bool SBFrame::IsArtificial() const { } const char *SBFrame::GetFunctionName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBFrame, GetFunctionName); + LLDB_INSTRUMENT_VA(this); return static_cast(this)->GetFunctionName(); } lldb::LanguageType SBFrame::GuessLanguage() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::LanguageType, SBFrame, GuessLanguage); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1186,7 +1158,7 @@ lldb::LanguageType SBFrame::GuessLanguage() const { } const char *SBFrame::GetFunctionName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFrame, GetFunctionName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; std::unique_lock lock; @@ -1228,7 +1200,7 @@ const char *SBFrame::GetFunctionName() const { } const char *SBFrame::GetDisplayFunctionName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBFrame, GetDisplayFunctionName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index 2b1bc05e33561..562cae4e89069 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBFunction.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Disassembler.h" @@ -18,23 +17,23 @@ #include "lldb/Symbol/VariableList.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" 
using namespace lldb; using namespace lldb_private; -SBFunction::SBFunction() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFunction); } +SBFunction::SBFunction() { LLDB_INSTRUMENT_VA(this); } SBFunction::SBFunction(lldb_private::Function *lldb_object_ptr) : m_opaque_ptr(lldb_object_ptr) {} SBFunction::SBFunction(const lldb::SBFunction &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBFunction, (const lldb::SBFunction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBFunction &SBFunction::operator=(const SBFunction &rhs) { - LLDB_RECORD_METHOD(const lldb::SBFunction &, - SBFunction, operator=,(const lldb::SBFunction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_ptr = rhs.m_opaque_ptr; return *this; @@ -43,17 +42,17 @@ const SBFunction &SBFunction::operator=(const SBFunction &rhs) { SBFunction::~SBFunction() { m_opaque_ptr = nullptr; } bool SBFunction::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBFunction::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } const char *SBFunction::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetName); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; if (m_opaque_ptr) @@ -63,7 +62,7 @@ const char *SBFunction::GetName() const { } const char *SBFunction::GetDisplayName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetDisplayName); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; if (m_opaque_ptr) @@ -73,7 +72,7 @@ const char *SBFunction::GetDisplayName() const { } const char *SBFunction::GetMangledName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetMangledName); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; if (m_opaque_ptr) @@ -82,21 +81,19 @@ const char *SBFunction::GetMangledName() const { } bool 
SBFunction::operator==(const SBFunction &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBFunction, operator==,(const lldb::SBFunction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr == rhs.m_opaque_ptr; } bool SBFunction::operator!=(const SBFunction &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBFunction, operator!=,(const lldb::SBFunction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr != rhs.m_opaque_ptr; } bool SBFunction::GetDescription(SBStream &s) { - LLDB_RECORD_METHOD(bool, SBFunction, GetDescription, (lldb::SBStream &), s); + LLDB_INSTRUMENT_VA(this, s); if (m_opaque_ptr) { s.Printf("SBFunction: id = 0x%8.8" PRIx64 ", name = %s", @@ -111,16 +108,14 @@ bool SBFunction::GetDescription(SBStream &s) { } SBInstructionList SBFunction::GetInstructions(SBTarget target) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBFunction, GetInstructions, - (lldb::SBTarget), target); + LLDB_INSTRUMENT_VA(this, target); return GetInstructions(target, nullptr); } SBInstructionList SBFunction::GetInstructions(SBTarget target, const char *flavor) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBFunction, GetInstructions, - (lldb::SBTarget, const char *), target, flavor); + LLDB_INSTRUMENT_VA(this, target, flavor); SBInstructionList sb_instructions; if (m_opaque_ptr) { @@ -146,7 +141,7 @@ void SBFunction::reset(lldb_private::Function *lldb_object_ptr) { } SBAddress SBFunction::GetStartAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBFunction, GetStartAddress); + LLDB_INSTRUMENT_VA(this); SBAddress addr; if (m_opaque_ptr) @@ -155,7 +150,7 @@ SBAddress SBFunction::GetStartAddress() { } SBAddress SBFunction::GetEndAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBFunction, GetEndAddress); + LLDB_INSTRUMENT_VA(this); SBAddress addr; if (m_opaque_ptr) { @@ -169,8 +164,7 @@ SBAddress SBFunction::GetEndAddress() { } const char *SBFunction::GetArgumentName(uint32_t arg_idx) { - LLDB_RECORD_METHOD(const char *, SBFunction, 
GetArgumentName, (uint32_t), - arg_idx); + LLDB_INSTRUMENT_VA(this, arg_idx); if (m_opaque_ptr) { Block &block = m_opaque_ptr->GetBlock(true); @@ -188,7 +182,7 @@ const char *SBFunction::GetArgumentName(uint32_t arg_idx) { } uint32_t SBFunction::GetPrologueByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBFunction, GetPrologueByteSize); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetPrologueByteSize(); @@ -196,7 +190,7 @@ uint32_t SBFunction::GetPrologueByteSize() { } SBType SBFunction::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBFunction, GetType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; if (m_opaque_ptr) { @@ -208,7 +202,7 @@ SBType SBFunction::GetType() { } SBBlock SBFunction::GetBlock() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBFunction, GetBlock); + LLDB_INSTRUMENT_VA(this); SBBlock sb_block; if (m_opaque_ptr) @@ -217,7 +211,7 @@ SBBlock SBFunction::GetBlock() { } lldb::LanguageType SBFunction::GetLanguage() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBFunction, GetLanguage); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { if (m_opaque_ptr->GetCompileUnit()) @@ -227,7 +221,7 @@ lldb::LanguageType SBFunction::GetLanguage() { } bool SBFunction::GetIsOptimized() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBFunction, GetIsOptimized); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) { if (m_opaque_ptr->GetCompileUnit()) diff --git a/lldb/source/API/SBHostOS.cpp b/lldb/source/API/SBHostOS.cpp index 5bcb83d95ae7a..06cf654031a1d 100644 --- a/lldb/source/API/SBHostOS.cpp +++ b/lldb/source/API/SBHostOS.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBHostOS.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBError.h" #include "lldb/Host/Config.h" #include "lldb/Host/FileSystem.h" @@ -17,6 +16,7 @@ #include "lldb/Host/HostThread.h" #include "lldb/Host/ThreadLauncher.h" #include "lldb/Utility/FileSpec.h" +#include 
"lldb/Utility/Instrumentation.h" #include "Plugins/ExpressionParser/Clang/ClangHost.h" #if LLDB_ENABLE_PYTHON @@ -30,8 +30,7 @@ using namespace lldb; using namespace lldb_private; SBFileSpec SBHostOS::GetProgramFileSpec() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS, - GetProgramFileSpec); + LLDB_INSTRUMENT(); SBFileSpec sb_filespec; sb_filespec.SetFileSpec(HostInfo::GetProgramFileSpec()); @@ -39,15 +38,13 @@ SBFileSpec SBHostOS::GetProgramFileSpec() { } SBFileSpec SBHostOS::GetLLDBPythonPath() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS, - GetLLDBPythonPath); + LLDB_INSTRUMENT(); return GetLLDBPath(ePathTypePythonDir); } SBFileSpec SBHostOS::GetLLDBPath(lldb::PathType path_type) { - LLDB_RECORD_STATIC_METHOD(lldb::SBFileSpec, SBHostOS, GetLLDBPath, - (lldb::PathType), path_type); + LLDB_INSTRUMENT_VA(path_type); FileSpec fspec; switch (path_type) { @@ -88,8 +85,7 @@ SBFileSpec SBHostOS::GetLLDBPath(lldb::PathType path_type) { } SBFileSpec SBHostOS::GetUserHomeDirectory() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS, - GetUserHomeDirectory); + LLDB_INSTRUMENT(); FileSpec homedir; FileSystem::Instance().GetHomeDirectory(homedir); @@ -104,9 +100,7 @@ SBFileSpec SBHostOS::GetUserHomeDirectory() { lldb::thread_t SBHostOS::ThreadCreate(const char *name, lldb::thread_func_t thread_function, void *thread_arg, SBError *error_ptr) { - LLDB_RECORD_STATIC_METHOD(lldb::thread_t, SBHostOS, ThreadCreate, - (lldb::thread_func_t, void *, SBError *), name, - thread_function, thread_arg, error_ptr); + LLDB_INSTRUMENT_VA(name, thread_function, thread_arg, error_ptr); llvm::Expected thread = ThreadLauncher::LaunchThread(name, thread_function, thread_arg); if (!thread) { @@ -120,15 +114,10 @@ lldb::thread_t SBHostOS::ThreadCreate(const char *name, return thread->Release(); } -void SBHostOS::ThreadCreated(const char *name) { - LLDB_RECORD_STATIC_METHOD(void, SBHostOS, ThreadCreated, (const char *), - name); -} +void 
SBHostOS::ThreadCreated(const char *name) { LLDB_INSTRUMENT_VA(name); } bool SBHostOS::ThreadCancel(lldb::thread_t thread, SBError *error_ptr) { - LLDB_RECORD_STATIC_METHOD(bool, SBHostOS, ThreadCancel, - (lldb::thread_t, lldb::SBError *), thread, - error_ptr); + LLDB_INSTRUMENT_VA(thread, error_ptr); Status error; HostThread host_thread(thread); @@ -140,9 +129,7 @@ bool SBHostOS::ThreadCancel(lldb::thread_t thread, SBError *error_ptr) { } bool SBHostOS::ThreadDetach(lldb::thread_t thread, SBError *error_ptr) { - LLDB_RECORD_STATIC_METHOD(bool, SBHostOS, ThreadDetach, - (lldb::thread_t, lldb::SBError *), thread, - error_ptr); + LLDB_INSTRUMENT_VA(thread, error_ptr); Status error; #if defined(_WIN32) @@ -160,10 +147,7 @@ bool SBHostOS::ThreadDetach(lldb::thread_t thread, SBError *error_ptr) { bool SBHostOS::ThreadJoin(lldb::thread_t thread, lldb::thread_result_t *result, SBError *error_ptr) { - LLDB_RECORD_STATIC_METHOD( - bool, SBHostOS, ThreadJoin, - (lldb::thread_t, lldb::thread_result_t *, lldb::SBError *), thread, - result, error_ptr); + LLDB_INSTRUMENT_VA(thread, result, error_ptr); Status error; HostThread host_thread(thread); diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index f9a75686d906b..6cb9e5dbc1afb 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBInstruction.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBFrame.h" @@ -66,9 +66,7 @@ class InstructionImpl { using namespace lldb; using namespace lldb_private; -SBInstruction::SBInstruction() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstruction); -} +SBInstruction::SBInstruction() { LLDB_INSTRUMENT_VA(this); } SBInstruction::SBInstruction(const lldb::DisassemblerSP &disasm_sp, const lldb::InstructionSP &inst_sp) @@ 
-76,13 +74,11 @@ SBInstruction::SBInstruction(const lldb::DisassemblerSP &disasm_sp, SBInstruction::SBInstruction(const SBInstruction &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBInstruction, (const lldb::SBInstruction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBInstruction &SBInstruction::operator=(const SBInstruction &rhs) { - LLDB_RECORD_METHOD(const lldb::SBInstruction &, - SBInstruction, operator=,(const lldb::SBInstruction &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -92,17 +88,17 @@ const SBInstruction &SBInstruction::operator=(const SBInstruction &rhs) { SBInstruction::~SBInstruction() = default; bool SBInstruction::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBInstruction::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstruction, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp && m_opaque_sp->IsValid(); } SBAddress SBInstruction::GetAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBInstruction, GetAddress); + LLDB_INSTRUMENT_VA(this); SBAddress sb_addr; lldb::InstructionSP inst_sp(GetOpaque()); @@ -112,8 +108,7 @@ SBAddress SBInstruction::GetAddress() { } const char *SBInstruction::GetMnemonic(SBTarget target) { - LLDB_RECORD_METHOD(const char *, SBInstruction, GetMnemonic, (lldb::SBTarget), - target); + LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) { @@ -132,8 +127,7 @@ const char *SBInstruction::GetMnemonic(SBTarget target) { } const char *SBInstruction::GetOperands(SBTarget target) { - LLDB_RECORD_METHOD(const char *, SBInstruction, GetOperands, (lldb::SBTarget), - target); + LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) { @@ -152,8 +146,7 @@ const char *SBInstruction::GetOperands(SBTarget target) { } const char *SBInstruction::GetComment(SBTarget target) 
{ - LLDB_RECORD_METHOD(const char *, SBInstruction, GetComment, (lldb::SBTarget), - target); + LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) { @@ -172,7 +165,7 @@ const char *SBInstruction::GetComment(SBTarget target) { } size_t SBInstruction::GetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBInstruction, GetByteSize); + LLDB_INSTRUMENT_VA(this); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) @@ -181,8 +174,7 @@ size_t SBInstruction::GetByteSize() { } SBData SBInstruction::GetData(SBTarget target) { - LLDB_RECORD_METHOD(lldb::SBData, SBInstruction, GetData, (lldb::SBTarget), - target); + LLDB_INSTRUMENT_VA(this, target); lldb::SBData sb_data; lldb::InstructionSP inst_sp(GetOpaque()); @@ -196,7 +188,7 @@ SBData SBInstruction::GetData(SBTarget target) { } bool SBInstruction::DoesBranch() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, DoesBranch); + LLDB_INSTRUMENT_VA(this); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) @@ -205,7 +197,7 @@ bool SBInstruction::DoesBranch() { } bool SBInstruction::HasDelaySlot() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, HasDelaySlot); + LLDB_INSTRUMENT_VA(this); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) @@ -214,7 +206,7 @@ bool SBInstruction::HasDelaySlot() { } bool SBInstruction::CanSetBreakpoint() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, CanSetBreakpoint); + LLDB_INSTRUMENT_VA(this); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) @@ -235,8 +227,7 @@ void SBInstruction::SetOpaque(const lldb::DisassemblerSP &disasm_sp, } bool SBInstruction::GetDescription(lldb::SBStream &s) { - LLDB_RECORD_METHOD(bool, SBInstruction, GetDescription, (lldb::SBStream &), - s); + LLDB_INSTRUMENT_VA(this, s); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) { @@ -257,18 +248,18 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) { } void SBInstruction::Print(FILE *outp) { - LLDB_RECORD_METHOD(void, SBInstruction, Print, (FILE *), outp); 
+ LLDB_INSTRUMENT_VA(this, outp); FileSP out = std::make_shared(outp, /*take_ownership=*/false); Print(out); } void SBInstruction::Print(SBFile out) { - LLDB_RECORD_METHOD(void, SBInstruction, Print, (SBFile), out); + LLDB_INSTRUMENT_VA(this, out); Print(out.m_opaque_sp); } void SBInstruction::Print(FileSP out_sp) { - LLDB_RECORD_METHOD(void, SBInstruction, Print, (FileSP), out_sp); + LLDB_INSTRUMENT_VA(this, out_sp); if (!out_sp || !out_sp->IsValid()) return; @@ -291,8 +282,7 @@ void SBInstruction::Print(FileSP out_sp) { bool SBInstruction::EmulateWithFrame(lldb::SBFrame &frame, uint32_t evaluate_options) { - LLDB_RECORD_METHOD(bool, SBInstruction, EmulateWithFrame, - (lldb::SBFrame &, uint32_t), frame, evaluate_options); + LLDB_INSTRUMENT_VA(this, frame, evaluate_options); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp) { @@ -316,8 +306,7 @@ bool SBInstruction::EmulateWithFrame(lldb::SBFrame &frame, } bool SBInstruction::DumpEmulation(const char *triple) { - LLDB_RECORD_METHOD(bool, SBInstruction, DumpEmulation, (const char *), - triple); + LLDB_INSTRUMENT_VA(this, triple); lldb::InstructionSP inst_sp(GetOpaque()); if (inst_sp && triple) { @@ -328,9 +317,7 @@ bool SBInstruction::DumpEmulation(const char *triple) { bool SBInstruction::TestEmulation(lldb::SBStream &output_stream, const char *test_file) { - LLDB_RECORD_METHOD(bool, SBInstruction, TestEmulation, - (lldb::SBStream &, const char *), output_stream, - test_file); + LLDB_INSTRUMENT_VA(this, output_stream, test_file); if (!m_opaque_sp) SetOpaque(lldb::DisassemblerSP(), diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp index 241c497b7fc39..e289e8e9343d9 100644 --- a/lldb/source/API/SBInstructionList.cpp +++ b/lldb/source/API/SBInstructionList.cpp @@ -7,35 +7,30 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBInstructionList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include 
"lldb/API/SBAddress.h" +#include "lldb/API/SBFile.h" #include "lldb/API/SBInstruction.h" #include "lldb/API/SBStream.h" -#include "lldb/API/SBFile.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" #include "lldb/Core/StreamFile.h" #include "lldb/Symbol/SymbolContext.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" using namespace lldb; using namespace lldb_private; -SBInstructionList::SBInstructionList() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstructionList); -} +SBInstructionList::SBInstructionList() { LLDB_INSTRUMENT_VA(this); } SBInstructionList::SBInstructionList(const SBInstructionList &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBInstructionList, (const lldb::SBInstructionList &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBInstructionList &SBInstructionList:: operator=(const SBInstructionList &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBInstructionList &, - SBInstructionList, operator=,(const lldb::SBInstructionList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -45,17 +40,17 @@ operator=(const SBInstructionList &rhs) { SBInstructionList::~SBInstructionList() = default; bool SBInstructionList::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstructionList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBInstructionList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstructionList, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } size_t SBInstructionList::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBInstructionList, GetSize); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) return m_opaque_sp->GetInstructionList().GetSize(); @@ -63,8 +58,7 @@ size_t SBInstructionList::GetSize() { } SBInstruction SBInstructionList::GetInstructionAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBInstruction, SBInstructionList, - GetInstructionAtIndex, 
(uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBInstruction inst; if (m_opaque_sp && idx < m_opaque_sp->GetInstructionList().GetSize()) @@ -77,9 +71,7 @@ SBInstruction SBInstructionList::GetInstructionAtIndex(uint32_t idx) { size_t SBInstructionList::GetInstructionsCount(const SBAddress &start, const SBAddress &end, bool canSetBreakpoint) { - LLDB_RECORD_METHOD(size_t, SBInstructionList, GetInstructionsCount, - (const lldb::SBAddress &, const lldb::SBAddress &, bool), - start, end, canSetBreakpoint); + LLDB_INSTRUMENT_VA(this, start, end, canSetBreakpoint); size_t num_instructions = GetSize(); size_t i = 0; @@ -104,14 +96,13 @@ size_t SBInstructionList::GetInstructionsCount(const SBAddress &start, } void SBInstructionList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBInstructionList, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); } void SBInstructionList::AppendInstruction(SBInstruction insn) { - LLDB_RECORD_METHOD(void, SBInstructionList, AppendInstruction, - (lldb::SBInstruction), insn); + LLDB_INSTRUMENT_VA(this, insn); } void SBInstructionList::SetDisassembler(const lldb::DisassemblerSP &opaque_sp) { @@ -119,7 +110,7 @@ void SBInstructionList::SetDisassembler(const lldb::DisassemblerSP &opaque_sp) { } void SBInstructionList::Print(FILE *out) { - LLDB_RECORD_METHOD(void, SBInstructionList, Print, (FILE *), out); + LLDB_INSTRUMENT_VA(this, out); if (out == nullptr) return; StreamFile stream(out, false); @@ -127,7 +118,7 @@ void SBInstructionList::Print(FILE *out) { } void SBInstructionList::Print(SBFile out) { - LLDB_RECORD_METHOD(void, SBInstructionList, Print, (SBFile), out); + LLDB_INSTRUMENT_VA(this, out); if (!out.IsValid()) return; StreamFile stream(out.m_opaque_sp); @@ -135,7 +126,7 @@ void SBInstructionList::Print(SBFile out) { } void SBInstructionList::Print(FileSP out_sp) { - LLDB_RECORD_METHOD(void, SBInstructionList, Print, (FileSP), out_sp); + LLDB_INSTRUMENT_VA(this, out_sp); if (!out_sp || !out_sp->IsValid()) return; StreamFile 
stream(out_sp); @@ -143,8 +134,7 @@ void SBInstructionList::Print(FileSP out_sp) { } bool SBInstructionList::GetDescription(lldb::SBStream &stream) { - LLDB_RECORD_METHOD(bool, SBInstructionList, GetDescription, - (lldb::SBStream &), stream); + LLDB_INSTRUMENT_VA(this, stream); return GetDescription(stream.ref()); } @@ -186,8 +176,7 @@ bool SBInstructionList::GetDescription(Stream &sref) { } bool SBInstructionList::DumpEmulationForAllInstructions(const char *triple) { - LLDB_RECORD_METHOD(bool, SBInstructionList, DumpEmulationForAllInstructions, - (const char *), triple); + LLDB_INSTRUMENT_VA(this, triple); if (m_opaque_sp) { size_t len = GetSize(); diff --git a/lldb/source/API/SBLanguageRuntime.cpp b/lldb/source/API/SBLanguageRuntime.cpp index 781c985fc042b..d571f282fce03 100644 --- a/lldb/source/API/SBLanguageRuntime.cpp +++ b/lldb/source/API/SBLanguageRuntime.cpp @@ -7,25 +7,22 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBLanguageRuntime.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/Target/Language.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; lldb::LanguageType SBLanguageRuntime::GetLanguageTypeFromString(const char *string) { - LLDB_RECORD_STATIC_METHOD(lldb::LanguageType, SBLanguageRuntime, - GetLanguageTypeFromString, (const char *), string); + LLDB_INSTRUMENT_VA(string); return Language::GetLanguageTypeFromString(llvm::StringRef(string)); } const char * SBLanguageRuntime::GetNameForLanguageType(lldb::LanguageType language) { - LLDB_RECORD_STATIC_METHOD(const char *, SBLanguageRuntime, - GetNameForLanguageType, (lldb::LanguageType), - language); + LLDB_INSTRUMENT_VA(language); return Language::GetNameForLanguageType(language); } diff --git a/lldb/source/API/SBLaunchInfo.cpp b/lldb/source/API/SBLaunchInfo.cpp index f47fa5a4ba200..5149feba5e0ba 100644 --- a/lldb/source/API/SBLaunchInfo.cpp +++ 
b/lldb/source/API/SBLaunchInfo.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBLaunchInfo.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" @@ -40,7 +40,7 @@ class lldb_private::SBLaunchInfoImpl : public ProcessLaunchInfo { SBLaunchInfo::SBLaunchInfo(const char **argv) : m_opaque_sp(new SBLaunchInfoImpl()) { - LLDB_RECORD_CONSTRUCTOR(SBLaunchInfo, (const char **), argv); + LLDB_INSTRUMENT_VA(this, argv); m_opaque_sp->GetFlags().Reset(eLaunchFlagDebug | eLaunchFlagDisableASLR); if (argv && argv[0]) @@ -48,14 +48,13 @@ SBLaunchInfo::SBLaunchInfo(const char **argv) } SBLaunchInfo::SBLaunchInfo(const SBLaunchInfo &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBLaunchInfo, (const lldb::SBLaunchInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = rhs.m_opaque_sp; } SBLaunchInfo &SBLaunchInfo::operator=(const SBLaunchInfo &rhs) { - LLDB_RECORD_METHOD(SBLaunchInfo &, - SBLaunchInfo, operator=,(const lldb::SBLaunchInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = rhs.m_opaque_sp; return *this; @@ -72,90 +71,86 @@ void SBLaunchInfo::set_ref(const ProcessLaunchInfo &info) { } lldb::pid_t SBLaunchInfo::GetProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBLaunchInfo, GetProcessID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetProcessID(); } uint32_t SBLaunchInfo::GetUserID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetUserID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetUserID(); } uint32_t SBLaunchInfo::GetGroupID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetGroupID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetGroupID(); } bool SBLaunchInfo::UserIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, UserIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->UserIDIsValid(); } bool SBLaunchInfo::GroupIDIsValid() { 
- LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, GroupIDIsValid); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GroupIDIsValid(); } void SBLaunchInfo::SetUserID(uint32_t uid) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetUserID, (uint32_t), uid); + LLDB_INSTRUMENT_VA(this, uid); m_opaque_sp->SetUserID(uid); } void SBLaunchInfo::SetGroupID(uint32_t gid) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetGroupID, (uint32_t), gid); + LLDB_INSTRUMENT_VA(this, gid); m_opaque_sp->SetGroupID(gid); } SBFileSpec SBLaunchInfo::GetExecutableFile() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBLaunchInfo, GetExecutableFile); + LLDB_INSTRUMENT_VA(this); return SBFileSpec(m_opaque_sp->GetExecutableFile()); } void SBLaunchInfo::SetExecutableFile(SBFileSpec exe_file, bool add_as_first_arg) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetExecutableFile, - (lldb::SBFileSpec, bool), exe_file, add_as_first_arg); + LLDB_INSTRUMENT_VA(this, exe_file, add_as_first_arg); m_opaque_sp->SetExecutableFile(exe_file.ref(), add_as_first_arg); } SBListener SBLaunchInfo::GetListener() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBLaunchInfo, GetListener); + LLDB_INSTRUMENT_VA(this); return SBListener(m_opaque_sp->GetListener()); } void SBLaunchInfo::SetListener(SBListener &listener) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetListener, (lldb::SBListener &), - listener); + LLDB_INSTRUMENT_VA(this, listener); m_opaque_sp->SetListener(listener.GetSP()); } uint32_t SBLaunchInfo::GetNumArguments() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetNumArguments); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetArguments().GetArgumentCount(); } const char *SBLaunchInfo::GetArgumentAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(const char *, SBLaunchInfo, GetArgumentAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); return m_opaque_sp->GetArguments().GetArgumentAtIndex(idx); } void SBLaunchInfo::SetArguments(const char **argv, bool append) { - LLDB_RECORD_METHOD(void, 
SBLaunchInfo, SetArguments, (const char **, bool), - argv, append); + LLDB_INSTRUMENT_VA(this, argv, append); if (append) { if (argv) @@ -169,14 +164,13 @@ void SBLaunchInfo::SetArguments(const char **argv, bool append) { } uint32_t SBLaunchInfo::GetNumEnvironmentEntries() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetNumEnvironmentEntries); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetEnvironment().size(); } const char *SBLaunchInfo::GetEnvironmentEntryAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(const char *, SBLaunchInfo, GetEnvironmentEntryAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); if (idx > GetNumEnvironmentEntries()) return nullptr; @@ -184,14 +178,12 @@ const char *SBLaunchInfo::GetEnvironmentEntryAtIndex(uint32_t idx) { } void SBLaunchInfo::SetEnvironmentEntries(const char **envp, bool append) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetEnvironmentEntries, - (const char **, bool), envp, append); + LLDB_INSTRUMENT_VA(this, envp, append); SetEnvironment(SBEnvironment(Environment(envp)), append); } void SBLaunchInfo::SetEnvironment(const SBEnvironment &env, bool append) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetEnvironment, - (const lldb::SBEnvironment &, bool), env, append); + LLDB_INSTRUMENT_VA(this, env, append); Environment &refEnv = env.ref(); if (append) { for (auto &KV : refEnv) @@ -202,57 +194,54 @@ void SBLaunchInfo::SetEnvironment(const SBEnvironment &env, bool append) { } SBEnvironment SBLaunchInfo::GetEnvironment() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBLaunchInfo, GetEnvironment); + LLDB_INSTRUMENT_VA(this); return SBEnvironment(Environment(m_opaque_sp->GetEnvironment())); } void SBLaunchInfo::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBLaunchInfo, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp->Clear(); } const char *SBLaunchInfo::GetWorkingDirectory() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo, - GetWorkingDirectory); + LLDB_INSTRUMENT_VA(this); return 
m_opaque_sp->GetWorkingDirectory().GetCString(); } void SBLaunchInfo::SetWorkingDirectory(const char *working_dir) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetWorkingDirectory, (const char *), - working_dir); + LLDB_INSTRUMENT_VA(this, working_dir); m_opaque_sp->SetWorkingDirectory(FileSpec(working_dir)); } uint32_t SBLaunchInfo::GetLaunchFlags() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetLaunchFlags); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetFlags().Get(); } void SBLaunchInfo::SetLaunchFlags(uint32_t flags) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetLaunchFlags, (uint32_t), flags); + LLDB_INSTRUMENT_VA(this, flags); m_opaque_sp->GetFlags().Reset(flags); } const char *SBLaunchInfo::GetProcessPluginName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBLaunchInfo, GetProcessPluginName); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetProcessPluginName(); } void SBLaunchInfo::SetProcessPluginName(const char *plugin_name) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetProcessPluginName, (const char *), - plugin_name); + LLDB_INSTRUMENT_VA(this, plugin_name); return m_opaque_sp->SetProcessPluginName(plugin_name); } const char *SBLaunchInfo::GetShell() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBLaunchInfo, GetShell); + LLDB_INSTRUMENT_VA(this); // Constify this string so that it is saved in the string pool. Otherwise it // would be freed when this function goes out of scope. 
@@ -261,93 +250,86 @@ const char *SBLaunchInfo::GetShell() { } void SBLaunchInfo::SetShell(const char *path) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetShell, (const char *), path); + LLDB_INSTRUMENT_VA(this, path); m_opaque_sp->SetShell(FileSpec(path)); } bool SBLaunchInfo::GetShellExpandArguments() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, GetShellExpandArguments); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetShellExpandArguments(); } void SBLaunchInfo::SetShellExpandArguments(bool expand) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetShellExpandArguments, (bool), - expand); + LLDB_INSTRUMENT_VA(this, expand); m_opaque_sp->SetShellExpandArguments(expand); } uint32_t SBLaunchInfo::GetResumeCount() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetResumeCount); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetResumeCount(); } void SBLaunchInfo::SetResumeCount(uint32_t c) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetResumeCount, (uint32_t), c); + LLDB_INSTRUMENT_VA(this, c); m_opaque_sp->SetResumeCount(c); } bool SBLaunchInfo::AddCloseFileAction(int fd) { - LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddCloseFileAction, (int), fd); + LLDB_INSTRUMENT_VA(this, fd); return m_opaque_sp->AppendCloseFileAction(fd); } bool SBLaunchInfo::AddDuplicateFileAction(int fd, int dup_fd) { - LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddDuplicateFileAction, (int, int), fd, - dup_fd); + LLDB_INSTRUMENT_VA(this, fd, dup_fd); return m_opaque_sp->AppendDuplicateFileAction(fd, dup_fd); } bool SBLaunchInfo::AddOpenFileAction(int fd, const char *path, bool read, bool write) { - LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddOpenFileAction, - (int, const char *, bool, bool), fd, path, read, write); + LLDB_INSTRUMENT_VA(this, fd, path, read, write); return m_opaque_sp->AppendOpenFileAction(fd, FileSpec(path), read, write); } bool SBLaunchInfo::AddSuppressFileAction(int fd, bool read, bool write) { - LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddSuppressFileAction, - (int, bool, 
bool), fd, read, write); + LLDB_INSTRUMENT_VA(this, fd, read, write); return m_opaque_sp->AppendSuppressFileAction(fd, read, write); } void SBLaunchInfo::SetLaunchEventData(const char *data) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetLaunchEventData, (const char *), - data); + LLDB_INSTRUMENT_VA(this, data); m_opaque_sp->SetLaunchEventData(data); } const char *SBLaunchInfo::GetLaunchEventData() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo, - GetLaunchEventData); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetLaunchEventData(); } void SBLaunchInfo::SetDetachOnError(bool enable) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetDetachOnError, (bool), enable); + LLDB_INSTRUMENT_VA(this, enable); m_opaque_sp->SetDetachOnError(enable); } bool SBLaunchInfo::GetDetachOnError() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLaunchInfo, GetDetachOnError); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetDetachOnError(); } const char *SBLaunchInfo::GetScriptedProcessClassName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo, - GetScriptedProcessClassName); + LLDB_INSTRUMENT_VA(this); // Constify this string so that it is saved in the string pool. Otherwise it // would be freed when this function goes out of scope. 
@@ -356,15 +338,13 @@ const char *SBLaunchInfo::GetScriptedProcessClassName() const { } void SBLaunchInfo::SetScriptedProcessClassName(const char *class_name) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetScriptedProcessClassName, - (const char *), class_name); + LLDB_INSTRUMENT_VA(this, class_name); m_opaque_sp->SetScriptedProcessClassName(class_name); } lldb::SBStructuredData SBLaunchInfo::GetScriptedProcessDictionary() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBStructuredData, SBLaunchInfo, - GetScriptedProcessDictionary); + LLDB_INSTRUMENT_VA(this); lldb_private::StructuredData::DictionarySP dict_sp = m_opaque_sp->GetScriptedProcessDictionarySP(); @@ -376,8 +356,7 @@ lldb::SBStructuredData SBLaunchInfo::GetScriptedProcessDictionary() const { } void SBLaunchInfo::SetScriptedProcessDictionary(lldb::SBStructuredData dict) { - LLDB_RECORD_METHOD(void, SBLaunchInfo, SetScriptedProcessDictionary, - (lldb::SBStructuredData), dict); + LLDB_INSTRUMENT_VA(this, dict); if (!dict.IsValid() || !dict.m_impl_up) return; diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp index 52623b6245346..28d12e65fdaf8 100644 --- a/lldb/source/API/SBLineEntry.cpp +++ b/lldb/source/API/SBLineEntry.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBLineEntry.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Host/PosixApi.h" #include "lldb/Symbol/LineEntry.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/StreamString.h" #include @@ -19,10 +19,10 @@ using namespace lldb; using namespace lldb_private; -SBLineEntry::SBLineEntry() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBLineEntry); } +SBLineEntry::SBLineEntry() { LLDB_INSTRUMENT_VA(this); } SBLineEntry::SBLineEntry(const SBLineEntry &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBLineEntry, (const lldb::SBLineEntry &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); 
m_opaque_up = clone(rhs.m_opaque_up); } @@ -33,8 +33,7 @@ SBLineEntry::SBLineEntry(const lldb_private::LineEntry *lldb_object_ptr) { } const SBLineEntry &SBLineEntry::operator=(const SBLineEntry &rhs) { - LLDB_RECORD_METHOD(const lldb::SBLineEntry &, - SBLineEntry, operator=,(const lldb::SBLineEntry &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -48,8 +47,7 @@ void SBLineEntry::SetLineEntry(const lldb_private::LineEntry &lldb_object_ref) { SBLineEntry::~SBLineEntry() = default; SBAddress SBLineEntry::GetStartAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBLineEntry, - GetStartAddress); + LLDB_INSTRUMENT_VA(this); SBAddress sb_address; if (m_opaque_up) @@ -59,7 +57,7 @@ SBAddress SBLineEntry::GetStartAddress() const { } SBAddress SBLineEntry::GetEndAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBLineEntry, GetEndAddress); + LLDB_INSTRUMENT_VA(this); SBAddress sb_address; if (m_opaque_up) { @@ -70,17 +68,17 @@ SBAddress SBLineEntry::GetEndAddress() const { } bool SBLineEntry::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLineEntry, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBLineEntry::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLineEntry, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up.get() && m_opaque_up->IsValid(); } SBFileSpec SBLineEntry::GetFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBLineEntry, GetFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec sb_file_spec; if (m_opaque_up.get() && m_opaque_up->file) @@ -90,7 +88,7 @@ SBFileSpec SBLineEntry::GetFileSpec() const { } uint32_t SBLineEntry::GetLine() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBLineEntry, GetLine); + LLDB_INSTRUMENT_VA(this); uint32_t line = 0; if (m_opaque_up) @@ -100,7 +98,7 @@ uint32_t SBLineEntry::GetLine() const { } uint32_t SBLineEntry::GetColumn() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBLineEntry, GetColumn); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->column; @@ -108,8 +106,7 @@ uint32_t SBLineEntry::GetColumn() const { } void SBLineEntry::SetFileSpec(lldb::SBFileSpec filespec) { - LLDB_RECORD_METHOD(void, SBLineEntry, SetFileSpec, (lldb::SBFileSpec), - filespec); + LLDB_INSTRUMENT_VA(this, filespec); if (filespec.IsValid()) ref().file = filespec.ref(); @@ -117,20 +114,19 @@ void SBLineEntry::SetFileSpec(lldb::SBFileSpec filespec) { ref().file.Clear(); } void SBLineEntry::SetLine(uint32_t line) { - LLDB_RECORD_METHOD(void, SBLineEntry, SetLine, (uint32_t), line); + LLDB_INSTRUMENT_VA(this, line); ref().line = line; } void SBLineEntry::SetColumn(uint32_t column) { - LLDB_RECORD_METHOD(void, SBLineEntry, SetColumn, (uint32_t), column); + LLDB_INSTRUMENT_VA(this, column); ref().line = column; } bool SBLineEntry::operator==(const SBLineEntry &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBLineEntry, operator==,(const lldb::SBLineEntry &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); lldb_private::LineEntry *lhs_ptr = m_opaque_up.get(); lldb_private::LineEntry *rhs_ptr = rhs.m_opaque_up.get(); @@ -142,8 +138,7 @@ bool SBLineEntry::operator==(const SBLineEntry &rhs) const { } bool SBLineEntry::operator!=(const SBLineEntry &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBLineEntry, operator!=,(const lldb::SBLineEntry &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); lldb_private::LineEntry *lhs_ptr = m_opaque_up.get(); lldb_private::LineEntry *rhs_ptr = rhs.m_opaque_up.get(); @@ -167,8 +162,7 @@ lldb_private::LineEntry &SBLineEntry::ref() { const lldb_private::LineEntry &SBLineEntry::ref() const { return *m_opaque_up; } bool SBLineEntry::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBLineEntry, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBListener.cpp 
b/lldb/source/API/SBListener.cpp index 50e7abd951d0b..2ce17a5f521d7 100644 --- a/lldb/source/API/SBListener.cpp +++ b/lldb/source/API/SBListener.cpp @@ -7,34 +7,33 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBListener.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBBroadcaster.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBEvent.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Debugger.h" #include "lldb/Utility/Broadcaster.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Listener.h" #include "lldb/Utility/StreamString.h" using namespace lldb; using namespace lldb_private; -SBListener::SBListener() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBListener); } +SBListener::SBListener() { LLDB_INSTRUMENT_VA(this); } SBListener::SBListener(const char *name) : m_opaque_sp(Listener::MakeListener(name)), m_unused_ptr(nullptr) { - LLDB_RECORD_CONSTRUCTOR(SBListener, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); } SBListener::SBListener(const SBListener &rhs) : m_opaque_sp(rhs.m_opaque_sp), m_unused_ptr(nullptr) { - LLDB_RECORD_CONSTRUCTOR(SBListener, (const lldb::SBListener &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const lldb::SBListener &SBListener::operator=(const lldb::SBListener &rhs) { - LLDB_RECORD_METHOD(const lldb::SBListener &, - SBListener, operator=,(const lldb::SBListener &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -49,18 +48,17 @@ SBListener::SBListener(const lldb::ListenerSP &listener_sp) SBListener::~SBListener() = default; bool SBListener::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBListener, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBListener::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBListener, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp != nullptr; } void SBListener::AddEvent(const SBEvent 
&event) { - LLDB_RECORD_METHOD(void, SBListener, AddEvent, (const lldb::SBEvent &), - event); + LLDB_INSTRUMENT_VA(this, event); EventSP &event_sp = event.GetSP(); if (event_sp) @@ -68,7 +66,7 @@ void SBListener::AddEvent(const SBEvent &event) { } void SBListener::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBListener, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) m_opaque_sp->Clear(); @@ -77,9 +75,7 @@ void SBListener::Clear() { uint32_t SBListener::StartListeningForEventClass(SBDebugger &debugger, const char *broadcaster_class, uint32_t event_mask) { - LLDB_RECORD_METHOD(uint32_t, SBListener, StartListeningForEventClass, - (lldb::SBDebugger &, const char *, uint32_t), debugger, - broadcaster_class, event_mask); + LLDB_INSTRUMENT_VA(this, debugger, broadcaster_class, event_mask); if (m_opaque_sp) { Debugger *lldb_debugger = debugger.get(); @@ -95,9 +91,7 @@ uint32_t SBListener::StartListeningForEventClass(SBDebugger &debugger, bool SBListener::StopListeningForEventClass(SBDebugger &debugger, const char *broadcaster_class, uint32_t event_mask) { - LLDB_RECORD_METHOD(bool, SBListener, StopListeningForEventClass, - (lldb::SBDebugger &, const char *, uint32_t), debugger, - broadcaster_class, event_mask); + LLDB_INSTRUMENT_VA(this, debugger, broadcaster_class, event_mask); if (m_opaque_sp) { Debugger *lldb_debugger = debugger.get(); @@ -112,9 +106,7 @@ bool SBListener::StopListeningForEventClass(SBDebugger &debugger, uint32_t SBListener::StartListeningForEvents(const SBBroadcaster &broadcaster, uint32_t event_mask) { - LLDB_RECORD_METHOD(uint32_t, SBListener, StartListeningForEvents, - (const lldb::SBBroadcaster &, uint32_t), broadcaster, - event_mask); + LLDB_INSTRUMENT_VA(this, broadcaster, event_mask); uint32_t acquired_event_mask = 0; if (m_opaque_sp && broadcaster.IsValid()) { @@ -127,9 +119,7 @@ uint32_t SBListener::StartListeningForEvents(const SBBroadcaster &broadcaster, bool SBListener::StopListeningForEvents(const SBBroadcaster &broadcaster, uint32_t 
event_mask) { - LLDB_RECORD_METHOD(bool, SBListener, StopListeningForEvents, - (const lldb::SBBroadcaster &, uint32_t), broadcaster, - event_mask); + LLDB_INSTRUMENT_VA(this, broadcaster, event_mask); if (m_opaque_sp && broadcaster.IsValid()) { return m_opaque_sp->StopListeningForEvents(broadcaster.get(), event_mask); @@ -138,8 +128,7 @@ bool SBListener::StopListeningForEvents(const SBBroadcaster &broadcaster, } bool SBListener::WaitForEvent(uint32_t timeout_secs, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, WaitForEvent, - (uint32_t, lldb::SBEvent &), timeout_secs, event); + LLDB_INSTRUMENT_VA(this, timeout_secs, event); bool success = false; @@ -165,9 +154,7 @@ bool SBListener::WaitForEvent(uint32_t timeout_secs, SBEvent &event) { bool SBListener::WaitForEventForBroadcaster(uint32_t num_seconds, const SBBroadcaster &broadcaster, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, WaitForEventForBroadcaster, - (uint32_t, const lldb::SBBroadcaster &, lldb::SBEvent &), - num_seconds, broadcaster, event); + LLDB_INSTRUMENT_VA(this, num_seconds, broadcaster, event); if (m_opaque_sp && broadcaster.IsValid()) { Timeout timeout(llvm::None); @@ -187,10 +174,7 @@ bool SBListener::WaitForEventForBroadcaster(uint32_t num_seconds, bool SBListener::WaitForEventForBroadcasterWithType( uint32_t num_seconds, const SBBroadcaster &broadcaster, uint32_t event_type_mask, SBEvent &event) { - LLDB_RECORD_METHOD( - bool, SBListener, WaitForEventForBroadcasterWithType, - (uint32_t, const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &), - num_seconds, broadcaster, event_type_mask, event); + LLDB_INSTRUMENT_VA(this, num_seconds, broadcaster, event_type_mask, event); if (m_opaque_sp && broadcaster.IsValid()) { Timeout timeout(llvm::None); @@ -208,8 +192,7 @@ bool SBListener::WaitForEventForBroadcasterWithType( } bool SBListener::PeekAtNextEvent(SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEvent, (lldb::SBEvent &), - event); + LLDB_INSTRUMENT_VA(this, 
event); if (m_opaque_sp) { event.reset(m_opaque_sp->PeekAtNextEvent()); @@ -221,9 +204,7 @@ bool SBListener::PeekAtNextEvent(SBEvent &event) { bool SBListener::PeekAtNextEventForBroadcaster(const SBBroadcaster &broadcaster, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEventForBroadcaster, - (const lldb::SBBroadcaster &, lldb::SBEvent &), - broadcaster, event); + LLDB_INSTRUMENT_VA(this, broadcaster, event); if (m_opaque_sp && broadcaster.IsValid()) { event.reset(m_opaque_sp->PeekAtNextEventForBroadcaster(broadcaster.get())); @@ -236,9 +217,7 @@ bool SBListener::PeekAtNextEventForBroadcaster(const SBBroadcaster &broadcaster, bool SBListener::PeekAtNextEventForBroadcasterWithType( const SBBroadcaster &broadcaster, uint32_t event_type_mask, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEventForBroadcasterWithType, - (const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &), - broadcaster, event_type_mask, event); + LLDB_INSTRUMENT_VA(this, broadcaster, event_type_mask, event); if (m_opaque_sp && broadcaster.IsValid()) { event.reset(m_opaque_sp->PeekAtNextEventForBroadcasterWithType( @@ -250,7 +229,7 @@ bool SBListener::PeekAtNextEventForBroadcasterWithType( } bool SBListener::GetNextEvent(SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, GetNextEvent, (lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(this, event); if (m_opaque_sp) { EventSP event_sp; @@ -265,9 +244,7 @@ bool SBListener::GetNextEvent(SBEvent &event) { bool SBListener::GetNextEventForBroadcaster(const SBBroadcaster &broadcaster, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, GetNextEventForBroadcaster, - (const lldb::SBBroadcaster &, lldb::SBEvent &), - broadcaster, event); + LLDB_INSTRUMENT_VA(this, broadcaster, event); if (m_opaque_sp && broadcaster.IsValid()) { EventSP event_sp; @@ -284,9 +261,7 @@ bool SBListener::GetNextEventForBroadcaster(const SBBroadcaster &broadcaster, bool SBListener::GetNextEventForBroadcasterWithType( const 
SBBroadcaster &broadcaster, uint32_t event_type_mask, SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, GetNextEventForBroadcasterWithType, - (const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &), - broadcaster, event_type_mask, event); + LLDB_INSTRUMENT_VA(this, broadcaster, event_type_mask, event); if (m_opaque_sp && broadcaster.IsValid()) { EventSP event_sp; @@ -302,8 +277,7 @@ bool SBListener::GetNextEventForBroadcasterWithType( } bool SBListener::HandleBroadcastEvent(const SBEvent &event) { - LLDB_RECORD_METHOD(bool, SBListener, HandleBroadcastEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(this, event); if (m_opaque_sp) return m_opaque_sp->HandleBroadcastEvent(event.GetSP()); diff --git a/lldb/source/API/SBMemoryRegionInfo.cpp b/lldb/source/API/SBMemoryRegionInfo.cpp index 1a0fb7c74230c..7d9db478dde17 100644 --- a/lldb/source/API/SBMemoryRegionInfo.cpp +++ b/lldb/source/API/SBMemoryRegionInfo.cpp @@ -7,29 +7,26 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBMemoryRegionInfo.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBError.h" #include "lldb/API/SBStream.h" #include "lldb/Target/MemoryRegionInfo.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/StreamString.h" using namespace lldb; using namespace lldb_private; SBMemoryRegionInfo::SBMemoryRegionInfo() : m_opaque_up(new MemoryRegionInfo()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBMemoryRegionInfo); + LLDB_INSTRUMENT_VA(this); } SBMemoryRegionInfo::SBMemoryRegionInfo(const char *name, lldb::addr_t begin, lldb::addr_t end, uint32_t permissions, bool mapped, bool stack_memory) : SBMemoryRegionInfo() { - LLDB_RECORD_CONSTRUCTOR( - SBMemoryRegionInfo, - (const char *, lldb::addr_t, lldb::addr_t, uint32_t, bool, bool), name, - begin, end, permissions, mapped, stack_memory); + LLDB_INSTRUMENT_VA(this, name, begin, end, permissions, 
mapped, stack_memory); m_opaque_up->SetName(name); m_opaque_up->GetRange().SetRangeBase(begin); m_opaque_up->GetRange().SetRangeEnd(end); @@ -47,16 +44,13 @@ SBMemoryRegionInfo::SBMemoryRegionInfo(const MemoryRegionInfo *lldb_object_ptr) } SBMemoryRegionInfo::SBMemoryRegionInfo(const SBMemoryRegionInfo &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBMemoryRegionInfo, - (const lldb::SBMemoryRegionInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } const SBMemoryRegionInfo &SBMemoryRegionInfo:: operator=(const SBMemoryRegionInfo &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBMemoryRegionInfo &, - SBMemoryRegionInfo, operator=,(const lldb::SBMemoryRegionInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -66,23 +60,19 @@ operator=(const SBMemoryRegionInfo &rhs) { SBMemoryRegionInfo::~SBMemoryRegionInfo() = default; void SBMemoryRegionInfo::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBMemoryRegionInfo, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up->Clear(); } bool SBMemoryRegionInfo::operator==(const SBMemoryRegionInfo &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBMemoryRegionInfo, operator==,(const lldb::SBMemoryRegionInfo &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return ref() == rhs.ref(); } bool SBMemoryRegionInfo::operator!=(const SBMemoryRegionInfo &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBMemoryRegionInfo, operator!=,(const lldb::SBMemoryRegionInfo &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return ref() != rhs.ref(); } @@ -92,55 +82,55 @@ MemoryRegionInfo &SBMemoryRegionInfo::ref() { return *m_opaque_up; } const MemoryRegionInfo &SBMemoryRegionInfo::ref() const { return *m_opaque_up; } lldb::addr_t SBMemoryRegionInfo::GetRegionBase() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBMemoryRegionInfo, GetRegionBase); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetRange().GetRangeBase(); } lldb::addr_t SBMemoryRegionInfo::GetRegionEnd() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBMemoryRegionInfo, GetRegionEnd); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetRange().GetRangeEnd(); } bool SBMemoryRegionInfo::IsReadable() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsReadable); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetReadable() == MemoryRegionInfo::eYes; } bool SBMemoryRegionInfo::IsWritable() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsWritable); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetWritable() == MemoryRegionInfo::eYes; } bool SBMemoryRegionInfo::IsExecutable() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsExecutable); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetExecutable() == MemoryRegionInfo::eYes; } bool SBMemoryRegionInfo::IsMapped() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsMapped); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetMapped() == MemoryRegionInfo::eYes; } const char *SBMemoryRegionInfo::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBMemoryRegionInfo, GetName); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetName().AsCString(); } bool SBMemoryRegionInfo::HasDirtyMemoryPageList() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, HasDirtyMemoryPageList); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetDirtyPageList().hasValue(); } uint32_t SBMemoryRegionInfo::GetNumDirtyPages() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBMemoryRegionInfo, GetNumDirtyPages); + LLDB_INSTRUMENT_VA(this); uint32_t num_dirty_pages = 0; llvm::Optional> dirty_page_list = @@ -152,8 +142,7 @@ uint32_t SBMemoryRegionInfo::GetNumDirtyPages() { } addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::addr_t, SBMemoryRegionInfo, - GetDirtyPageAddressAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); addr_t dirty_page_addr = LLDB_INVALID_ADDRESS; const llvm::Optional> &dirty_page_list = @@ -165,14 +154,13 @@ addr_t 
SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) { } int SBMemoryRegionInfo::GetPageSize() { - LLDB_RECORD_METHOD_NO_ARGS(int, SBMemoryRegionInfo, GetPageSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetPageSize(); } bool SBMemoryRegionInfo::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBMemoryRegionInfo, GetDescription, - (lldb::SBStream &), description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); const addr_t load_addr = m_opaque_up->GetRange().base; diff --git a/lldb/source/API/SBMemoryRegionInfoList.cpp b/lldb/source/API/SBMemoryRegionInfoList.cpp index 2de0f07006664..39dee86dc3007 100644 --- a/lldb/source/API/SBMemoryRegionInfoList.cpp +++ b/lldb/source/API/SBMemoryRegionInfoList.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBMemoryRegionInfoList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBMemoryRegionInfo.h" #include "lldb/API/SBStream.h" #include "lldb/Target/MemoryRegionInfo.h" +#include "lldb/Utility/Instrumentation.h" #include @@ -83,24 +83,20 @@ const MemoryRegionInfos &SBMemoryRegionInfoList::ref() const { SBMemoryRegionInfoList::SBMemoryRegionInfoList() : m_opaque_up(new MemoryRegionInfoListImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBMemoryRegionInfoList); + LLDB_INSTRUMENT_VA(this); } SBMemoryRegionInfoList::SBMemoryRegionInfoList( const SBMemoryRegionInfoList &rhs) : m_opaque_up(new MemoryRegionInfoListImpl(*rhs.m_opaque_up)) { - LLDB_RECORD_CONSTRUCTOR(SBMemoryRegionInfoList, - (const lldb::SBMemoryRegionInfoList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBMemoryRegionInfoList::~SBMemoryRegionInfoList() = default; const SBMemoryRegionInfoList &SBMemoryRegionInfoList:: operator=(const SBMemoryRegionInfoList &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBMemoryRegionInfoList &, - SBMemoryRegionInfoList, operator=,(const lldb::SBMemoryRegionInfoList &), - rhs); + 
LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { *m_opaque_up = *rhs.m_opaque_up; @@ -109,44 +105,39 @@ operator=(const SBMemoryRegionInfoList &rhs) { } uint32_t SBMemoryRegionInfoList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBMemoryRegionInfoList, GetSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSize(); } bool SBMemoryRegionInfoList::GetMemoryRegionContainingAddress( lldb::addr_t addr, SBMemoryRegionInfo ®ion_info) { - LLDB_RECORD_METHOD( - bool, SBMemoryRegionInfoList, GetMemoryRegionContainingAddress, - (lldb::addr_t, lldb::SBMemoryRegionInfo &), addr, region_info); + LLDB_INSTRUMENT_VA(this, addr, region_info); return m_opaque_up->GetMemoryRegionContainingAddress(addr, region_info.ref()); } bool SBMemoryRegionInfoList::GetMemoryRegionAtIndex( uint32_t idx, SBMemoryRegionInfo ®ion_info) { - LLDB_RECORD_METHOD(bool, SBMemoryRegionInfoList, GetMemoryRegionAtIndex, - (uint32_t, lldb::SBMemoryRegionInfo &), idx, region_info); + LLDB_INSTRUMENT_VA(this, idx, region_info); return m_opaque_up->GetMemoryRegionInfoAtIndex(idx, region_info.ref()); } void SBMemoryRegionInfoList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBMemoryRegionInfoList, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up->Clear(); } void SBMemoryRegionInfoList::Append(SBMemoryRegionInfo &sb_region) { - LLDB_RECORD_METHOD(void, SBMemoryRegionInfoList, Append, - (lldb::SBMemoryRegionInfo &), sb_region); + LLDB_INSTRUMENT_VA(this, sb_region); m_opaque_up->Append(sb_region.ref()); } void SBMemoryRegionInfoList::Append(SBMemoryRegionInfoList &sb_region_list) { - LLDB_RECORD_METHOD(void, SBMemoryRegionInfoList, Append, - (lldb::SBMemoryRegionInfoList &), sb_region_list); + LLDB_INSTRUMENT_VA(this, sb_region_list); m_opaque_up->Append(*sb_region_list); } diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp index 366c920f5756a..2483495b97db0 100644 --- a/lldb/source/API/SBModule.cpp +++ b/lldb/source/API/SBModule.cpp @@ -7,7 +7,6 @@ 
//===----------------------------------------------------------------------===// #include "lldb/API/SBModule.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBModuleSpec.h" @@ -24,17 +23,18 @@ #include "lldb/Symbol/TypeSystem.h" #include "lldb/Symbol/VariableList.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/StreamString.h" using namespace lldb; using namespace lldb_private; -SBModule::SBModule() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModule); } +SBModule::SBModule() { LLDB_INSTRUMENT_VA(this); } SBModule::SBModule(const lldb::ModuleSP &module_sp) : m_opaque_sp(module_sp) {} SBModule::SBModule(const SBModuleSpec &module_spec) { - LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModuleSpec &), module_spec); + LLDB_INSTRUMENT_VA(this, module_spec); ModuleSP module_sp; Status error = ModuleList::GetSharedModule( @@ -44,12 +44,11 @@ SBModule::SBModule(const SBModuleSpec &module_spec) { } SBModule::SBModule(const SBModule &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModule &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) { - LLDB_RECORD_CONSTRUCTOR(SBModule, (lldb::SBProcess &, lldb::addr_t), process, - header_addr); + LLDB_INSTRUMENT_VA(this, process, header_addr); ProcessSP process_sp(process.GetSP()); if (process_sp) { @@ -64,8 +63,7 @@ SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) { } const SBModule &SBModule::operator=(const SBModule &rhs) { - LLDB_RECORD_METHOD(const lldb::SBModule &, SBModule, operator=, - (const lldb::SBModule &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -75,23 +73,23 @@ const SBModule &SBModule::operator=(const SBModule &rhs) { SBModule::~SBModule() = default; bool SBModule::IsValid() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModule, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBModule::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModule, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } void SBModule::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBModule, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); } SBFileSpec SBModule::GetFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule, GetFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec file_spec; ModuleSP module_sp(GetSP()); @@ -102,8 +100,7 @@ SBFileSpec SBModule::GetFileSpec() const { } lldb::SBFileSpec SBModule::GetPlatformFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule, - GetPlatformFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec file_spec; ModuleSP module_sp(GetSP()); @@ -114,8 +111,7 @@ lldb::SBFileSpec SBModule::GetPlatformFileSpec() const { } bool SBModule::SetPlatformFileSpec(const lldb::SBFileSpec &platform_file) { - LLDB_RECORD_METHOD(bool, SBModule, SetPlatformFileSpec, - (const lldb::SBFileSpec &), platform_file); + LLDB_INSTRUMENT_VA(this, platform_file); bool result = false; @@ -129,8 +125,7 @@ bool SBModule::SetPlatformFileSpec(const lldb::SBFileSpec &platform_file) { } lldb::SBFileSpec SBModule::GetRemoteInstallFileSpec() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModule, - GetRemoteInstallFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec sb_file_spec; ModuleSP module_sp(GetSP()); @@ -140,8 +135,7 @@ lldb::SBFileSpec SBModule::GetRemoteInstallFileSpec() { } bool SBModule::SetRemoteInstallFileSpec(lldb::SBFileSpec &file) { - LLDB_RECORD_METHOD(bool, SBModule, SetRemoteInstallFileSpec, - (lldb::SBFileSpec &), file); + LLDB_INSTRUMENT_VA(this, file); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -152,7 +146,7 @@ bool SBModule::SetRemoteInstallFileSpec(lldb::SBFileSpec &file) { } const uint8_t *SBModule::GetUUIDBytes() const 
{ - LLDB_RECORD_METHOD_CONST_NO_ARGS(const uint8_t *, SBModule, GetUUIDBytes); + LLDB_INSTRUMENT_VA(this); const uint8_t *uuid_bytes = nullptr; ModuleSP module_sp(GetSP()); @@ -163,7 +157,7 @@ const uint8_t *SBModule::GetUUIDBytes() const { } const char *SBModule::GetUUIDString() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBModule, GetUUIDString); + LLDB_INSTRUMENT_VA(this); const char *uuid_cstr = nullptr; ModuleSP module_sp(GetSP()); @@ -183,8 +177,7 @@ const char *SBModule::GetUUIDString() const { } bool SBModule::operator==(const SBModule &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBModule, operator==, (const lldb::SBModule &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (m_opaque_sp) return m_opaque_sp.get() == rhs.m_opaque_sp.get(); @@ -192,8 +185,7 @@ bool SBModule::operator==(const SBModule &rhs) const { } bool SBModule::operator!=(const SBModule &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBModule, operator!=, (const lldb::SBModule &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (m_opaque_sp) return m_opaque_sp.get() != rhs.m_opaque_sp.get(); @@ -205,8 +197,7 @@ ModuleSP SBModule::GetSP() const { return m_opaque_sp; } void SBModule::SetSP(const ModuleSP &module_sp) { m_opaque_sp = module_sp; } SBAddress SBModule::ResolveFileAddress(lldb::addr_t vm_addr) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBModule, ResolveFileAddress, - (lldb::addr_t), vm_addr); + LLDB_INSTRUMENT_VA(this, vm_addr); lldb::SBAddress sb_addr; ModuleSP module_sp(GetSP()); @@ -221,9 +212,7 @@ SBAddress SBModule::ResolveFileAddress(lldb::addr_t vm_addr) { SBSymbolContext SBModule::ResolveSymbolContextForAddress(const SBAddress &addr, uint32_t resolve_scope) { - LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBModule, - ResolveSymbolContextForAddress, - (const lldb::SBAddress &, uint32_t), addr, resolve_scope); + LLDB_INSTRUMENT_VA(this, addr, resolve_scope); SBSymbolContext sb_sc; ModuleSP module_sp(GetSP()); @@ -234,8 +223,7 @@ SBModule::ResolveSymbolContextForAddress(const 
SBAddress &addr, } bool SBModule::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBModule, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -249,7 +237,7 @@ bool SBModule::GetDescription(SBStream &description) { } uint32_t SBModule::GetNumCompileUnits() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBModule, GetNumCompileUnits); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -259,8 +247,7 @@ uint32_t SBModule::GetNumCompileUnits() { } SBCompileUnit SBModule::GetCompileUnitAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBCompileUnit, SBModule, GetCompileUnitAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); SBCompileUnit sb_cu; ModuleSP module_sp(GetSP()); @@ -272,8 +259,7 @@ SBCompileUnit SBModule::GetCompileUnitAtIndex(uint32_t index) { } SBSymbolContextList SBModule::FindCompileUnits(const SBFileSpec &sb_file_spec) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindCompileUnits, - (const lldb::SBFileSpec &), sb_file_spec); + LLDB_INSTRUMENT_VA(this, sb_file_spec); SBSymbolContextList sb_sc_list; const ModuleSP module_sp(GetSP()); @@ -290,7 +276,7 @@ static Symtab *GetUnifiedSymbolTable(const lldb::ModuleSP &module_sp) { } size_t SBModule::GetNumSymbols() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModule, GetNumSymbols); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (Symtab *symtab = GetUnifiedSymbolTable(module_sp)) @@ -299,7 +285,7 @@ size_t SBModule::GetNumSymbols() { } SBSymbol SBModule::GetSymbolAtIndex(size_t idx) { - LLDB_RECORD_METHOD(lldb::SBSymbol, SBModule, GetSymbolAtIndex, (size_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBSymbol sb_symbol; ModuleSP module_sp(GetSP()); @@ -311,8 +297,7 @@ SBSymbol SBModule::GetSymbolAtIndex(size_t idx) { lldb::SBSymbol SBModule::FindSymbol(const char *name, lldb::SymbolType symbol_type) { - LLDB_RECORD_METHOD(lldb::SBSymbol, SBModule, FindSymbol, - 
(const char *, lldb::SymbolType), name, symbol_type); + LLDB_INSTRUMENT_VA(this, name, symbol_type); SBSymbol sb_symbol; if (name && name[0]) { @@ -328,8 +313,7 @@ lldb::SBSymbol SBModule::FindSymbol(const char *name, lldb::SBSymbolContextList SBModule::FindSymbols(const char *name, lldb::SymbolType symbol_type) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindSymbols, - (const char *, lldb::SymbolType), name, symbol_type); + LLDB_INSTRUMENT_VA(this, name, symbol_type); SBSymbolContextList sb_sc_list; if (name && name[0]) { @@ -356,7 +340,7 @@ lldb::SBSymbolContextList SBModule::FindSymbols(const char *name, } size_t SBModule::GetNumSections() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModule, GetNumSections); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -370,8 +354,7 @@ size_t SBModule::GetNumSections() { } SBSection SBModule::GetSectionAtIndex(size_t idx) { - LLDB_RECORD_METHOD(lldb::SBSection, SBModule, GetSectionAtIndex, (size_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBSection sb_section; ModuleSP module_sp(GetSP()); @@ -388,8 +371,7 @@ SBSection SBModule::GetSectionAtIndex(size_t idx) { lldb::SBSymbolContextList SBModule::FindFunctions(const char *name, uint32_t name_type_mask) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindFunctions, - (const char *, uint32_t), name, name_type_mask); + LLDB_INSTRUMENT_VA(this, name, name_type_mask); lldb::SBSymbolContextList sb_sc_list; ModuleSP module_sp(GetSP()); @@ -407,9 +389,7 @@ lldb::SBSymbolContextList SBModule::FindFunctions(const char *name, SBValueList SBModule::FindGlobalVariables(SBTarget &target, const char *name, uint32_t max_matches) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBModule, FindGlobalVariables, - (lldb::SBTarget &, const char *, uint32_t), target, name, - max_matches); + LLDB_INSTRUMENT_VA(this, target, name, max_matches); SBValueList sb_value_list; ModuleSP module_sp(GetSP()); @@ -431,8 +411,7 @@ SBValueList 
SBModule::FindGlobalVariables(SBTarget &target, const char *name, lldb::SBValue SBModule::FindFirstGlobalVariable(lldb::SBTarget &target, const char *name) { - LLDB_RECORD_METHOD(lldb::SBValue, SBModule, FindFirstGlobalVariable, - (lldb::SBTarget &, const char *), target, name); + LLDB_INSTRUMENT_VA(this, target, name); SBValueList sb_value_list(FindGlobalVariables(target, name, 1)); if (sb_value_list.IsValid() && sb_value_list.GetSize() > 0) @@ -441,8 +420,7 @@ lldb::SBValue SBModule::FindFirstGlobalVariable(lldb::SBTarget &target, } lldb::SBType SBModule::FindFirstType(const char *name_cstr) { - LLDB_RECORD_METHOD(lldb::SBType, SBModule, FindFirstType, (const char *), - name_cstr); + LLDB_INSTRUMENT_VA(this, name_cstr); SBType sb_type; ModuleSP module_sp(GetSP()); @@ -467,8 +445,7 @@ lldb::SBType SBModule::FindFirstType(const char *name_cstr) { } lldb::SBType SBModule::GetBasicType(lldb::BasicType type) { - LLDB_RECORD_METHOD(lldb::SBType, SBModule, GetBasicType, (lldb::BasicType), - type); + LLDB_INSTRUMENT_VA(this, type); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -484,8 +461,7 @@ lldb::SBType SBModule::GetBasicType(lldb::BasicType type) { } lldb::SBTypeList SBModule::FindTypes(const char *type) { - LLDB_RECORD_METHOD(lldb::SBTypeList, SBModule, FindTypes, (const char *), - type); + LLDB_INSTRUMENT_VA(this, type); SBTypeList retval; @@ -521,8 +497,7 @@ lldb::SBTypeList SBModule::FindTypes(const char *type) { } lldb::SBType SBModule::GetTypeByID(lldb::user_id_t uid) { - LLDB_RECORD_METHOD(lldb::SBType, SBModule, GetTypeByID, (lldb::user_id_t), - uid); + LLDB_INSTRUMENT_VA(this, uid); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -536,8 +511,7 @@ lldb::SBType SBModule::GetTypeByID(lldb::user_id_t uid) { } lldb::SBTypeList SBModule::GetTypes(uint32_t type_mask) { - LLDB_RECORD_METHOD(lldb::SBTypeList, SBModule, GetTypes, (uint32_t), - type_mask); + LLDB_INSTRUMENT_VA(this, type_mask); SBTypeList sb_type_list; @@ -556,8 +530,7 @@ lldb::SBTypeList 
SBModule::GetTypes(uint32_t type_mask) { } SBSection SBModule::FindSection(const char *sect_name) { - LLDB_RECORD_METHOD(lldb::SBSection, SBModule, FindSection, (const char *), - sect_name); + LLDB_INSTRUMENT_VA(this, sect_name); SBSection sb_section; @@ -578,7 +551,7 @@ SBSection SBModule::FindSection(const char *sect_name) { } lldb::ByteOrder SBModule::GetByteOrder() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBModule, GetByteOrder); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (module_sp) @@ -587,7 +560,7 @@ lldb::ByteOrder SBModule::GetByteOrder() { } const char *SBModule::GetTriple() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModule, GetTriple); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (module_sp) { @@ -602,7 +575,7 @@ const char *SBModule::GetTriple() { } uint32_t SBModule::GetAddressByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBModule, GetAddressByteSize); + LLDB_INSTRUMENT_VA(this); ModuleSP module_sp(GetSP()); if (module_sp) @@ -611,8 +584,7 @@ uint32_t SBModule::GetAddressByteSize() { } uint32_t SBModule::GetVersion(uint32_t *versions, uint32_t num_versions) { - LLDB_RECORD_METHOD(uint32_t, SBModule, GetVersion, (uint32_t *, uint32_t), - versions, num_versions); + LLDB_INSTRUMENT_VA(this, versions, num_versions); llvm::VersionTuple version; if (ModuleSP module_sp = GetSP()) @@ -640,8 +612,7 @@ uint32_t SBModule::GetVersion(uint32_t *versions, uint32_t num_versions) { } lldb::SBFileSpec SBModule::GetSymbolFileSpec() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule, - GetSymbolFileSpec); + LLDB_INSTRUMENT_VA(this); lldb::SBFileSpec sb_file_spec; ModuleSP module_sp(GetSP()); @@ -653,8 +624,7 @@ lldb::SBFileSpec SBModule::GetSymbolFileSpec() const { } lldb::SBAddress SBModule::GetObjectFileHeaderAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBModule, - GetObjectFileHeaderAddress); + LLDB_INSTRUMENT_VA(this); lldb::SBAddress sb_addr; ModuleSP module_sp(GetSP()); @@ 
-667,8 +637,7 @@ lldb::SBAddress SBModule::GetObjectFileHeaderAddress() const { } lldb::SBAddress SBModule::GetObjectFileEntryPointAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBModule, - GetObjectFileEntryPointAddress); + LLDB_INSTRUMENT_VA(this); lldb::SBAddress sb_addr; ModuleSP module_sp(GetSP()); @@ -681,15 +650,13 @@ lldb::SBAddress SBModule::GetObjectFileEntryPointAddress() const { } uint32_t SBModule::GetNumberAllocatedModules() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(uint32_t, SBModule, - GetNumberAllocatedModules); + LLDB_INSTRUMENT(); return Module::GetNumberAllocatedModules(); } void SBModule::GarbageCollectAllocatedModules() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBModule, - GarbageCollectAllocatedModules); + LLDB_INSTRUMENT(); const bool mandatory = false; ModuleList::RemoveOrphanSharedModules(mandatory); diff --git a/lldb/source/API/SBModuleSpec.cpp b/lldb/source/API/SBModuleSpec.cpp index ce2bb3556d4df..7deba8e971f87 100644 --- a/lldb/source/API/SBModuleSpec.cpp +++ b/lldb/source/API/SBModuleSpec.cpp @@ -7,31 +7,30 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBModuleSpec.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Module.h" #include "lldb/Core/ModuleSpec.h" #include "lldb/Host/Host.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" using namespace lldb; using namespace lldb_private; SBModuleSpec::SBModuleSpec() : m_opaque_up(new lldb_private::ModuleSpec()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModuleSpec); + LLDB_INSTRUMENT_VA(this); } SBModuleSpec::SBModuleSpec(const SBModuleSpec &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBModuleSpec, (const lldb::SBModuleSpec &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } const SBModuleSpec &SBModuleSpec::operator=(const SBModuleSpec &rhs) { - 
LLDB_RECORD_METHOD(const lldb::SBModuleSpec &, - SBModuleSpec, operator=,(const lldb::SBModuleSpec &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -41,76 +40,72 @@ const SBModuleSpec &SBModuleSpec::operator=(const SBModuleSpec &rhs) { SBModuleSpec::~SBModuleSpec() = default; bool SBModuleSpec::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModuleSpec, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBModuleSpec::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModuleSpec, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->operator bool(); } void SBModuleSpec::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBModuleSpec, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up->Clear(); } SBFileSpec SBModuleSpec::GetFileSpec() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec, GetFileSpec); + LLDB_INSTRUMENT_VA(this); SBFileSpec sb_spec(m_opaque_up->GetFileSpec()); return sb_spec; } void SBModuleSpec::SetFileSpec(const lldb::SBFileSpec &sb_spec) { - LLDB_RECORD_METHOD(void, SBModuleSpec, SetFileSpec, - (const lldb::SBFileSpec &), sb_spec); + LLDB_INSTRUMENT_VA(this, sb_spec); m_opaque_up->GetFileSpec() = *sb_spec; } lldb::SBFileSpec SBModuleSpec::GetPlatformFileSpec() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec, - GetPlatformFileSpec); + LLDB_INSTRUMENT_VA(this); return SBFileSpec(m_opaque_up->GetPlatformFileSpec()); } void SBModuleSpec::SetPlatformFileSpec(const lldb::SBFileSpec &sb_spec) { - LLDB_RECORD_METHOD(void, SBModuleSpec, SetPlatformFileSpec, - (const lldb::SBFileSpec &), sb_spec); + LLDB_INSTRUMENT_VA(this, sb_spec); m_opaque_up->GetPlatformFileSpec() = *sb_spec; } lldb::SBFileSpec SBModuleSpec::GetSymbolFileSpec() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec, GetSymbolFileSpec); + LLDB_INSTRUMENT_VA(this); return SBFileSpec(m_opaque_up->GetSymbolFileSpec()); } void 
SBModuleSpec::SetSymbolFileSpec(const lldb::SBFileSpec &sb_spec) { - LLDB_RECORD_METHOD(void, SBModuleSpec, SetSymbolFileSpec, - (const lldb::SBFileSpec &), sb_spec); + LLDB_INSTRUMENT_VA(this, sb_spec); m_opaque_up->GetSymbolFileSpec() = *sb_spec; } const char *SBModuleSpec::GetObjectName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModuleSpec, GetObjectName); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetObjectName().GetCString(); } void SBModuleSpec::SetObjectName(const char *name) { - LLDB_RECORD_METHOD(void, SBModuleSpec, SetObjectName, (const char *), name); + LLDB_INSTRUMENT_VA(this, name); m_opaque_up->GetObjectName().SetCString(name); } const char *SBModuleSpec::GetTriple() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModuleSpec, GetTriple); + LLDB_INSTRUMENT_VA(this); std::string triple(m_opaque_up->GetArchitecture().GetTriple().str()); // Unique the string so we don't run into ownership issues since the const @@ -121,48 +116,46 @@ const char *SBModuleSpec::GetTriple() { } void SBModuleSpec::SetTriple(const char *triple) { - LLDB_RECORD_METHOD(void, SBModuleSpec, SetTriple, (const char *), triple); + LLDB_INSTRUMENT_VA(this, triple); m_opaque_up->GetArchitecture().SetTriple(triple); } const uint8_t *SBModuleSpec::GetUUIDBytes() { + LLDB_INSTRUMENT_VA(this) return m_opaque_up->GetUUID().GetBytes().data(); } size_t SBModuleSpec::GetUUIDLength() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModuleSpec, GetUUIDLength); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetUUID().GetBytes().size(); } bool SBModuleSpec::SetUUIDBytes(const uint8_t *uuid, size_t uuid_len) { + LLDB_INSTRUMENT_VA(this, uuid, uuid_len) m_opaque_up->GetUUID() = UUID::fromOptionalData(uuid, uuid_len); return m_opaque_up->GetUUID().IsValid(); } bool SBModuleSpec::GetDescription(lldb::SBStream &description) { - LLDB_RECORD_METHOD(bool, SBModuleSpec, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); m_opaque_up->Dump(description.ref()); 
return true; } SBModuleSpecList::SBModuleSpecList() : m_opaque_up(new ModuleSpecList()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModuleSpecList); + LLDB_INSTRUMENT_VA(this); } SBModuleSpecList::SBModuleSpecList(const SBModuleSpecList &rhs) : m_opaque_up(new ModuleSpecList(*rhs.m_opaque_up)) { - LLDB_RECORD_CONSTRUCTOR(SBModuleSpecList, (const lldb::SBModuleSpecList &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBModuleSpecList &SBModuleSpecList::operator=(const SBModuleSpecList &rhs) { - LLDB_RECORD_METHOD( - lldb::SBModuleSpecList &, - SBModuleSpecList, operator=,(const lldb::SBModuleSpecList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) *m_opaque_up = *rhs.m_opaque_up; @@ -172,8 +165,7 @@ SBModuleSpecList &SBModuleSpecList::operator=(const SBModuleSpecList &rhs) { SBModuleSpecList::~SBModuleSpecList() = default; SBModuleSpecList SBModuleSpecList::GetModuleSpecifications(const char *path) { - LLDB_RECORD_STATIC_METHOD(lldb::SBModuleSpecList, SBModuleSpecList, - GetModuleSpecifications, (const char *), path); + LLDB_INSTRUMENT_VA(path); SBModuleSpecList specs; FileSpec file_spec(path); @@ -184,28 +176,25 @@ SBModuleSpecList SBModuleSpecList::GetModuleSpecifications(const char *path) { } void SBModuleSpecList::Append(const SBModuleSpec &spec) { - LLDB_RECORD_METHOD(void, SBModuleSpecList, Append, - (const lldb::SBModuleSpec &), spec); + LLDB_INSTRUMENT_VA(this, spec); m_opaque_up->Append(*spec.m_opaque_up); } void SBModuleSpecList::Append(const SBModuleSpecList &spec_list) { - LLDB_RECORD_METHOD(void, SBModuleSpecList, Append, - (const lldb::SBModuleSpecList &), spec_list); + LLDB_INSTRUMENT_VA(this, spec_list); m_opaque_up->Append(*spec_list.m_opaque_up); } size_t SBModuleSpecList::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModuleSpecList, GetSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSize(); } SBModuleSpec SBModuleSpecList::GetSpecAtIndex(size_t i) { - LLDB_RECORD_METHOD(lldb::SBModuleSpec, SBModuleSpecList, GetSpecAtIndex, - 
(size_t), i); + LLDB_INSTRUMENT_VA(this, i); SBModuleSpec sb_module_spec; m_opaque_up->GetModuleSpecAtIndex(i, *sb_module_spec.m_opaque_up); @@ -214,9 +203,7 @@ SBModuleSpec SBModuleSpecList::GetSpecAtIndex(size_t i) { SBModuleSpec SBModuleSpecList::FindFirstMatchingSpec(const SBModuleSpec &match_spec) { - LLDB_RECORD_METHOD(lldb::SBModuleSpec, SBModuleSpecList, - FindFirstMatchingSpec, (const lldb::SBModuleSpec &), - match_spec); + LLDB_INSTRUMENT_VA(this, match_spec); SBModuleSpec sb_module_spec; m_opaque_up->FindMatchingModuleSpec(*match_spec.m_opaque_up, @@ -226,9 +213,7 @@ SBModuleSpecList::FindFirstMatchingSpec(const SBModuleSpec &match_spec) { SBModuleSpecList SBModuleSpecList::FindMatchingSpecs(const SBModuleSpec &match_spec) { - LLDB_RECORD_METHOD(lldb::SBModuleSpecList, SBModuleSpecList, - FindMatchingSpecs, (const lldb::SBModuleSpec &), - match_spec); + LLDB_INSTRUMENT_VA(this, match_spec); SBModuleSpecList specs; m_opaque_up->FindMatchingModuleSpecs(*match_spec.m_opaque_up, @@ -237,8 +222,7 @@ SBModuleSpecList::FindMatchingSpecs(const SBModuleSpec &match_spec) { } bool SBModuleSpecList::GetDescription(lldb::SBStream &description) { - LLDB_RECORD_METHOD(bool, SBModuleSpecList, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); m_opaque_up->Dump(description.ref()); return true; diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp index 5b0f1c3e1e213..d521a38b30e8d 100644 --- a/lldb/source/API/SBPlatform.cpp +++ b/lldb/source/API/SBPlatform.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBPlatform.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBEnvironment.h" #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" @@ -19,6 +18,7 @@ #include "lldb/Target/Target.h" #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/Args.h" +#include "lldb/Utility/Instrumentation.h" #include 
"lldb/Utility/Status.h" #include "llvm/Support/FileSystem.h" @@ -75,14 +75,13 @@ struct PlatformShellCommand { // SBPlatformConnectOptions SBPlatformConnectOptions::SBPlatformConnectOptions(const char *url) : m_opaque_ptr(new PlatformConnectOptions(url)) { - LLDB_RECORD_CONSTRUCTOR(SBPlatformConnectOptions, (const char *), url); + LLDB_INSTRUMENT_VA(this, url); } SBPlatformConnectOptions::SBPlatformConnectOptions( const SBPlatformConnectOptions &rhs) : m_opaque_ptr(new PlatformConnectOptions()) { - LLDB_RECORD_CONSTRUCTOR(SBPlatformConnectOptions, - (const lldb::SBPlatformConnectOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); *m_opaque_ptr = *rhs.m_opaque_ptr; } @@ -91,18 +90,14 @@ SBPlatformConnectOptions::~SBPlatformConnectOptions() { delete m_opaque_ptr; } SBPlatformConnectOptions & SBPlatformConnectOptions::operator=(const SBPlatformConnectOptions &rhs) { - LLDB_RECORD_METHOD( - SBPlatformConnectOptions &, - SBPlatformConnectOptions, operator=,( - const lldb::SBPlatformConnectOptions &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); *m_opaque_ptr = *rhs.m_opaque_ptr; return *this; } const char *SBPlatformConnectOptions::GetURL() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformConnectOptions, GetURL); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_url.empty()) return nullptr; @@ -110,8 +105,7 @@ const char *SBPlatformConnectOptions::GetURL() { } void SBPlatformConnectOptions::SetURL(const char *url) { - LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, SetURL, (const char *), - url); + LLDB_INSTRUMENT_VA(this, url); if (url && url[0]) m_opaque_ptr->m_url = url; @@ -120,7 +114,7 @@ void SBPlatformConnectOptions::SetURL(const char *url) { } bool SBPlatformConnectOptions::GetRsyncEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBPlatformConnectOptions, GetRsyncEnabled); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr->m_rsync_enabled; } @@ -128,9 +122,8 @@ bool SBPlatformConnectOptions::GetRsyncEnabled() { void SBPlatformConnectOptions::EnableRsync( const 
char *options, const char *remote_path_prefix, bool omit_hostname_from_remote_path) { - LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, EnableRsync, - (const char *, const char *, bool), options, - remote_path_prefix, omit_hostname_from_remote_path); + LLDB_INSTRUMENT_VA(this, options, remote_path_prefix, + omit_hostname_from_remote_path); m_opaque_ptr->m_rsync_enabled = true; m_opaque_ptr->m_rsync_omit_hostname_from_remote_path = @@ -147,21 +140,19 @@ void SBPlatformConnectOptions::EnableRsync( } void SBPlatformConnectOptions::DisableRsync() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatformConnectOptions, DisableRsync); + LLDB_INSTRUMENT_VA(this); m_opaque_ptr->m_rsync_enabled = false; } const char *SBPlatformConnectOptions::GetLocalCacheDirectory() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformConnectOptions, - GetLocalCacheDirectory); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr->m_local_cache_directory.GetCString(); } void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) { - LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, SetLocalCacheDirectory, - (const char *), path); + LLDB_INSTRUMENT_VA(this, path); if (path && path[0]) m_opaque_ptr->m_local_cache_directory.SetCString(path); @@ -173,21 +164,18 @@ void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) { SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_interpreter, const char *shell_command) : m_opaque_ptr(new PlatformShellCommand(shell_interpreter, shell_command)) { - LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *, const char *), - shell_interpreter, shell_command); + LLDB_INSTRUMENT_VA(this, shell_interpreter, shell_command); } SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_command) : m_opaque_ptr(new PlatformShellCommand(shell_command)) { - LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *), - shell_command); + LLDB_INSTRUMENT_VA(this, shell_command); } 
SBPlatformShellCommand::SBPlatformShellCommand( const SBPlatformShellCommand &rhs) : m_opaque_ptr(new PlatformShellCommand()) { - LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, - (const lldb::SBPlatformShellCommand &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); *m_opaque_ptr = *rhs.m_opaque_ptr; } @@ -195,10 +183,7 @@ SBPlatformShellCommand::SBPlatformShellCommand( SBPlatformShellCommand & SBPlatformShellCommand::operator=(const SBPlatformShellCommand &rhs) { - LLDB_RECORD_METHOD( - SBPlatformShellCommand &, - SBPlatformShellCommand, operator=,(const lldb::SBPlatformShellCommand &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); *m_opaque_ptr = *rhs.m_opaque_ptr; return *this; @@ -207,7 +192,7 @@ SBPlatformShellCommand::operator=(const SBPlatformShellCommand &rhs) { SBPlatformShellCommand::~SBPlatformShellCommand() { delete m_opaque_ptr; } void SBPlatformShellCommand::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatformShellCommand, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_ptr->m_output = std::string(); m_opaque_ptr->m_status = 0; @@ -215,7 +200,7 @@ void SBPlatformShellCommand::Clear() { } const char *SBPlatformShellCommand::GetShell() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetShell); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_shell.empty()) return nullptr; @@ -223,8 +208,7 @@ const char *SBPlatformShellCommand::GetShell() { } void SBPlatformShellCommand::SetShell(const char *shell_interpreter) { - LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetShell, (const char *), - shell_interpreter); + LLDB_INSTRUMENT_VA(this, shell_interpreter); if (shell_interpreter && shell_interpreter[0]) m_opaque_ptr->m_shell = shell_interpreter; @@ -233,7 +217,7 @@ void SBPlatformShellCommand::SetShell(const char *shell_interpreter) { } const char *SBPlatformShellCommand::GetCommand() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetCommand); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_command.empty()) return nullptr; @@ 
-241,8 +225,7 @@ const char *SBPlatformShellCommand::GetCommand() { } void SBPlatformShellCommand::SetCommand(const char *shell_command) { - LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetCommand, (const char *), - shell_command); + LLDB_INSTRUMENT_VA(this, shell_command); if (shell_command && shell_command[0]) m_opaque_ptr->m_command = shell_command; @@ -251,8 +234,7 @@ void SBPlatformShellCommand::SetCommand(const char *shell_command) { } const char *SBPlatformShellCommand::GetWorkingDirectory() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, - GetWorkingDirectory); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_working_dir.empty()) return nullptr; @@ -260,8 +242,7 @@ const char *SBPlatformShellCommand::GetWorkingDirectory() { } void SBPlatformShellCommand::SetWorkingDirectory(const char *path) { - LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetWorkingDirectory, - (const char *), path); + LLDB_INSTRUMENT_VA(this, path); if (path && path[0]) m_opaque_ptr->m_working_dir = path; @@ -270,8 +251,7 @@ void SBPlatformShellCommand::SetWorkingDirectory(const char *path) { } uint32_t SBPlatformShellCommand::GetTimeoutSeconds() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatformShellCommand, - GetTimeoutSeconds); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_timeout) return m_opaque_ptr->m_timeout->count(); @@ -279,8 +259,7 @@ uint32_t SBPlatformShellCommand::GetTimeoutSeconds() { } void SBPlatformShellCommand::SetTimeoutSeconds(uint32_t sec) { - LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetTimeoutSeconds, - (uint32_t), sec); + LLDB_INSTRUMENT_VA(this, sec); if (sec == UINT32_MAX) m_opaque_ptr->m_timeout = llvm::None; @@ -289,19 +268,19 @@ void SBPlatformShellCommand::SetTimeoutSeconds(uint32_t sec) { } int SBPlatformShellCommand::GetSignal() { - LLDB_RECORD_METHOD_NO_ARGS(int, SBPlatformShellCommand, GetSignal); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr->m_signo; } int SBPlatformShellCommand::GetStatus() { - 
LLDB_RECORD_METHOD_NO_ARGS(int, SBPlatformShellCommand, GetStatus); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr->m_status; } const char *SBPlatformShellCommand::GetOutput() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetOutput); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr->m_output.empty()) return nullptr; @@ -309,10 +288,10 @@ const char *SBPlatformShellCommand::GetOutput() { } // SBPlatform -SBPlatform::SBPlatform() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBPlatform); } +SBPlatform::SBPlatform() { LLDB_INSTRUMENT_VA(this); } SBPlatform::SBPlatform(const char *platform_name) { - LLDB_RECORD_CONSTRUCTOR(SBPlatform, (const char *), platform_name); + LLDB_INSTRUMENT_VA(this, platform_name); Status error; if (platform_name && platform_name[0]) @@ -320,14 +299,13 @@ SBPlatform::SBPlatform(const char *platform_name) { } SBPlatform::SBPlatform(const SBPlatform &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBPlatform, (const lldb::SBPlatform &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = rhs.m_opaque_sp; } SBPlatform &SBPlatform::operator=(const SBPlatform &rhs) { - LLDB_RECORD_METHOD(SBPlatform &, - SBPlatform, operator=,(const lldb::SBPlatform &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = rhs.m_opaque_sp; return *this; @@ -336,8 +314,7 @@ SBPlatform &SBPlatform::operator=(const SBPlatform &rhs) { SBPlatform::~SBPlatform() = default; SBPlatform SBPlatform::GetHostPlatform() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBPlatform, SBPlatform, - GetHostPlatform); + LLDB_INSTRUMENT(); SBPlatform host_platform; host_platform.m_opaque_sp = Platform::GetHostPlatform(); @@ -345,23 +322,23 @@ SBPlatform SBPlatform::GetHostPlatform() { } bool SBPlatform::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBPlatform, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBPlatform::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBPlatform, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() 
!= nullptr; } void SBPlatform::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatform, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); } const char *SBPlatform::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetName); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) @@ -376,7 +353,7 @@ void SBPlatform::SetSP(const lldb::PlatformSP &platform_sp) { } const char *SBPlatform::GetWorkingDirectory() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetWorkingDirectory); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) @@ -385,8 +362,7 @@ const char *SBPlatform::GetWorkingDirectory() { } bool SBPlatform::SetWorkingDirectory(const char *path) { - LLDB_RECORD_METHOD(bool, SBPlatform, SetWorkingDirectory, (const char *), - path); + LLDB_INSTRUMENT_VA(this, path); PlatformSP platform_sp(GetSP()); if (platform_sp) { @@ -400,8 +376,7 @@ bool SBPlatform::SetWorkingDirectory(const char *path) { } SBError SBPlatform::ConnectRemote(SBPlatformConnectOptions &connect_options) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, ConnectRemote, - (lldb::SBPlatformConnectOptions &), connect_options); + LLDB_INSTRUMENT_VA(this, connect_options); SBError sb_error; PlatformSP platform_sp(GetSP()); @@ -416,7 +391,7 @@ SBError SBPlatform::ConnectRemote(SBPlatformConnectOptions &connect_options) { } void SBPlatform::DisconnectRemote() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatform, DisconnectRemote); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) @@ -424,7 +399,7 @@ void SBPlatform::DisconnectRemote() { } bool SBPlatform::IsConnected() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBPlatform, IsConnected); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) @@ -433,7 +408,7 @@ bool SBPlatform::IsConnected() { } const char *SBPlatform::GetTriple() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetTriple); + LLDB_INSTRUMENT_VA(this); PlatformSP 
platform_sp(GetSP()); if (platform_sp) { @@ -448,7 +423,7 @@ const char *SBPlatform::GetTriple() { } const char *SBPlatform::GetOSBuild() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetOSBuild); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) { @@ -463,7 +438,7 @@ const char *SBPlatform::GetOSBuild() { } const char *SBPlatform::GetOSDescription() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetOSDescription); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) { @@ -478,7 +453,7 @@ const char *SBPlatform::GetOSDescription() { } const char *SBPlatform::GetHostname() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetHostname); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) @@ -487,7 +462,7 @@ const char *SBPlatform::GetHostname() { } uint32_t SBPlatform::GetOSMajorVersion() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSMajorVersion); + LLDB_INSTRUMENT_VA(this); llvm::VersionTuple version; if (PlatformSP platform_sp = GetSP()) @@ -496,7 +471,7 @@ uint32_t SBPlatform::GetOSMajorVersion() { } uint32_t SBPlatform::GetOSMinorVersion() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSMinorVersion); + LLDB_INSTRUMENT_VA(this); llvm::VersionTuple version; if (PlatformSP platform_sp = GetSP()) @@ -505,7 +480,7 @@ uint32_t SBPlatform::GetOSMinorVersion() { } uint32_t SBPlatform::GetOSUpdateVersion() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSUpdateVersion); + LLDB_INSTRUMENT_VA(this); llvm::VersionTuple version; if (PlatformSP platform_sp = GetSP()) @@ -514,14 +489,13 @@ uint32_t SBPlatform::GetOSUpdateVersion() { } void SBPlatform::SetSDKRoot(const char *sysroot) { - LLDB_RECORD_METHOD(void, SBPlatform, SetSDKRoot, (const char *), sysroot); + LLDB_INSTRUMENT_VA(this, sysroot); if (PlatformSP platform_sp = GetSP()) platform_sp->SetSDKRootDirectory(ConstString(sysroot)); } SBError SBPlatform::Get(SBFileSpec &src, 
SBFileSpec &dst) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Get, - (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst); + LLDB_INSTRUMENT_VA(this, src, dst); SBError sb_error; PlatformSP platform_sp(GetSP()); @@ -534,8 +508,7 @@ SBError SBPlatform::Get(SBFileSpec &src, SBFileSpec &dst) { } SBError SBPlatform::Put(SBFileSpec &src, SBFileSpec &dst) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Put, - (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst); + LLDB_INSTRUMENT_VA(this, src, dst); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { if (src.Exists()) { uint32_t permissions = FileSystem::Instance().GetPermissions(src.ref()); @@ -557,8 +530,7 @@ SBError SBPlatform::Put(SBFileSpec &src, SBFileSpec &dst) { } SBError SBPlatform::Install(SBFileSpec &src, SBFileSpec &dst) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Install, - (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst); + LLDB_INSTRUMENT_VA(this, src, dst); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { if (src.Exists()) return platform_sp->Install(src.ref(), dst.ref()); @@ -571,8 +543,7 @@ SBError SBPlatform::Install(SBFileSpec &src, SBFileSpec &dst) { } SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Run, - (lldb::SBPlatformShellCommand &), shell_command); + LLDB_INSTRUMENT_VA(this, shell_command); return ExecuteConnected( [&](const lldb::PlatformSP &platform_sp) { const char *command = shell_command.GetCommand(); @@ -595,8 +566,7 @@ SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) { } SBError SBPlatform::Launch(SBLaunchInfo &launch_info) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Launch, (lldb::SBLaunchInfo &), - launch_info); + LLDB_INSTRUMENT_VA(this, launch_info); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { ProcessLaunchInfo info = launch_info.ref(); Status error = platform_sp->LaunchProcess(info); @@ -606,7 +576,7 @@ SBError 
SBPlatform::Launch(SBLaunchInfo &launch_info) { } SBError SBPlatform::Kill(const lldb::pid_t pid) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Kill, (const lldb::pid_t), pid); + LLDB_INSTRUMENT_VA(this, pid); return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) { return platform_sp->KillProcess(pid); }); @@ -628,8 +598,7 @@ SBError SBPlatform::ExecuteConnected( } SBError SBPlatform::MakeDirectory(const char *path, uint32_t file_permissions) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, MakeDirectory, - (const char *, uint32_t), path, file_permissions); + LLDB_INSTRUMENT_VA(this, path, file_permissions); SBError sb_error; PlatformSP platform_sp(GetSP()); @@ -643,8 +612,7 @@ SBError SBPlatform::MakeDirectory(const char *path, uint32_t file_permissions) { } uint32_t SBPlatform::GetFilePermissions(const char *path) { - LLDB_RECORD_METHOD(uint32_t, SBPlatform, GetFilePermissions, (const char *), - path); + LLDB_INSTRUMENT_VA(this, path); PlatformSP platform_sp(GetSP()); if (platform_sp) { @@ -657,8 +625,7 @@ uint32_t SBPlatform::GetFilePermissions(const char *path) { SBError SBPlatform::SetFilePermissions(const char *path, uint32_t file_permissions) { - LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, SetFilePermissions, - (const char *, uint32_t), path, file_permissions); + LLDB_INSTRUMENT_VA(this, path, file_permissions); SBError sb_error; PlatformSP platform_sp(GetSP()); @@ -672,8 +639,7 @@ SBError SBPlatform::SetFilePermissions(const char *path, } SBUnixSignals SBPlatform::GetUnixSignals() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBUnixSignals, SBPlatform, - GetUnixSignals); + LLDB_INSTRUMENT_VA(this); if (auto platform_sp = GetSP()) return SBUnixSignals{platform_sp}; @@ -682,7 +648,7 @@ SBUnixSignals SBPlatform::GetUnixSignals() const { } SBEnvironment SBPlatform::GetEnvironment() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBPlatform, GetEnvironment); + LLDB_INSTRUMENT_VA(this); PlatformSP platform_sp(GetSP()); if (platform_sp) { 
diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index d598e483028df..2538013412b68 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBProcess.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include @@ -49,22 +49,21 @@ using namespace lldb; using namespace lldb_private; -SBProcess::SBProcess() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcess); } +SBProcess::SBProcess() { LLDB_INSTRUMENT_VA(this); } // SBProcess constructor SBProcess::SBProcess(const SBProcess &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBProcess, (const lldb::SBProcess &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBProcess::SBProcess(const lldb::ProcessSP &process_sp) : m_opaque_wp(process_sp) { - LLDB_RECORD_CONSTRUCTOR(SBProcess, (const lldb::ProcessSP &), process_sp); + LLDB_INSTRUMENT_VA(this, process_sp); } const SBProcess &SBProcess::operator=(const SBProcess &rhs) { - LLDB_RECORD_METHOD(const lldb::SBProcess &, - SBProcess, operator=,(const lldb::SBProcess &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_wp = rhs.m_opaque_wp; @@ -75,14 +74,13 @@ const SBProcess &SBProcess::operator=(const SBProcess &rhs) { SBProcess::~SBProcess() = default; const char *SBProcess::GetBroadcasterClassName() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBProcess, - GetBroadcasterClassName); + LLDB_INSTRUMENT(); return Process::GetStaticBroadcasterClass().AsCString(); } const char *SBProcess::GetPluginName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetPluginName); + LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -92,7 +90,7 @@ const char *SBProcess::GetPluginName() { } const char *SBProcess::GetShortPluginName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetShortPluginName); + 
LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -106,17 +104,17 @@ lldb::ProcessSP SBProcess::GetSP() const { return m_opaque_wp.lock(); } void SBProcess::SetSP(const ProcessSP &process_sp) { m_opaque_wp = process_sp; } void SBProcess::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBProcess, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_wp.reset(); } bool SBProcess::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcess, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBProcess::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcess, operator bool); + LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(m_opaque_wp.lock()); return ((bool)process_sp && process_sp->IsValid()); @@ -128,11 +126,7 @@ bool SBProcess::RemoteLaunch(char const **argv, char const **envp, const char *working_directory, uint32_t launch_flags, bool stop_at_entry, lldb::SBError &error) { - LLDB_RECORD_METHOD(bool, SBProcess, RemoteLaunch, - (const char **, const char **, const char *, const char *, - const char *, const char *, uint32_t, bool, - lldb::SBError &), - argv, envp, stdin_path, stdout_path, stderr_path, + LLDB_INSTRUMENT_VA(this, argv, envp, stdin_path, stdout_path, stderr_path, working_directory, launch_flags, stop_at_entry, error); ProcessSP process_sp(GetSP()); @@ -165,8 +159,7 @@ bool SBProcess::RemoteLaunch(char const **argv, char const **envp, bool SBProcess::RemoteAttachToProcessWithID(lldb::pid_t pid, lldb::SBError &error) { - LLDB_RECORD_METHOD(bool, SBProcess, RemoteAttachToProcessWithID, - (lldb::pid_t, lldb::SBError &), pid, error); + LLDB_INSTRUMENT_VA(this, pid, error); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -188,7 +181,7 @@ bool SBProcess::RemoteAttachToProcessWithID(lldb::pid_t pid, } uint32_t SBProcess::GetNumThreads() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumThreads); + LLDB_INSTRUMENT_VA(this); uint32_t num_threads = 0; ProcessSP process_sp(GetSP()); @@ -205,8 
+198,7 @@ uint32_t SBProcess::GetNumThreads() { } SBThread SBProcess::GetSelectedThread() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBProcess, - GetSelectedThread); + LLDB_INSTRUMENT_VA(this); SBThread sb_thread; ThreadSP thread_sp; @@ -223,8 +215,7 @@ SBThread SBProcess::GetSelectedThread() const { SBThread SBProcess::CreateOSPluginThread(lldb::tid_t tid, lldb::addr_t context) { - LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, CreateOSPluginThread, - (lldb::tid_t, lldb::addr_t), tid, context); + LLDB_INSTRUMENT_VA(this, tid, context); SBThread sb_thread; ThreadSP thread_sp; @@ -240,7 +231,7 @@ SBThread SBProcess::CreateOSPluginThread(lldb::tid_t tid, } SBTarget SBProcess::GetTarget() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBProcess, GetTarget); + LLDB_INSTRUMENT_VA(this); SBTarget sb_target; TargetSP target_sp; @@ -254,8 +245,7 @@ SBTarget SBProcess::GetTarget() const { } size_t SBProcess::PutSTDIN(const char *src, size_t src_len) { - LLDB_RECORD_METHOD(size_t, SBProcess, PutSTDIN, (const char *, size_t), src, - src_len); + LLDB_INSTRUMENT_VA(this, src, src_len); size_t ret_val = 0; ProcessSP process_sp(GetSP()); @@ -268,8 +258,7 @@ size_t SBProcess::PutSTDIN(const char *src, size_t src_len) { } size_t SBProcess::GetSTDOUT(char *dst, size_t dst_len) const { - LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetSTDOUT, (char *, size_t), dst, - "", dst_len); + LLDB_INSTRUMENT_VA(this, dst, dst_len); size_t bytes_read = 0; ProcessSP process_sp(GetSP()); @@ -282,8 +271,7 @@ size_t SBProcess::GetSTDOUT(char *dst, size_t dst_len) const { } size_t SBProcess::GetSTDERR(char *dst, size_t dst_len) const { - LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetSTDERR, (char *, size_t), dst, - "", dst_len); + LLDB_INSTRUMENT_VA(this, dst, dst_len); size_t bytes_read = 0; ProcessSP process_sp(GetSP()); @@ -296,8 +284,7 @@ size_t SBProcess::GetSTDERR(char *dst, size_t dst_len) const { } size_t SBProcess::GetAsyncProfileData(char *dst, size_t dst_len) 
const { - LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetAsyncProfileData, - (char *, size_t), dst, "", dst_len); + LLDB_INSTRUMENT_VA(this, dst, dst_len); size_t bytes_read = 0; ProcessSP process_sp(GetSP()); @@ -310,23 +297,20 @@ size_t SBProcess::GetAsyncProfileData(char *dst, size_t dst_len) const { } void SBProcess::ReportEventState(const SBEvent &event, SBFile out) const { - LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState, - (const SBEvent &, SBFile), event, out); + LLDB_INSTRUMENT_VA(this, event, out); return ReportEventState(event, out.m_opaque_sp); } void SBProcess::ReportEventState(const SBEvent &event, FILE *out) const { - LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState, - (const lldb::SBEvent &, FILE *), event, out); + LLDB_INSTRUMENT_VA(this, event, out); FileSP outfile = std::make_shared(out, false); return ReportEventState(event, outfile); } void SBProcess::ReportEventState(const SBEvent &event, FileSP out) const { - LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState, - (const SBEvent &, FileSP), event, out); + LLDB_INSTRUMENT_VA(this, event, out); if (!out || !out->IsValid()) return; @@ -342,9 +326,7 @@ void SBProcess::ReportEventState(const SBEvent &event, FileSP out) const { void SBProcess::AppendEventStateReport(const SBEvent &event, SBCommandReturnObject &result) { - LLDB_RECORD_METHOD(void, SBProcess, AppendEventStateReport, - (const lldb::SBEvent &, lldb::SBCommandReturnObject &), - event, result); + LLDB_INSTRUMENT_VA(this, event, result); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -358,8 +340,7 @@ void SBProcess::AppendEventStateReport(const SBEvent &event, } bool SBProcess::SetSelectedThread(const SBThread &thread) { - LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThread, - (const lldb::SBThread &), thread); + LLDB_INSTRUMENT_VA(this, thread); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -372,9 +353,7 @@ bool SBProcess::SetSelectedThread(const SBThread &thread) { } bool 
SBProcess::SetSelectedThreadByID(lldb::tid_t tid) { - LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThreadByID, (lldb::tid_t), - tid); - + LLDB_INSTRUMENT_VA(this, tid); bool ret_val = false; ProcessSP process_sp(GetSP()); @@ -388,8 +367,7 @@ bool SBProcess::SetSelectedThreadByID(lldb::tid_t tid) { } bool SBProcess::SetSelectedThreadByIndexID(uint32_t index_id) { - LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThreadByIndexID, (uint32_t), - index_id); + LLDB_INSTRUMENT_VA(this, index_id); bool ret_val = false; ProcessSP process_sp(GetSP()); @@ -404,8 +382,7 @@ bool SBProcess::SetSelectedThreadByIndexID(uint32_t index_id) { } SBThread SBProcess::GetThreadAtIndex(size_t index) { - LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadAtIndex, (size_t), - index); + LLDB_INSTRUMENT_VA(this, index); SBThread sb_thread; ThreadSP thread_sp; @@ -423,7 +400,7 @@ SBThread SBProcess::GetThreadAtIndex(size_t index) { } uint32_t SBProcess::GetNumQueues() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumQueues); + LLDB_INSTRUMENT_VA(this); uint32_t num_queues = 0; ProcessSP process_sp(GetSP()); @@ -440,8 +417,7 @@ uint32_t SBProcess::GetNumQueues() { } SBQueue SBProcess::GetQueueAtIndex(size_t index) { - LLDB_RECORD_METHOD(lldb::SBQueue, SBProcess, GetQueueAtIndex, (size_t), - index); + LLDB_INSTRUMENT_VA(this, index); SBQueue sb_queue; QueueSP queue_sp; @@ -460,8 +436,7 @@ SBQueue SBProcess::GetQueueAtIndex(size_t index) { } uint32_t SBProcess::GetStopID(bool include_expression_stops) { - LLDB_RECORD_METHOD(uint32_t, SBProcess, GetStopID, (bool), - include_expression_stops); + LLDB_INSTRUMENT_VA(this, include_expression_stops); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -476,8 +451,7 @@ uint32_t SBProcess::GetStopID(bool include_expression_stops) { } SBEvent SBProcess::GetStopEventForStopID(uint32_t stop_id) { - LLDB_RECORD_METHOD(lldb::SBEvent, SBProcess, GetStopEventForStopID, - (uint32_t), stop_id); + LLDB_INSTRUMENT_VA(this, stop_id); SBEvent sb_event; 
EventSP event_sp; @@ -493,7 +467,7 @@ SBEvent SBProcess::GetStopEventForStopID(uint32_t stop_id) { } StateType SBProcess::GetState() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::StateType, SBProcess, GetState); + LLDB_INSTRUMENT_VA(this); StateType ret_val = eStateInvalid; ProcessSP process_sp(GetSP()); @@ -507,7 +481,7 @@ StateType SBProcess::GetState() { } int SBProcess::GetExitStatus() { - LLDB_RECORD_METHOD_NO_ARGS(int, SBProcess, GetExitStatus); + LLDB_INSTRUMENT_VA(this); int exit_status = 0; ProcessSP process_sp(GetSP()); @@ -521,7 +495,7 @@ int SBProcess::GetExitStatus() { } const char *SBProcess::GetExitDescription() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetExitDescription); + LLDB_INSTRUMENT_VA(this); const char *exit_desc = nullptr; ProcessSP process_sp(GetSP()); @@ -534,7 +508,7 @@ const char *SBProcess::GetExitDescription() { } lldb::pid_t SBProcess::GetProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcess, GetProcessID); + LLDB_INSTRUMENT_VA(this); lldb::pid_t ret_val = LLDB_INVALID_PROCESS_ID; ProcessSP process_sp(GetSP()); @@ -545,7 +519,7 @@ lldb::pid_t SBProcess::GetProcessID() { } uint32_t SBProcess::GetUniqueID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetUniqueID); + LLDB_INSTRUMENT_VA(this); uint32_t ret_val = 0; ProcessSP process_sp(GetSP()); @@ -555,7 +529,7 @@ uint32_t SBProcess::GetUniqueID() { } ByteOrder SBProcess::GetByteOrder() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ByteOrder, SBProcess, GetByteOrder); + LLDB_INSTRUMENT_VA(this); ByteOrder byteOrder = eByteOrderInvalid; ProcessSP process_sp(GetSP()); @@ -567,7 +541,7 @@ ByteOrder SBProcess::GetByteOrder() const { } uint32_t SBProcess::GetAddressByteSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBProcess, GetAddressByteSize); + LLDB_INSTRUMENT_VA(this); uint32_t size = 0; ProcessSP process_sp(GetSP()); @@ -579,7 +553,7 @@ uint32_t SBProcess::GetAddressByteSize() const { } SBError SBProcess::Continue() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Continue); + LLDB_INSTRUMENT_VA(this); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -599,7 +573,7 @@ SBError SBProcess::Continue() { } SBError SBProcess::Destroy() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Destroy); + LLDB_INSTRUMENT_VA(this); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -614,7 +588,7 @@ SBError SBProcess::Destroy() { } SBError SBProcess::Stop() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Stop); + LLDB_INSTRUMENT_VA(this); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -629,7 +603,7 @@ SBError SBProcess::Stop() { } SBError SBProcess::Kill() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Kill); + LLDB_INSTRUMENT_VA(this); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -644,7 +618,7 @@ SBError SBProcess::Kill() { } SBError SBProcess::Detach() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Detach); + LLDB_INSTRUMENT_VA(this); // FIXME: This should come from a process default. 
bool keep_stopped = false; @@ -652,7 +626,7 @@ SBError SBProcess::Detach() { } SBError SBProcess::Detach(bool keep_stopped) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, Detach, (bool), keep_stopped); + LLDB_INSTRUMENT_VA(this, keep_stopped); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -667,7 +641,7 @@ SBError SBProcess::Detach(bool keep_stopped) { } SBError SBProcess::Signal(int signo) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, Signal, (int), signo); + LLDB_INSTRUMENT_VA(this, signo); SBError sb_error; ProcessSP process_sp(GetSP()); @@ -682,7 +656,7 @@ SBError SBProcess::Signal(int signo) { } SBUnixSignals SBProcess::GetUnixSignals() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBUnixSignals, SBProcess, GetUnixSignals); + LLDB_INSTRUMENT_VA(this); if (auto process_sp = GetSP()) return SBUnixSignals{process_sp}; @@ -691,7 +665,7 @@ SBUnixSignals SBProcess::GetUnixSignals() { } void SBProcess::SendAsyncInterrupt() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBProcess, SendAsyncInterrupt); + LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -700,8 +674,7 @@ void SBProcess::SendAsyncInterrupt() { } SBThread SBProcess::GetThreadByID(tid_t tid) { - LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadByID, (lldb::tid_t), - tid); + LLDB_INSTRUMENT_VA(this, tid); SBThread sb_thread; ThreadSP thread_sp; @@ -719,8 +692,7 @@ SBThread SBProcess::GetThreadByID(tid_t tid) { } SBThread SBProcess::GetThreadByIndexID(uint32_t index_id) { - LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadByIndexID, (uint32_t), - index_id); + LLDB_INSTRUMENT_VA(this, index_id); SBThread sb_thread; ThreadSP thread_sp; @@ -739,8 +711,7 @@ SBThread SBProcess::GetThreadByIndexID(uint32_t index_id) { } StateType SBProcess::GetStateFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::StateType, SBProcess, GetStateFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); StateType ret_val = 
Process::ProcessEventData::GetStateFromEvent(event.get()); @@ -748,8 +719,7 @@ StateType SBProcess::GetStateFromEvent(const SBEvent &event) { } bool SBProcess::GetRestartedFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBProcess, GetRestartedFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); bool ret_val = Process::ProcessEventData::GetRestartedFromEvent(event.get()); @@ -757,8 +727,7 @@ bool SBProcess::GetRestartedFromEvent(const SBEvent &event) { } size_t SBProcess::GetNumRestartedReasonsFromEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(size_t, SBProcess, GetNumRestartedReasonsFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Process::ProcessEventData::GetNumRestartedReasons(event.get()); } @@ -766,16 +735,13 @@ size_t SBProcess::GetNumRestartedReasonsFromEvent(const lldb::SBEvent &event) { const char * SBProcess::GetRestartedReasonAtIndexFromEvent(const lldb::SBEvent &event, size_t idx) { - LLDB_RECORD_STATIC_METHOD(const char *, SBProcess, - GetRestartedReasonAtIndexFromEvent, - (const lldb::SBEvent &, size_t), event, idx); + LLDB_INSTRUMENT_VA(event, idx); return Process::ProcessEventData::GetRestartedReasonAtIndex(event.get(), idx); } SBProcess SBProcess::GetProcessFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBProcess, SBProcess, GetProcessFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); ProcessSP process_sp = Process::ProcessEventData::GetProcessFromEvent(event.get()); @@ -788,32 +754,27 @@ SBProcess SBProcess::GetProcessFromEvent(const SBEvent &event) { } bool SBProcess::GetInterruptedFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBProcess, GetInterruptedFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Process::ProcessEventData::GetInterruptedFromEvent(event.get()); } lldb::SBStructuredData SBProcess::GetStructuredDataFromEvent(const lldb::SBEvent &event) { - 
LLDB_RECORD_STATIC_METHOD(lldb::SBStructuredData, SBProcess, - GetStructuredDataFromEvent, (const lldb::SBEvent &), - event); + LLDB_INSTRUMENT_VA(event); return SBStructuredData(event.GetSP()); } bool SBProcess::EventIsProcessEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBProcess, EventIsProcessEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return (event.GetBroadcasterClass() == SBProcess::GetBroadcasterClass()) && !EventIsStructuredDataEvent(event); } bool SBProcess::EventIsStructuredDataEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBProcess, EventIsStructuredDataEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); EventSP event_sp = event.GetSP(); EventData *event_data = event_sp ? event_sp->GetData() : nullptr; @@ -822,9 +783,7 @@ bool SBProcess::EventIsStructuredDataEvent(const lldb::SBEvent &event) { } SBBroadcaster SBProcess::GetBroadcaster() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBProcess, - GetBroadcaster); - + LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(GetSP()); @@ -834,17 +793,14 @@ SBBroadcaster SBProcess::GetBroadcaster() const { } const char *SBProcess::GetBroadcasterClass() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBProcess, - GetBroadcasterClass); + LLDB_INSTRUMENT(); return Process::GetStaticBroadcasterClass().AsCString(); } size_t SBProcess::ReadMemory(addr_t addr, void *dst, size_t dst_len, SBError &sb_error) { - LLDB_RECORD_METHOD(size_t, SBProcess, ReadMemory, - (lldb::addr_t, void *, size_t, lldb::SBError &), addr, dst, - dst_len, sb_error); + LLDB_INSTRUMENT_VA(this, addr, dst, dst_len, sb_error); size_t bytes_read = 0; @@ -869,9 +825,7 @@ size_t SBProcess::ReadMemory(addr_t addr, void *dst, size_t dst_len, size_t SBProcess::ReadCStringFromMemory(addr_t addr, void *buf, size_t size, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD(size_t, SBProcess, ReadCStringFromMemory, - (lldb::addr_t, void *, size_t, lldb::SBError 
&), addr, buf, - size, sb_error); + LLDB_INSTRUMENT_VA(this, addr, buf, size, sb_error); size_t bytes_read = 0; ProcessSP process_sp(GetSP()); @@ -893,9 +847,7 @@ size_t SBProcess::ReadCStringFromMemory(addr_t addr, void *buf, size_t size, uint64_t SBProcess::ReadUnsignedFromMemory(addr_t addr, uint32_t byte_size, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD(uint64_t, SBProcess, ReadUnsignedFromMemory, - (lldb::addr_t, uint32_t, lldb::SBError &), addr, byte_size, - sb_error); + LLDB_INSTRUMENT_VA(this, addr, byte_size, sb_error); uint64_t value = 0; ProcessSP process_sp(GetSP()); @@ -917,8 +869,7 @@ uint64_t SBProcess::ReadUnsignedFromMemory(addr_t addr, uint32_t byte_size, lldb::addr_t SBProcess::ReadPointerFromMemory(addr_t addr, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD(lldb::addr_t, SBProcess, ReadPointerFromMemory, - (lldb::addr_t, lldb::SBError &), addr, sb_error); + LLDB_INSTRUMENT_VA(this, addr, sb_error); lldb::addr_t ptr = LLDB_INVALID_ADDRESS; ProcessSP process_sp(GetSP()); @@ -939,9 +890,7 @@ lldb::addr_t SBProcess::ReadPointerFromMemory(addr_t addr, size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len, SBError &sb_error) { - LLDB_RECORD_METHOD(size_t, SBProcess, WriteMemory, - (lldb::addr_t, const void *, size_t, lldb::SBError &), - addr, src, src_len, sb_error); + LLDB_INSTRUMENT_VA(this, addr, src, src_len, sb_error); size_t bytes_written = 0; @@ -963,8 +912,7 @@ size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len, } bool SBProcess::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBProcess, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -988,8 +936,7 @@ bool SBProcess::GetDescription(SBStream &description) { } SBStructuredData SBProcess::GetExtendedCrashInformation() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBProcess, - GetExtendedCrashInformation); + LLDB_INSTRUMENT_VA(this); 
SBStructuredData data; ProcessSP process_sp(GetSP()); if (!process_sp) @@ -1013,9 +960,7 @@ SBStructuredData SBProcess::GetExtendedCrashInformation() { uint32_t SBProcess::GetNumSupportedHardwareWatchpoints(lldb::SBError &sb_error) const { - LLDB_RECORD_METHOD_CONST(uint32_t, SBProcess, - GetNumSupportedHardwareWatchpoints, - (lldb::SBError &), sb_error); + LLDB_INSTRUMENT_VA(this, sb_error); uint32_t num = 0; ProcessSP process_sp(GetSP()); @@ -1031,9 +976,7 @@ SBProcess::GetNumSupportedHardwareWatchpoints(lldb::SBError &sb_error) const { uint32_t SBProcess::LoadImage(lldb::SBFileSpec &sb_remote_image_spec, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD(uint32_t, SBProcess, LoadImage, - (lldb::SBFileSpec &, lldb::SBError &), - sb_remote_image_spec, sb_error); + LLDB_INSTRUMENT_VA(this, sb_remote_image_spec, sb_error); return LoadImage(SBFileSpec(), sb_remote_image_spec, sb_error); } @@ -1041,10 +984,7 @@ uint32_t SBProcess::LoadImage(lldb::SBFileSpec &sb_remote_image_spec, uint32_t SBProcess::LoadImage(const lldb::SBFileSpec &sb_local_image_spec, const lldb::SBFileSpec &sb_remote_image_spec, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD( - uint32_t, SBProcess, LoadImage, - (const lldb::SBFileSpec &, const lldb::SBFileSpec &, lldb::SBError &), - sb_local_image_spec, sb_remote_image_spec, sb_error); + LLDB_INSTRUMENT_VA(this, sb_local_image_spec, sb_remote_image_spec, sb_error); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -1068,10 +1008,7 @@ uint32_t SBProcess::LoadImageUsingPaths(const lldb::SBFileSpec &image_spec, SBStringList &paths, lldb::SBFileSpec &loaded_path, lldb::SBError &error) { - LLDB_RECORD_METHOD(uint32_t, SBProcess, LoadImageUsingPaths, - (const lldb::SBFileSpec &, lldb::SBStringList &, - lldb::SBFileSpec &, lldb::SBError &), - image_spec, paths, loaded_path, error); + LLDB_INSTRUMENT_VA(this, image_spec, paths, loaded_path, error); ProcessSP process_sp(GetSP()); if (process_sp) { @@ -1103,8 +1040,7 @@ uint32_t 
SBProcess::LoadImageUsingPaths(const lldb::SBFileSpec &image_spec, } lldb::SBError SBProcess::UnloadImage(uint32_t image_token) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, UnloadImage, (uint32_t), - image_token); + LLDB_INSTRUMENT_VA(this, image_token); lldb::SBError sb_error; ProcessSP process_sp(GetSP()); @@ -1125,8 +1061,7 @@ lldb::SBError SBProcess::UnloadImage(uint32_t image_token) { } lldb::SBError SBProcess::SendEventData(const char *event_data) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, SendEventData, (const char *), - event_data); + LLDB_INSTRUMENT_VA(this, event_data); lldb::SBError sb_error; ProcessSP process_sp(GetSP()); @@ -1145,7 +1080,7 @@ lldb::SBError SBProcess::SendEventData(const char *event_data) { } uint32_t SBProcess::GetNumExtendedBacktraceTypes() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumExtendedBacktraceTypes); + LLDB_INSTRUMENT_VA(this); ProcessSP process_sp(GetSP()); if (process_sp && process_sp->GetSystemRuntime()) { @@ -1156,8 +1091,7 @@ uint32_t SBProcess::GetNumExtendedBacktraceTypes() { } const char *SBProcess::GetExtendedBacktraceTypeAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(const char *, SBProcess, GetExtendedBacktraceTypeAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); ProcessSP process_sp(GetSP()); if (process_sp && process_sp->GetSystemRuntime()) { @@ -1172,8 +1106,7 @@ const char *SBProcess::GetExtendedBacktraceTypeAtIndex(uint32_t idx) { } SBThreadCollection SBProcess::GetHistoryThreads(addr_t addr) { - LLDB_RECORD_METHOD(lldb::SBThreadCollection, SBProcess, GetHistoryThreads, - (lldb::addr_t), addr); + LLDB_INSTRUMENT_VA(this, addr); ProcessSP process_sp(GetSP()); SBThreadCollection threads; @@ -1185,8 +1118,7 @@ SBThreadCollection SBProcess::GetHistoryThreads(addr_t addr) { bool SBProcess::IsInstrumentationRuntimePresent( InstrumentationRuntimeType type) { - LLDB_RECORD_METHOD(bool, SBProcess, IsInstrumentationRuntimePresent, - (lldb::InstrumentationRuntimeType), type); + 
LLDB_INSTRUMENT_VA(this, type); ProcessSP process_sp(GetSP()); if (!process_sp) @@ -1205,8 +1137,7 @@ bool SBProcess::IsInstrumentationRuntimePresent( } lldb::SBError SBProcess::SaveCore(const char *file_name) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, SaveCore, (const char *), - file_name); + LLDB_INSTRUMENT_VA(this, file_name); lldb::SBError error; ProcessSP process_sp(GetSP()); @@ -1232,9 +1163,7 @@ lldb::SBError SBProcess::SaveCore(const char *file_name) { lldb::SBError SBProcess::GetMemoryRegionInfo(lldb::addr_t load_addr, SBMemoryRegionInfo &sb_region_info) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, GetMemoryRegionInfo, - (lldb::addr_t, lldb::SBMemoryRegionInfo &), load_addr, - sb_region_info); + LLDB_INSTRUMENT_VA(this, load_addr, sb_region_info); lldb::SBError sb_error; ProcessSP process_sp(GetSP()); @@ -1256,8 +1185,7 @@ SBProcess::GetMemoryRegionInfo(lldb::addr_t load_addr, } lldb::SBMemoryRegionInfoList SBProcess::GetMemoryRegions() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBMemoryRegionInfoList, SBProcess, - GetMemoryRegions); + LLDB_INSTRUMENT_VA(this); lldb::SBMemoryRegionInfoList sb_region_list; @@ -1274,7 +1202,7 @@ lldb::SBMemoryRegionInfoList SBProcess::GetMemoryRegions() { } lldb::SBProcessInfo SBProcess::GetProcessInfo() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcessInfo, SBProcess, GetProcessInfo); + LLDB_INSTRUMENT_VA(this); lldb::SBProcessInfo sb_proc_info; ProcessSP process_sp(GetSP()); @@ -1287,9 +1215,7 @@ lldb::SBProcessInfo SBProcess::GetProcessInfo() { lldb::addr_t SBProcess::AllocateMemory(size_t size, uint32_t permissions, lldb::SBError &sb_error) { - LLDB_RECORD_METHOD(lldb::addr_t, SBProcess, AllocateMemory, - (size_t, uint32_t, lldb::SBError &), size, permissions, - sb_error); + LLDB_INSTRUMENT_VA(this, size, permissions, sb_error); lldb::addr_t addr = LLDB_INVALID_ADDRESS; ProcessSP process_sp(GetSP()); @@ -1309,8 +1235,7 @@ lldb::addr_t SBProcess::AllocateMemory(size_t size, uint32_t permissions, } lldb::SBError 
SBProcess::DeallocateMemory(lldb::addr_t ptr) { - LLDB_RECORD_METHOD(lldb::SBError, SBProcess, DeallocateMemory, (lldb::addr_t), - ptr); + LLDB_INSTRUMENT_VA(this, ptr); lldb::SBError sb_error; ProcessSP process_sp(GetSP()); diff --git a/lldb/source/API/SBProcessInfo.cpp b/lldb/source/API/SBProcessInfo.cpp index 93d84d2880310..da3db75ff47ee 100644 --- a/lldb/source/API/SBProcessInfo.cpp +++ b/lldb/source/API/SBProcessInfo.cpp @@ -7,20 +7,18 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBProcessInfo.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBFileSpec.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/ProcessInfo.h" using namespace lldb; using namespace lldb_private; -SBProcessInfo::SBProcessInfo() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcessInfo); -} +SBProcessInfo::SBProcessInfo() { LLDB_INSTRUMENT_VA(this); } SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBProcessInfo, (const lldb::SBProcessInfo &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -28,9 +26,7 @@ SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) { SBProcessInfo::~SBProcessInfo() = default; SBProcessInfo &SBProcessInfo::operator=(const SBProcessInfo &rhs) { - LLDB_RECORD_METHOD(lldb::SBProcessInfo &, - SBProcessInfo, operator=,(const lldb::SBProcessInfo &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -49,17 +45,17 @@ void SBProcessInfo::SetProcessInfo(const ProcessInstanceInfo &proc_info_ref) { } bool SBProcessInfo::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcessInfo, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBProcessInfo::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcessInfo, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr; } const char 
*SBProcessInfo::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcessInfo, GetName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; if (m_opaque_up) { @@ -69,8 +65,7 @@ const char *SBProcessInfo::GetName() { } SBFileSpec SBProcessInfo::GetExecutableFile() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBProcessInfo, - GetExecutableFile); + LLDB_INSTRUMENT_VA(this); SBFileSpec file_spec; if (m_opaque_up) { @@ -80,7 +75,7 @@ SBFileSpec SBProcessInfo::GetExecutableFile() { } lldb::pid_t SBProcessInfo::GetProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcessInfo, GetProcessID); + LLDB_INSTRUMENT_VA(this); lldb::pid_t proc_id = LLDB_INVALID_PROCESS_ID; if (m_opaque_up) { @@ -90,7 +85,7 @@ lldb::pid_t SBProcessInfo::GetProcessID() { } uint32_t SBProcessInfo::GetUserID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetUserID); + LLDB_INSTRUMENT_VA(this); uint32_t user_id = UINT32_MAX; if (m_opaque_up) { @@ -100,7 +95,7 @@ uint32_t SBProcessInfo::GetUserID() { } uint32_t SBProcessInfo::GetGroupID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetGroupID); + LLDB_INSTRUMENT_VA(this); uint32_t group_id = UINT32_MAX; if (m_opaque_up) { @@ -110,7 +105,7 @@ uint32_t SBProcessInfo::GetGroupID() { } bool SBProcessInfo::UserIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, UserIDIsValid); + LLDB_INSTRUMENT_VA(this); bool is_valid = false; if (m_opaque_up) { @@ -120,7 +115,7 @@ bool SBProcessInfo::UserIDIsValid() { } bool SBProcessInfo::GroupIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, GroupIDIsValid); + LLDB_INSTRUMENT_VA(this); bool is_valid = false; if (m_opaque_up) { @@ -130,7 +125,7 @@ bool SBProcessInfo::GroupIDIsValid() { } uint32_t SBProcessInfo::GetEffectiveUserID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetEffectiveUserID); + LLDB_INSTRUMENT_VA(this); uint32_t user_id = UINT32_MAX; if (m_opaque_up) { @@ -140,7 +135,7 @@ uint32_t SBProcessInfo::GetEffectiveUserID() { } 
uint32_t SBProcessInfo::GetEffectiveGroupID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetEffectiveGroupID); + LLDB_INSTRUMENT_VA(this); uint32_t group_id = UINT32_MAX; if (m_opaque_up) { @@ -150,7 +145,7 @@ uint32_t SBProcessInfo::GetEffectiveGroupID() { } bool SBProcessInfo::EffectiveUserIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, EffectiveUserIDIsValid); + LLDB_INSTRUMENT_VA(this); bool is_valid = false; if (m_opaque_up) { @@ -160,7 +155,7 @@ bool SBProcessInfo::EffectiveUserIDIsValid() { } bool SBProcessInfo::EffectiveGroupIDIsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, EffectiveGroupIDIsValid); + LLDB_INSTRUMENT_VA(this); bool is_valid = false; if (m_opaque_up) { @@ -170,7 +165,7 @@ bool SBProcessInfo::EffectiveGroupIDIsValid() { } lldb::pid_t SBProcessInfo::GetParentProcessID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcessInfo, GetParentProcessID); + LLDB_INSTRUMENT_VA(this); lldb::pid_t proc_id = LLDB_INVALID_PROCESS_ID; if (m_opaque_up) { @@ -180,7 +175,7 @@ lldb::pid_t SBProcessInfo::GetParentProcessID() { } const char *SBProcessInfo::GetTriple() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcessInfo, GetTriple); + LLDB_INSTRUMENT_VA(this); const char *triple = nullptr; if (m_opaque_up) { diff --git a/lldb/source/API/SBQueue.cpp b/lldb/source/API/SBQueue.cpp index debb82173067f..b2c143f6357e8 100644 --- a/lldb/source/API/SBQueue.cpp +++ b/lldb/source/API/SBQueue.cpp @@ -8,8 +8,8 @@ #include -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBQueue.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBQueueItem.h" @@ -215,17 +215,15 @@ class QueueImpl { }; } -SBQueue::SBQueue() : m_opaque_sp(new QueueImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueue); -} +SBQueue::SBQueue() : m_opaque_sp(new QueueImpl()) { LLDB_INSTRUMENT_VA(this); } SBQueue::SBQueue(const QueueSP &queue_sp) : m_opaque_sp(new QueueImpl(queue_sp)) { - 
LLDB_RECORD_CONSTRUCTOR(SBQueue, (const lldb::QueueSP &), queue_sp); + LLDB_INSTRUMENT_VA(this, queue_sp); } SBQueue::SBQueue(const SBQueue &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBQueue, (const lldb::SBQueue &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (&rhs == this) return; @@ -234,8 +232,7 @@ SBQueue::SBQueue(const SBQueue &rhs) { } const lldb::SBQueue &SBQueue::operator=(const lldb::SBQueue &rhs) { - LLDB_RECORD_METHOD(const lldb::SBQueue &, - SBQueue, operator=,(const lldb::SBQueue &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = rhs.m_opaque_sp; return *this; @@ -244,17 +241,17 @@ const lldb::SBQueue &SBQueue::operator=(const lldb::SBQueue &rhs) { SBQueue::~SBQueue() = default; bool SBQueue::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueue, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBQueue::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueue, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->IsValid(); } void SBQueue::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBQueue, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp->Clear(); } @@ -264,65 +261,63 @@ void SBQueue::SetQueue(const QueueSP &queue_sp) { } lldb::queue_id_t SBQueue::GetQueueID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::queue_id_t, SBQueue, GetQueueID); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetQueueID(); } uint32_t SBQueue::GetIndexID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBQueue, GetIndexID); + LLDB_INSTRUMENT_VA(this); uint32_t index_id = m_opaque_sp->GetIndexID(); return index_id; } const char *SBQueue::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBQueue, GetName); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetName(); } uint32_t SBQueue::GetNumThreads() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumThreads); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetNumThreads(); } SBThread SBQueue::GetThreadAtIndex(uint32_t idx) { - 
LLDB_RECORD_METHOD(lldb::SBThread, SBQueue, GetThreadAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBThread th = m_opaque_sp->GetThreadAtIndex(idx); return th; } uint32_t SBQueue::GetNumPendingItems() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumPendingItems); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetNumPendingItems(); } SBQueueItem SBQueue::GetPendingItemAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBQueueItem, SBQueue, GetPendingItemAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); return m_opaque_sp->GetPendingItemAtIndex(idx); } uint32_t SBQueue::GetNumRunningItems() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumRunningItems); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetNumRunningItems(); } SBProcess SBQueue::GetProcess() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBQueue, GetProcess); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetProcess(); } lldb::QueueKind SBQueue::GetKind() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::QueueKind, SBQueue, GetKind); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp->GetKind(); } diff --git a/lldb/source/API/SBQueueItem.cpp b/lldb/source/API/SBQueueItem.cpp index f28f2d69aa9ee..b2204452c0fac 100644 --- a/lldb/source/API/SBQueueItem.cpp +++ b/lldb/source/API/SBQueueItem.cpp @@ -8,7 +8,6 @@ #include "lldb/lldb-forward.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBQueueItem.h" #include "lldb/API/SBThread.h" @@ -16,47 +15,46 @@ #include "lldb/Target/Process.h" #include "lldb/Target/QueueItem.h" #include "lldb/Target/Thread.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; // Constructors -SBQueueItem::SBQueueItem() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueueItem); } +SBQueueItem::SBQueueItem() { LLDB_INSTRUMENT_VA(this); } SBQueueItem::SBQueueItem(const QueueItemSP &queue_item_sp) : m_queue_item_sp(queue_item_sp) { - LLDB_RECORD_CONSTRUCTOR(SBQueueItem, 
(const lldb::QueueItemSP &), - queue_item_sp); + LLDB_INSTRUMENT_VA(this, queue_item_sp); } // Destructor SBQueueItem::~SBQueueItem() { m_queue_item_sp.reset(); } bool SBQueueItem::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueueItem, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBQueueItem::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueueItem, operator bool); + LLDB_INSTRUMENT_VA(this); return m_queue_item_sp.get() != nullptr; } void SBQueueItem::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBQueueItem, Clear); + LLDB_INSTRUMENT_VA(this); m_queue_item_sp.reset(); } void SBQueueItem::SetQueueItem(const QueueItemSP &queue_item_sp) { - LLDB_RECORD_METHOD(void, SBQueueItem, SetQueueItem, - (const lldb::QueueItemSP &), queue_item_sp); + LLDB_INSTRUMENT_VA(this, queue_item_sp); m_queue_item_sp = queue_item_sp; } lldb::QueueItemKind SBQueueItem::GetKind() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::QueueItemKind, SBQueueItem, GetKind); + LLDB_INSTRUMENT_VA(this); QueueItemKind result = eQueueItemKindUnknown; if (m_queue_item_sp) { @@ -66,7 +64,7 @@ lldb::QueueItemKind SBQueueItem::GetKind() const { } void SBQueueItem::SetKind(lldb::QueueItemKind kind) { - LLDB_RECORD_METHOD(void, SBQueueItem, SetKind, (lldb::QueueItemKind), kind); + LLDB_INSTRUMENT_VA(this, kind); if (m_queue_item_sp) { m_queue_item_sp->SetKind(kind); @@ -74,7 +72,7 @@ void SBQueueItem::SetKind(lldb::QueueItemKind kind) { } SBAddress SBQueueItem::GetAddress() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBQueueItem, GetAddress); + LLDB_INSTRUMENT_VA(this); SBAddress result; if (m_queue_item_sp) { @@ -84,7 +82,7 @@ SBAddress SBQueueItem::GetAddress() const { } void SBQueueItem::SetAddress(SBAddress addr) { - LLDB_RECORD_METHOD(void, SBQueueItem, SetAddress, (lldb::SBAddress), addr); + LLDB_INSTRUMENT_VA(this, addr); if (m_queue_item_sp) { m_queue_item_sp->SetAddress(addr.ref()); @@ -92,8 +90,7 @@ void 
SBQueueItem::SetAddress(SBAddress addr) { } SBThread SBQueueItem::GetExtendedBacktraceThread(const char *type) { - LLDB_RECORD_METHOD(lldb::SBThread, SBQueueItem, GetExtendedBacktraceThread, - (const char *), type); + LLDB_INSTRUMENT_VA(this, type); SBThread result; if (m_queue_item_sp) { diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp index ba564c911b725..d3d27cc577480 100644 --- a/lldb/source/API/SBReproducer.cpp +++ b/lldb/source/API/SBReproducer.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// - +#include "lldb/API/SBReproducer.h" #include "lldb/API/LLDB.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBAttachInfo.h" @@ -20,12 +20,11 @@ #include "lldb/API/SBError.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBHostOS.h" -#include "lldb/API/SBReproducer.h" #include "lldb/Host/FileSystem.h" -#include "lldb/Version/Version.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Reproducer.h" #include "lldb/Utility/ReproducerProvider.h" +#include "lldb/Version/Version.h" using namespace lldb; using namespace lldb_private; @@ -40,25 +39,33 @@ SBReplayOptions::SBReplayOptions(const SBReplayOptions &rhs) SBReplayOptions::~SBReplayOptions() = default; SBReplayOptions &SBReplayOptions::operator=(const SBReplayOptions &rhs) { + LLDB_INSTRUMENT_VA(this, rhs) if (this == &rhs) return *this; *m_opaque_up = *rhs.m_opaque_up; return *this; } -void SBReplayOptions::SetVerify(bool verify) { m_opaque_up->verify = verify; } +void SBReplayOptions::SetVerify(bool verify) { + LLDB_INSTRUMENT_VA(this, verify) m_opaque_up->verify = verify; +} -bool SBReplayOptions::GetVerify() const { return m_opaque_up->verify; } +bool SBReplayOptions::GetVerify() const { + LLDB_INSTRUMENT_VA(this) return m_opaque_up->verify; +} void SBReplayOptions::SetCheckVersion(bool check) { + LLDB_INSTRUMENT_VA(this, check) 
m_opaque_up->check_version = check; } bool SBReplayOptions::GetCheckVersion() const { + LLDB_INSTRUMENT_VA(this) return m_opaque_up->check_version; } const char *SBReproducer::Capture() { + LLDB_INSTRUMENT() static std::string error; if (auto e = Reproducer::Initialize(ReproducerMode::Capture, llvm::None)) { error = llvm::toString(std::move(e)); @@ -69,6 +76,7 @@ const char *SBReproducer::Capture() { } const char *SBReproducer::Capture(const char *path) { + LLDB_INSTRUMENT_VA(path) static std::string error; if (auto e = Reproducer::Initialize(ReproducerMode::Capture, FileSpec(path))) { @@ -80,23 +88,28 @@ const char *SBReproducer::Capture(const char *path) { } const char *SBReproducer::PassiveReplay(const char *path) { + LLDB_INSTRUMENT_VA(path) return "Reproducer replay has been removed"; } const char *SBReproducer::Replay(const char *path) { + LLDB_INSTRUMENT_VA(path) return "Reproducer replay has been removed"; } const char *SBReproducer::Replay(const char *path, bool skip_version_check) { + LLDB_INSTRUMENT_VA(path, skip_version_check) return Replay(path); } const char *SBReproducer::Replay(const char *path, const SBReplayOptions &options) { + LLDB_INSTRUMENT_VA(path, options) return Replay(path); } const char *SBReproducer::Finalize(const char *path) { + LLDB_INSTRUMENT_VA(path) static std::string error; repro::Loader *loader = repro::Reproducer::Instance().GetLoader(); @@ -114,6 +127,7 @@ const char *SBReproducer::Finalize(const char *path) { } bool SBReproducer::Generate() { + LLDB_INSTRUMENT() auto &r = Reproducer::Instance(); if (auto generator = r.GetGenerator()) { generator->Keep(); @@ -123,6 +137,7 @@ bool SBReproducer::Generate() { } bool SBReproducer::SetAutoGenerate(bool b) { + LLDB_INSTRUMENT_VA(b) auto &r = Reproducer::Instance(); if (auto generator = r.GetGenerator()) { generator->SetAutoGenerate(b); @@ -132,6 +147,7 @@ bool SBReproducer::SetAutoGenerate(bool b) { } const char *SBReproducer::GetPath() { + LLDB_INSTRUMENT() ConstString path; auto &r 
= Reproducer::Instance(); if (FileSpec reproducer_path = Reproducer::Instance().GetReproducerPath()) @@ -140,6 +156,7 @@ const char *SBReproducer::GetPath() { } void SBReproducer::SetWorkingDirectory(const char *path) { + LLDB_INSTRUMENT_VA(path) if (auto *g = lldb_private::repro::Reproducer::Instance().GetGenerator()) { auto &wp = g->GetOrCreate(); wp.SetDirectory(path); diff --git a/lldb/source/API/SBSection.cpp b/lldb/source/API/SBSection.cpp index aa068518f9468..733e0db0b5bad 100644 --- a/lldb/source/API/SBSection.cpp +++ b/lldb/source/API/SBSection.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSection.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBTarget.h" #include "lldb/Core/Module.h" @@ -15,15 +14,16 @@ #include "lldb/Symbol/ObjectFile.h" #include "lldb/Utility/DataBuffer.h" #include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/StreamString.h" using namespace lldb; using namespace lldb_private; -SBSection::SBSection() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSection); } +SBSection::SBSection() { LLDB_INSTRUMENT_VA(this); } SBSection::SBSection(const SBSection &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBSection, (const lldb::SBSection &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBSection::SBSection(const lldb::SectionSP §ion_sp) { @@ -34,8 +34,7 @@ SBSection::SBSection(const lldb::SectionSP §ion_sp) { } const SBSection &SBSection::operator=(const SBSection &rhs) { - LLDB_RECORD_METHOD(const lldb::SBSection &, - SBSection, operator=,(const lldb::SBSection &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_wp = rhs.m_opaque_wp; return *this; @@ -44,18 +43,18 @@ const SBSection &SBSection::operator=(const SBSection &rhs) { SBSection::~SBSection() = default; bool SBSection::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSection, IsValid); + 
LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBSection::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSection, operator bool); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); return section_sp && section_sp->GetModule().get() != nullptr; } const char *SBSection::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBSection, GetName); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp) @@ -64,7 +63,7 @@ const char *SBSection::GetName() { } lldb::SBSection SBSection::GetParent() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSection, SBSection, GetParent); + LLDB_INSTRUMENT_VA(this); lldb::SBSection sb_section; SectionSP section_sp(GetSP()); @@ -77,8 +76,7 @@ lldb::SBSection SBSection::GetParent() { } lldb::SBSection SBSection::FindSubSection(const char *sect_name) { - LLDB_RECORD_METHOD(lldb::SBSection, SBSection, FindSubSection, (const char *), - sect_name); + LLDB_INSTRUMENT_VA(this, sect_name); lldb::SBSection sb_section; if (sect_name) { @@ -93,7 +91,7 @@ lldb::SBSection SBSection::FindSubSection(const char *sect_name) { } size_t SBSection::GetNumSubSections() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBSection, GetNumSubSections); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp) @@ -102,8 +100,7 @@ size_t SBSection::GetNumSubSections() { } lldb::SBSection SBSection::GetSubSectionAtIndex(size_t idx) { - LLDB_RECORD_METHOD(lldb::SBSection, SBSection, GetSubSectionAtIndex, (size_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); lldb::SBSection sb_section; SectionSP section_sp(GetSP()); @@ -119,7 +116,7 @@ void SBSection::SetSP(const lldb::SectionSP §ion_sp) { } lldb::addr_t SBSection::GetFileAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBSection, GetFileAddress); + LLDB_INSTRUMENT_VA(this); lldb::addr_t file_addr = LLDB_INVALID_ADDRESS; SectionSP section_sp(GetSP()); @@ -129,8 +126,7 @@ lldb::addr_t SBSection::GetFileAddress() { } lldb::addr_t 
SBSection::GetLoadAddress(lldb::SBTarget &sb_target) { - LLDB_RECORD_METHOD(lldb::addr_t, SBSection, GetLoadAddress, - (lldb::SBTarget &), sb_target); + LLDB_INSTRUMENT_VA(this, sb_target); TargetSP target_sp(sb_target.GetSP()); if (target_sp) { @@ -142,7 +138,7 @@ lldb::addr_t SBSection::GetLoadAddress(lldb::SBTarget &sb_target) { } lldb::addr_t SBSection::GetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBSection, GetByteSize); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp) @@ -151,7 +147,7 @@ lldb::addr_t SBSection::GetByteSize() { } uint64_t SBSection::GetFileOffset() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBSection, GetFileOffset); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp) { @@ -166,7 +162,7 @@ uint64_t SBSection::GetFileOffset() { } uint64_t SBSection::GetFileByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBSection, GetFileByteSize); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp) @@ -175,14 +171,13 @@ uint64_t SBSection::GetFileByteSize() { } SBData SBSection::GetSectionData() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBData, SBSection, GetSectionData); + LLDB_INSTRUMENT_VA(this); return GetSectionData(0, UINT64_MAX); } SBData SBSection::GetSectionData(uint64_t offset, uint64_t size) { - LLDB_RECORD_METHOD(lldb::SBData, SBSection, GetSectionData, - (uint64_t, uint64_t), offset, size); + LLDB_INSTRUMENT_VA(this, offset, size); SBData sb_data; SectionSP section_sp(GetSP()); @@ -221,7 +216,7 @@ SBData SBSection::GetSectionData(uint64_t offset, uint64_t size) { } SectionType SBSection::GetSectionType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SectionType, SBSection, GetSectionType); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp.get()) @@ -230,7 +225,7 @@ SectionType SBSection::GetSectionType() { } uint32_t SBSection::GetPermissions() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBSection, GetPermissions); + LLDB_INSTRUMENT_VA(this); 
SectionSP section_sp(GetSP()); if (section_sp) @@ -239,7 +234,7 @@ uint32_t SBSection::GetPermissions() const { } uint32_t SBSection::GetTargetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBSection, GetTargetByteSize); + LLDB_INSTRUMENT_VA(this); SectionSP section_sp(GetSP()); if (section_sp.get()) @@ -248,8 +243,7 @@ uint32_t SBSection::GetTargetByteSize() { } bool SBSection::operator==(const SBSection &rhs) { - LLDB_RECORD_METHOD(bool, SBSection, operator==,(const lldb::SBSection &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); SectionSP lhs_section_sp(GetSP()); SectionSP rhs_section_sp(rhs.GetSP()); @@ -259,8 +253,7 @@ bool SBSection::operator==(const SBSection &rhs) { } bool SBSection::operator!=(const SBSection &rhs) { - LLDB_RECORD_METHOD(bool, SBSection, operator!=,(const lldb::SBSection &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); SectionSP lhs_section_sp(GetSP()); SectionSP rhs_section_sp(rhs.GetSP()); @@ -268,8 +261,7 @@ bool SBSection::operator!=(const SBSection &rhs) { } bool SBSection::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBSection, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBSourceManager.cpp b/lldb/source/API/SBSourceManager.cpp index e1cfb4dfde151..7729f5d9d69f8 100644 --- a/lldb/source/API/SBSourceManager.cpp +++ b/lldb/source/API/SBSourceManager.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSourceManager.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBTarget.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBFileSpec.h" #include "lldb/Core/Debugger.h" @@ -71,21 +71,19 @@ using namespace lldb; using namespace lldb_private; SBSourceManager::SBSourceManager(const SBDebugger &debugger) { - 
LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBDebugger &), - debugger); + LLDB_INSTRUMENT_VA(this, debugger); m_opaque_up = std::make_unique(debugger.get_sp()); } SBSourceManager::SBSourceManager(const SBTarget &target) { - LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBTarget &), target); + LLDB_INSTRUMENT_VA(this, target); m_opaque_up = std::make_unique(target.GetSP()); } SBSourceManager::SBSourceManager(const SBSourceManager &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBSourceManager &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (&rhs == this) return; @@ -95,9 +93,7 @@ SBSourceManager::SBSourceManager(const SBSourceManager &rhs) { const lldb::SBSourceManager &SBSourceManager:: operator=(const lldb::SBSourceManager &rhs) { - LLDB_RECORD_METHOD(const lldb::SBSourceManager &, - SBSourceManager, operator=,(const lldb::SBSourceManager &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = std::make_unique(*(rhs.m_opaque_up.get())); return *this; @@ -108,10 +104,7 @@ SBSourceManager::~SBSourceManager() = default; size_t SBSourceManager::DisplaySourceLinesWithLineNumbers( const SBFileSpec &file, uint32_t line, uint32_t context_before, uint32_t context_after, const char *current_line_cstr, SBStream &s) { - LLDB_RECORD_METHOD(size_t, SBSourceManager, DisplaySourceLinesWithLineNumbers, - (const lldb::SBFileSpec &, uint32_t, uint32_t, uint32_t, - const char *, lldb::SBStream &), - file, line, context_before, context_after, + LLDB_INSTRUMENT_VA(this, file, line, context_before, context_after, current_line_cstr, s); const uint32_t column = 0; @@ -124,11 +117,8 @@ size_t SBSourceManager::DisplaySourceLinesWithLineNumbersAndColumn( const SBFileSpec &file, uint32_t line, uint32_t column, uint32_t context_before, uint32_t context_after, const char *current_line_cstr, SBStream &s) { - LLDB_RECORD_METHOD( - size_t, SBSourceManager, DisplaySourceLinesWithLineNumbersAndColumn, - (const lldb::SBFileSpec &, uint32_t, uint32_t, uint32_t, 
uint32_t, - const char *, lldb::SBStream &), - file, line, column, context_before, context_after, current_line_cstr, s); + LLDB_INSTRUMENT_VA(this, file, line, column, context_before, context_after, + current_line_cstr, s); if (m_opaque_up == nullptr) return 0; diff --git a/lldb/source/API/SBStream.cpp b/lldb/source/API/SBStream.cpp index c3e344802148c..9ceef3466f937 100644 --- a/lldb/source/API/SBStream.cpp +++ b/lldb/source/API/SBStream.cpp @@ -8,10 +8,10 @@ #include "lldb/API/SBStream.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBFile.h" #include "lldb/Core/StreamFile.h" #include "lldb/Host/FileSystem.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" @@ -20,7 +20,7 @@ using namespace lldb; using namespace lldb_private; SBStream::SBStream() : m_opaque_up(new StreamString()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStream); + LLDB_INSTRUMENT_VA(this); } SBStream::SBStream(SBStream &&rhs) @@ -29,11 +29,11 @@ SBStream::SBStream(SBStream &&rhs) SBStream::~SBStream() = default; bool SBStream::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStream, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBStream::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStream, operator bool); + LLDB_INSTRUMENT_VA(this); return (m_opaque_up != nullptr); } @@ -41,7 +41,7 @@ SBStream::operator bool() const { // If this stream is not redirected to a file, it will maintain a local cache // for the stream data which can be accessed using this accessor. 
const char *SBStream::GetData() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBStream, GetData); + LLDB_INSTRUMENT_VA(this); if (m_is_file || m_opaque_up == nullptr) return nullptr; @@ -52,7 +52,7 @@ const char *SBStream::GetData() { // If this stream is not redirected to a file, it will maintain a local cache // for the stream output whose length can be accessed using this accessor. size_t SBStream::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBStream, GetSize); + LLDB_INSTRUMENT_VA(this); if (m_is_file || m_opaque_up == nullptr) return 0; @@ -61,7 +61,7 @@ size_t SBStream::GetSize() { } void SBStream::Print(const char *str) { - LLDB_RECORD_METHOD(void, SBStream, Print, (const char *), str); + LLDB_INSTRUMENT_VA(this, str); Printf("%s", str); } @@ -76,8 +76,7 @@ void SBStream::Printf(const char *format, ...) { } void SBStream::RedirectToFile(const char *path, bool append) { - LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (const char *, bool), path, - append); + LLDB_INSTRUMENT_VA(this, path, append); if (path == nullptr) return; @@ -114,19 +113,18 @@ void SBStream::RedirectToFile(const char *path, bool append) { } void SBStream::RedirectToFileHandle(FILE *fh, bool transfer_fh_ownership) { - LLDB_RECORD_METHOD(void, SBStream, RedirectToFileHandle, (FILE *, bool), fh, - transfer_fh_ownership); + LLDB_INSTRUMENT_VA(this, fh, transfer_fh_ownership); FileSP file = std::make_unique(fh, transfer_fh_ownership); return RedirectToFile(file); } void SBStream::RedirectToFile(SBFile file) { - LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (SBFile), file) + LLDB_INSTRUMENT_VA(this, file) RedirectToFile(file.GetFile()); } void SBStream::RedirectToFile(FileSP file_sp) { - LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (FileSP), file_sp); + LLDB_INSTRUMENT_VA(this, file_sp); if (!file_sp || !file_sp->IsValid()) return; @@ -150,8 +148,7 @@ void SBStream::RedirectToFile(FileSP file_sp) { } void SBStream::RedirectToFileDescriptor(int fd, bool transfer_fh_ownership) 
{ - LLDB_RECORD_METHOD(void, SBStream, RedirectToFileDescriptor, (int, bool), fd, - transfer_fh_ownership); + LLDB_INSTRUMENT_VA(this, fd, transfer_fh_ownership); std::string local_data; if (m_opaque_up) { @@ -182,7 +179,7 @@ lldb_private::Stream &SBStream::ref() { } void SBStream::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBStream, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) { // See if we have any locally backed data. If so, copy it so we can then diff --git a/lldb/source/API/SBStringList.cpp b/lldb/source/API/SBStringList.cpp index afafd7429fd76..dfb77b1ab32fb 100644 --- a/lldb/source/API/SBStringList.cpp +++ b/lldb/source/API/SBStringList.cpp @@ -7,14 +7,14 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBStringList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/StringList.h" using namespace lldb; using namespace lldb_private; -SBStringList::SBStringList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStringList); } +SBStringList::SBStringList() { LLDB_INSTRUMENT_VA(this); } SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) { if (lldb_strings_ptr) @@ -22,14 +22,13 @@ SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) { } SBStringList::SBStringList(const SBStringList &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBStringList, (const lldb::SBStringList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } const SBStringList &SBStringList::operator=(const SBStringList &rhs) { - LLDB_RECORD_METHOD(const lldb::SBStringList &, - SBStringList, operator=,(const lldb::SBStringList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -47,17 +46,17 @@ const lldb_private::StringList &SBStringList::operator*() const { } bool SBStringList::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, 
SBStringList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBStringList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStringList, operator bool); + LLDB_INSTRUMENT_VA(this); return (m_opaque_up != nullptr); } void SBStringList::AppendString(const char *str) { - LLDB_RECORD_METHOD(void, SBStringList, AppendString, (const char *), str); + LLDB_INSTRUMENT_VA(this, str); if (str != nullptr) { if (IsValid()) @@ -68,8 +67,7 @@ void SBStringList::AppendString(const char *str) { } void SBStringList::AppendList(const char **strv, int strc) { - LLDB_RECORD_METHOD(void, SBStringList, AppendList, (const char **, int), strv, - strc); + LLDB_INSTRUMENT_VA(this, strv, strc); if ((strv != nullptr) && (strc > 0)) { if (IsValid()) @@ -80,8 +78,7 @@ void SBStringList::AppendList(const char **strv, int strc) { } void SBStringList::AppendList(const SBStringList &strings) { - LLDB_RECORD_METHOD(void, SBStringList, AppendList, - (const lldb::SBStringList &), strings); + LLDB_INSTRUMENT_VA(this, strings); if (strings.IsValid()) { if (!IsValid()) @@ -97,7 +94,7 @@ void SBStringList::AppendList(const StringList &strings) { } uint32_t SBStringList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBStringList, GetSize); + LLDB_INSTRUMENT_VA(this); if (IsValid()) { return m_opaque_up->GetSize(); @@ -106,8 +103,7 @@ uint32_t SBStringList::GetSize() const { } const char *SBStringList::GetStringAtIndex(size_t idx) { - LLDB_RECORD_METHOD(const char *, SBStringList, GetStringAtIndex, (size_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) { return m_opaque_up->GetStringAtIndex(idx); @@ -116,8 +112,7 @@ const char *SBStringList::GetStringAtIndex(size_t idx) { } const char *SBStringList::GetStringAtIndex(size_t idx) const { - LLDB_RECORD_METHOD_CONST(const char *, SBStringList, GetStringAtIndex, - (size_t), idx); + LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) { return m_opaque_up->GetStringAtIndex(idx); @@ -126,7 +121,7 @@ const 
char *SBStringList::GetStringAtIndex(size_t idx) const { } void SBStringList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBStringList, Clear); + LLDB_INSTRUMENT_VA(this); if (IsValid()) { m_opaque_up->Clear(); diff --git a/lldb/source/API/SBStructuredData.cpp b/lldb/source/API/SBStructuredData.cpp index c607e305e0a6e..498bcdd39e448 100644 --- a/lldb/source/API/SBStructuredData.cpp +++ b/lldb/source/API/SBStructuredData.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBStructuredData.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBStringList.h" @@ -25,41 +25,36 @@ using namespace lldb_private; #pragma mark SBStructuredData SBStructuredData::SBStructuredData() : m_impl_up(new StructuredDataImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStructuredData); + LLDB_INSTRUMENT_VA(this); } SBStructuredData::SBStructuredData(const lldb::SBStructuredData &rhs) : m_impl_up(new StructuredDataImpl(*rhs.m_impl_up)) { - LLDB_RECORD_CONSTRUCTOR(SBStructuredData, (const lldb::SBStructuredData &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBStructuredData::SBStructuredData(const lldb::EventSP &event_sp) : m_impl_up(new StructuredDataImpl(event_sp)) { - LLDB_RECORD_CONSTRUCTOR(SBStructuredData, (const lldb::EventSP &), event_sp); + LLDB_INSTRUMENT_VA(this, event_sp); } SBStructuredData::SBStructuredData(const lldb_private::StructuredDataImpl &impl) : m_impl_up(new StructuredDataImpl(impl)) { - LLDB_RECORD_CONSTRUCTOR(SBStructuredData, - (const lldb_private::StructuredDataImpl &), impl); + LLDB_INSTRUMENT_VA(this, impl); } SBStructuredData::~SBStructuredData() = default; SBStructuredData &SBStructuredData:: operator=(const lldb::SBStructuredData &rhs) { - LLDB_RECORD_METHOD( - lldb::SBStructuredData &, - SBStructuredData, operator=,(const lldb::SBStructuredData &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); 
*m_impl_up = *rhs.m_impl_up; return *this; } lldb::SBError SBStructuredData::SetFromJSON(lldb::SBStream &stream) { - LLDB_RECORD_METHOD(lldb::SBError, SBStructuredData, SetFromJSON, - (lldb::SBStream &), stream); + LLDB_INSTRUMENT_VA(this, stream); lldb::SBError error; std::string json_str(stream.GetData()); @@ -73,33 +68,31 @@ lldb::SBError SBStructuredData::SetFromJSON(lldb::SBStream &stream) { } lldb::SBError SBStructuredData::SetFromJSON(const char *json) { - LLDB_RECORD_METHOD(lldb::SBError, SBStructuredData, SetFromJSON, - (const char *), json); + LLDB_INSTRUMENT_VA(this, json); lldb::SBStream s; s.Print(json); return SetFromJSON(s); } bool SBStructuredData::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStructuredData, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBStructuredData::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStructuredData, operator bool); + LLDB_INSTRUMENT_VA(this); return m_impl_up->IsValid(); } void SBStructuredData::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBStructuredData, Clear); + LLDB_INSTRUMENT_VA(this); m_impl_up->Clear(); } SBError SBStructuredData::GetAsJSON(lldb::SBStream &stream) const { - LLDB_RECORD_METHOD_CONST(lldb::SBError, SBStructuredData, GetAsJSON, - (lldb::SBStream &), stream); + LLDB_INSTRUMENT_VA(this, stream); SBError error; error.SetError(m_impl_up->GetAsJSON(stream.ref())); @@ -107,8 +100,7 @@ SBError SBStructuredData::GetAsJSON(lldb::SBStream &stream) const { } lldb::SBError SBStructuredData::GetDescription(lldb::SBStream &stream) const { - LLDB_RECORD_METHOD_CONST(lldb::SBError, SBStructuredData, GetDescription, - (lldb::SBStream &), stream); + LLDB_INSTRUMENT_VA(this, stream); Status error = m_impl_up->GetDescription(stream.ref()); SBError sb_error; @@ -117,21 +109,19 @@ lldb::SBError SBStructuredData::GetDescription(lldb::SBStream &stream) const { } StructuredDataType SBStructuredData::GetType() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::StructuredDataType, SBStructuredData, - GetType); + LLDB_INSTRUMENT_VA(this); return m_impl_up->GetType(); } size_t SBStructuredData::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBStructuredData, GetSize); + LLDB_INSTRUMENT_VA(this); return m_impl_up->GetSize(); } bool SBStructuredData::GetKeys(lldb::SBStringList &keys) const { - LLDB_RECORD_METHOD_CONST(bool, SBStructuredData, GetKeys, - (lldb::SBStringList &), keys); + LLDB_INSTRUMENT_VA(this, keys); if (GetType() != eStructuredDataTypeDictionary) return false; @@ -157,8 +147,7 @@ bool SBStructuredData::GetKeys(lldb::SBStringList &keys) const { } lldb::SBStructuredData SBStructuredData::GetValueForKey(const char *key) const { - LLDB_RECORD_METHOD_CONST(lldb::SBStructuredData, SBStructuredData, - GetValueForKey, (const char *), key); + LLDB_INSTRUMENT_VA(this, key); SBStructuredData result; result.m_impl_up->SetObjectSP(m_impl_up->GetValueForKey(key)); @@ -166,8 +155,7 @@ lldb::SBStructuredData SBStructuredData::GetValueForKey(const char *key) const { } lldb::SBStructuredData SBStructuredData::GetItemAtIndex(size_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBStructuredData, SBStructuredData, - GetItemAtIndex, (size_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBStructuredData result; result.m_impl_up->SetObjectSP(m_impl_up->GetItemAtIndex(idx)); @@ -175,29 +163,25 @@ lldb::SBStructuredData SBStructuredData::GetItemAtIndex(size_t idx) const { } uint64_t SBStructuredData::GetIntegerValue(uint64_t fail_value) const { - LLDB_RECORD_METHOD_CONST(uint64_t, SBStructuredData, GetIntegerValue, - (uint64_t), fail_value); + LLDB_INSTRUMENT_VA(this, fail_value); return m_impl_up->GetIntegerValue(fail_value); } double SBStructuredData::GetFloatValue(double fail_value) const { - LLDB_RECORD_METHOD_CONST(double, SBStructuredData, GetFloatValue, (double), - fail_value); + LLDB_INSTRUMENT_VA(this, fail_value); return m_impl_up->GetFloatValue(fail_value); } bool 
SBStructuredData::GetBooleanValue(bool fail_value) const { - LLDB_RECORD_METHOD_CONST(bool, SBStructuredData, GetBooleanValue, (bool), - fail_value); + LLDB_INSTRUMENT_VA(this, fail_value); return m_impl_up->GetBooleanValue(fail_value); } size_t SBStructuredData::GetStringValue(char *dst, size_t dst_len) const { - LLDB_RECORD_METHOD_CONST(size_t, SBStructuredData, GetStringValue, - (char *, size_t), dst, "", dst_len); + LLDB_INSTRUMENT_VA(this, dst, dst_len); return m_impl_up->GetStringValue(dst, dst_len); } diff --git a/lldb/source/API/SBSymbol.cpp b/lldb/source/API/SBSymbol.cpp index 96fe5708d344d..b671f987dc996 100644 --- a/lldb/source/API/SBSymbol.cpp +++ b/lldb/source/API/SBSymbol.cpp @@ -7,29 +7,28 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSymbol.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" #include "lldb/Symbol/Symbol.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; -SBSymbol::SBSymbol() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbol); } +SBSymbol::SBSymbol() { LLDB_INSTRUMENT_VA(this); } SBSymbol::SBSymbol(lldb_private::Symbol *lldb_object_ptr) : m_opaque_ptr(lldb_object_ptr) {} SBSymbol::SBSymbol(const lldb::SBSymbol &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) { - LLDB_RECORD_CONSTRUCTOR(SBSymbol, (const lldb::SBSymbol &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBSymbol &SBSymbol::operator=(const SBSymbol &rhs) { - LLDB_RECORD_METHOD(const lldb::SBSymbol &, - SBSymbol, operator=,(const lldb::SBSymbol &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_ptr = rhs.m_opaque_ptr; return *this; @@ -42,17 +41,17 @@ void SBSymbol::SetSymbol(lldb_private::Symbol *lldb_object_ptr) { } bool SBSymbol::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbol, IsValid); + 
LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBSymbol::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbol, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_ptr != nullptr; } const char *SBSymbol::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; if (m_opaque_ptr) @@ -62,7 +61,7 @@ const char *SBSymbol::GetName() const { } const char *SBSymbol::GetDisplayName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetDisplayName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; if (m_opaque_ptr) @@ -72,7 +71,7 @@ const char *SBSymbol::GetDisplayName() const { } const char *SBSymbol::GetMangledName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetMangledName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; if (m_opaque_ptr) @@ -81,22 +80,19 @@ const char *SBSymbol::GetMangledName() const { } bool SBSymbol::operator==(const SBSymbol &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBSymbol, operator==,(const lldb::SBSymbol &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr == rhs.m_opaque_ptr; } bool SBSymbol::operator!=(const SBSymbol &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBSymbol, operator!=,(const lldb::SBSymbol &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_ptr != rhs.m_opaque_ptr; } bool SBSymbol::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBSymbol, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -109,16 +105,14 @@ bool SBSymbol::GetDescription(SBStream &description) { } SBInstructionList SBSymbol::GetInstructions(SBTarget target) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBSymbol, GetInstructions, - (lldb::SBTarget), target); + LLDB_INSTRUMENT_VA(this, target); return GetInstructions(target, nullptr); } SBInstructionList 
SBSymbol::GetInstructions(SBTarget target, const char *flavor_string) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBSymbol, GetInstructions, - (lldb::SBTarget, const char *), target, flavor_string); + LLDB_INSTRUMENT_VA(this, target, flavor_string); SBInstructionList sb_instructions; if (m_opaque_ptr) { @@ -145,7 +139,7 @@ lldb_private::Symbol *SBSymbol::get() { return m_opaque_ptr; } void SBSymbol::reset(lldb_private::Symbol *symbol) { m_opaque_ptr = symbol; } SBAddress SBSymbol::GetStartAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBSymbol, GetStartAddress); + LLDB_INSTRUMENT_VA(this); SBAddress addr; if (m_opaque_ptr && m_opaque_ptr->ValueIsAddress()) { @@ -155,7 +149,7 @@ SBAddress SBSymbol::GetStartAddress() { } SBAddress SBSymbol::GetEndAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBSymbol, GetEndAddress); + LLDB_INSTRUMENT_VA(this); SBAddress addr; if (m_opaque_ptr && m_opaque_ptr->ValueIsAddress()) { @@ -169,7 +163,7 @@ SBAddress SBSymbol::GetEndAddress() { } uint32_t SBSymbol::GetPrologueByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBSymbol, GetPrologueByteSize); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetPrologueByteSize(); @@ -177,7 +171,7 @@ uint32_t SBSymbol::GetPrologueByteSize() { } SymbolType SBSymbol::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SymbolType, SBSymbol, GetType); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->GetType(); @@ -185,7 +179,7 @@ SymbolType SBSymbol::GetType() { } bool SBSymbol::IsExternal() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBSymbol, IsExternal); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->IsExternal(); @@ -193,7 +187,7 @@ bool SBSymbol::IsExternal() { } bool SBSymbol::IsSynthetic() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBSymbol, IsSynthetic); + LLDB_INSTRUMENT_VA(this); if (m_opaque_ptr) return m_opaque_ptr->IsSynthetic(); diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp index 
ebe9bcfabb9fb..484399c895900 100644 --- a/lldb/source/API/SBSymbolContext.cpp +++ b/lldb/source/API/SBSymbolContext.cpp @@ -7,30 +7,26 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSymbolContext.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Core/Module.h" #include "lldb/Symbol/Function.h" #include "lldb/Symbol/Symbol.h" #include "lldb/Symbol/SymbolContext.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; -SBSymbolContext::SBSymbolContext() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContext); -} +SBSymbolContext::SBSymbolContext() { LLDB_INSTRUMENT_VA(this); } SBSymbolContext::SBSymbolContext(const SymbolContext &sc) : m_opaque_up(std::make_unique(sc)) { - LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, - (const lldb_private::SymbolContext &), sc); + LLDB_INSTRUMENT_VA(this, sc); } SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, (const lldb::SBSymbolContext &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -38,9 +34,7 @@ SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) { SBSymbolContext::~SBSymbolContext() = default; const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) { - LLDB_RECORD_METHOD(const lldb::SBSymbolContext &, - SBSymbolContext, operator=,(const lldb::SBSymbolContext &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -48,17 +42,17 @@ const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) { } bool SBSymbolContext::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBSymbolContext::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, operator bool); + 
LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr; } SBModule SBSymbolContext::GetModule() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBSymbolContext, GetModule); + LLDB_INSTRUMENT_VA(this); SBModule sb_module; ModuleSP module_sp; @@ -71,14 +65,13 @@ SBModule SBSymbolContext::GetModule() { } SBCompileUnit SBSymbolContext::GetCompileUnit() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCompileUnit, SBSymbolContext, - GetCompileUnit); + LLDB_INSTRUMENT_VA(this); return SBCompileUnit(m_opaque_up ? m_opaque_up->comp_unit : nullptr); } SBFunction SBSymbolContext::GetFunction() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFunction, SBSymbolContext, GetFunction); + LLDB_INSTRUMENT_VA(this); Function *function = nullptr; @@ -91,13 +84,13 @@ SBFunction SBSymbolContext::GetFunction() { } SBBlock SBSymbolContext::GetBlock() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBSymbolContext, GetBlock); + LLDB_INSTRUMENT_VA(this); return SBBlock(m_opaque_up ? m_opaque_up->block : nullptr); } SBLineEntry SBSymbolContext::GetLineEntry() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBLineEntry, SBSymbolContext, GetLineEntry); + LLDB_INSTRUMENT_VA(this); SBLineEntry sb_line_entry; if (m_opaque_up) @@ -107,7 +100,7 @@ SBLineEntry SBSymbolContext::GetLineEntry() { } SBSymbol SBSymbolContext::GetSymbol() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSymbol, SBSymbolContext, GetSymbol); + LLDB_INSTRUMENT_VA(this); Symbol *symbol = nullptr; @@ -120,35 +113,31 @@ SBSymbol SBSymbolContext::GetSymbol() { } void SBSymbolContext::SetModule(lldb::SBModule module) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetModule, (lldb::SBModule), - module); + LLDB_INSTRUMENT_VA(this, module); ref().module_sp = module.GetSP(); } void SBSymbolContext::SetCompileUnit(lldb::SBCompileUnit compile_unit) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetCompileUnit, - (lldb::SBCompileUnit), compile_unit); + LLDB_INSTRUMENT_VA(this, compile_unit); ref().comp_unit = compile_unit.get(); } void 
SBSymbolContext::SetFunction(lldb::SBFunction function) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetFunction, (lldb::SBFunction), - function); + LLDB_INSTRUMENT_VA(this, function); ref().function = function.get(); } void SBSymbolContext::SetBlock(lldb::SBBlock block) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetBlock, (lldb::SBBlock), block); + LLDB_INSTRUMENT_VA(this, block); ref().block = block.GetPtr(); } void SBSymbolContext::SetLineEntry(lldb::SBLineEntry line_entry) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetLineEntry, (lldb::SBLineEntry), - line_entry); + LLDB_INSTRUMENT_VA(this, line_entry); if (line_entry.IsValid()) ref().line_entry = line_entry.ref(); @@ -157,8 +146,7 @@ void SBSymbolContext::SetLineEntry(lldb::SBLineEntry line_entry) { } void SBSymbolContext::SetSymbol(lldb::SBSymbol symbol) { - LLDB_RECORD_METHOD(void, SBSymbolContext, SetSymbol, (lldb::SBSymbol), - symbol); + LLDB_INSTRUMENT_VA(this, symbol); ref().symbol = symbol.get(); } @@ -189,8 +177,7 @@ lldb_private::SymbolContext *SBSymbolContext::get() const { } bool SBSymbolContext::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBSymbolContext, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -205,10 +192,7 @@ bool SBSymbolContext::GetDescription(SBStream &description) { SBSymbolContext SBSymbolContext::GetParentOfInlinedScope(const SBAddress &curr_frame_pc, SBAddress &parent_frame_addr) const { - LLDB_RECORD_METHOD_CONST(lldb::SBSymbolContext, SBSymbolContext, - GetParentOfInlinedScope, - (const lldb::SBAddress &, lldb::SBAddress &), - curr_frame_pc, parent_frame_addr); + LLDB_INSTRUMENT_VA(this, curr_frame_pc, parent_frame_addr); SBSymbolContext sb_sc; if (m_opaque_up.get() && curr_frame_pc.IsValid()) { diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp index 2dddd2805cd7b..baa558caebbc0 100644 --- 
a/lldb/source/API/SBSymbolContextList.cpp +++ b/lldb/source/API/SBSymbolContextList.cpp @@ -7,22 +7,21 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBSymbolContextList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/Symbol/SymbolContext.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; SBSymbolContextList::SBSymbolContextList() : m_opaque_up(new SymbolContextList()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContextList); + LLDB_INSTRUMENT_VA(this); } SBSymbolContextList::SBSymbolContextList(const SBSymbolContextList &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBSymbolContextList, - (const lldb::SBSymbolContextList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -31,9 +30,7 @@ SBSymbolContextList::~SBSymbolContextList() = default; const SBSymbolContextList &SBSymbolContextList:: operator=(const SBSymbolContextList &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBSymbolContextList &, - SBSymbolContextList, operator=,(const lldb::SBSymbolContextList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_up = clone(rhs.m_opaque_up); @@ -41,7 +38,7 @@ operator=(const SBSymbolContextList &rhs) { } uint32_t SBSymbolContextList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBSymbolContextList, GetSize); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetSize(); @@ -49,8 +46,7 @@ uint32_t SBSymbolContextList::GetSize() const { } SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBSymbolContextList, - GetContextAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBSymbolContext sb_sc; if (m_opaque_up) { @@ -62,34 +58,32 @@ SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) { } void SBSymbolContextList::Clear() { - 
LLDB_RECORD_METHOD_NO_ARGS(void, SBSymbolContextList, Clear); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) m_opaque_up->Clear(); } void SBSymbolContextList::Append(SBSymbolContext &sc) { - LLDB_RECORD_METHOD(void, SBSymbolContextList, Append, - (lldb::SBSymbolContext &), sc); + LLDB_INSTRUMENT_VA(this, sc); if (sc.IsValid() && m_opaque_up.get()) m_opaque_up->Append(*sc); } void SBSymbolContextList::Append(SBSymbolContextList &sc_list) { - LLDB_RECORD_METHOD(void, SBSymbolContextList, Append, - (lldb::SBSymbolContextList &), sc_list); + LLDB_INSTRUMENT_VA(this, sc_list); if (sc_list.IsValid() && m_opaque_up.get()) m_opaque_up->Append(*sc_list); } bool SBSymbolContextList::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContextList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBSymbolContextList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContextList, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr; } @@ -104,8 +98,7 @@ lldb_private::SymbolContextList &SBSymbolContextList::operator*() const { } bool SBSymbolContextList::GetDescription(lldb::SBStream &description) { - LLDB_RECORD_METHOD(bool, SBSymbolContextList, GetDescription, - (lldb::SBStream &), description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); if (m_opaque_up) diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 5bec5610fcff6..75534b2343d42 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTarget.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/lldb-public.h" @@ -93,19 +93,18 @@ static Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) { } // SBTarget constructor -SBTarget::SBTarget() { 
LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTarget); } +SBTarget::SBTarget() { LLDB_INSTRUMENT_VA(this); } SBTarget::SBTarget(const SBTarget &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTarget, (const lldb::SBTarget &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTarget::SBTarget(const TargetSP &target_sp) : m_opaque_sp(target_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTarget, (const lldb::TargetSP &), target_sp); + LLDB_INSTRUMENT_VA(this, target_sp); } const SBTarget &SBTarget::operator=(const SBTarget &rhs) { - LLDB_RECORD_METHOD(const lldb::SBTarget &, - SBTarget, operator=,(const lldb::SBTarget &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -116,22 +115,19 @@ const SBTarget &SBTarget::operator=(const SBTarget &rhs) { SBTarget::~SBTarget() = default; bool SBTarget::EventIsTargetEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBTarget, EventIsTargetEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Target::TargetEventData::GetEventDataFromEvent(event.get()) != nullptr; } SBTarget SBTarget::GetTargetFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTarget, SBTarget, GetTargetFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Target::TargetEventData::GetTargetFromEvent(event.get()); } uint32_t SBTarget::GetNumModulesFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(uint32_t, SBTarget, GetNumModulesFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); const ModuleList module_list = Target::TargetEventData::GetModuleListFromEvent(event.get()); @@ -140,9 +136,7 @@ uint32_t SBTarget::GetNumModulesFromEvent(const SBEvent &event) { SBModule SBTarget::GetModuleAtIndexFromEvent(const uint32_t idx, const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndexFromEvent, - (const uint32_t, const lldb::SBEvent &), idx, - event); + LLDB_INSTRUMENT_VA(idx, event); const 
ModuleList module_list = Target::TargetEventData::GetModuleListFromEvent(event.get()); @@ -150,24 +144,23 @@ SBModule SBTarget::GetModuleAtIndexFromEvent(const uint32_t idx, } const char *SBTarget::GetBroadcasterClassName() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBTarget, - GetBroadcasterClassName); + LLDB_INSTRUMENT(); return Target::GetStaticBroadcasterClass().AsCString(); } bool SBTarget::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTarget, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTarget::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTarget, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr && m_opaque_sp->IsValid(); } SBProcess SBTarget::GetProcess() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBTarget, GetProcess); + LLDB_INSTRUMENT_VA(this); SBProcess sb_process; ProcessSP process_sp; @@ -181,7 +174,7 @@ SBProcess SBTarget::GetProcess() { } SBPlatform SBTarget::GetPlatform() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBPlatform, SBTarget, GetPlatform); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (!target_sp) @@ -194,7 +187,7 @@ SBPlatform SBTarget::GetPlatform() { } SBDebugger SBTarget::GetDebugger() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBDebugger, SBTarget, GetDebugger); + LLDB_INSTRUMENT_VA(this); SBDebugger debugger; TargetSP target_sp(GetSP()); @@ -204,7 +197,7 @@ SBDebugger SBTarget::GetDebugger() const { } SBStructuredData SBTarget::GetStatistics() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBTarget, GetStatistics); + LLDB_INSTRUMENT_VA(this); SBStructuredData data; TargetSP target_sp(GetSP()); @@ -219,7 +212,7 @@ SBStructuredData SBTarget::GetStatistics() { } void SBTarget::SetCollectingStats(bool v) { - LLDB_RECORD_METHOD(void, SBTarget, SetCollectingStats, (bool), v); + LLDB_INSTRUMENT_VA(this, v); TargetSP target_sp(GetSP()); if (!target_sp) @@ -228,7 +221,7 @@ void SBTarget::SetCollectingStats(bool v) 
{ } bool SBTarget::GetCollectingStats() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, GetCollectingStats); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (!target_sp) @@ -237,16 +230,14 @@ bool SBTarget::GetCollectingStats() { } SBProcess SBTarget::LoadCore(const char *core_file) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LoadCore, (const char *), - core_file); + LLDB_INSTRUMENT_VA(this, core_file); lldb::SBError error; // Ignored return LoadCore(core_file, error); } SBProcess SBTarget::LoadCore(const char *core_file, lldb::SBError &error) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LoadCore, - (const char *, lldb::SBError &), core_file, error); + LLDB_INSTRUMENT_VA(this, core_file, error); SBProcess sb_process; TargetSP target_sp(GetSP()); @@ -270,9 +261,7 @@ SBProcess SBTarget::LoadCore(const char *core_file, lldb::SBError &error) { SBProcess SBTarget::LaunchSimple(char const **argv, char const **envp, const char *working_directory) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LaunchSimple, - (const char **, const char **, const char *), argv, envp, - working_directory); + LLDB_INSTRUMENT_VA(this, argv, envp, working_directory); TargetSP target_sp = GetSP(); if (!target_sp) @@ -295,7 +284,7 @@ SBProcess SBTarget::LaunchSimple(char const **argv, char const **envp, } SBError SBTarget::Install() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBTarget, Install); + LLDB_INSTRUMENT_VA(this); SBError sb_error; TargetSP target_sp(GetSP()); @@ -312,12 +301,9 @@ SBProcess SBTarget::Launch(SBListener &listener, char const **argv, const char *working_directory, uint32_t launch_flags, // See LaunchFlags bool stop_at_entry, lldb::SBError &error) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Launch, - (lldb::SBListener &, const char **, const char **, - const char *, const char *, const char *, const char *, - uint32_t, bool, lldb::SBError &), - listener, argv, envp, stdin_path, stdout_path, stderr_path, - working_directory, launch_flags, 
stop_at_entry, error); + LLDB_INSTRUMENT_VA(this, listener, argv, envp, stdin_path, stdout_path, + stderr_path, working_directory, launch_flags, + stop_at_entry, error); SBProcess sb_process; ProcessSP process_sp; @@ -395,10 +381,7 @@ SBProcess SBTarget::Launch(SBListener &listener, char const **argv, } SBProcess SBTarget::Launch(SBLaunchInfo &sb_launch_info, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Launch, - (lldb::SBLaunchInfo &, lldb::SBError &), sb_launch_info, - error); - + LLDB_INSTRUMENT_VA(this, sb_launch_info, error); SBProcess sb_process; TargetSP target_sp(GetSP()); @@ -444,9 +427,7 @@ SBProcess SBTarget::Launch(SBLaunchInfo &sb_launch_info, SBError &error) { } lldb::SBProcess SBTarget::Attach(SBAttachInfo &sb_attach_info, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Attach, - (lldb::SBAttachInfo &, lldb::SBError &), sb_attach_info, - error); + LLDB_INSTRUMENT_VA(this, sb_attach_info, error); SBProcess sb_process; TargetSP target_sp(GetSP()); @@ -483,9 +464,7 @@ lldb::SBProcess SBTarget::AttachToProcessWithID( lldb::pid_t pid, // The process ID to attach to SBError &error // An error explaining what went wrong if attach fails ) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, AttachToProcessWithID, - (lldb::SBListener &, lldb::pid_t, lldb::SBError &), - listener, pid, error); + LLDB_INSTRUMENT_VA(this, listener, pid, error); SBProcess sb_process; TargetSP target_sp(GetSP()); @@ -515,9 +494,7 @@ lldb::SBProcess SBTarget::AttachToProcessWithName( bool wait_for, // if true wait for a new instance of "name" to be launched SBError &error // An error explaining what went wrong if attach fails ) { - LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, AttachToProcessWithName, - (lldb::SBListener &, const char *, bool, lldb::SBError &), - listener, name, wait_for, error); + LLDB_INSTRUMENT_VA(this, listener, name, wait_for, error); SBProcess sb_process; TargetSP target_sp(GetSP()); @@ -541,10 +518,7 @@ lldb::SBProcess 
SBTarget::AttachToProcessWithName( lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url, const char *plugin_name, SBError &error) { - LLDB_RECORD_METHOD( - lldb::SBProcess, SBTarget, ConnectRemote, - (lldb::SBListener &, const char *, const char *, lldb::SBError &), - listener, url, plugin_name, error); + LLDB_INSTRUMENT_VA(this, listener, url, plugin_name, error); SBProcess sb_process; ProcessSP process_sp; @@ -574,7 +548,7 @@ lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url, } SBFileSpec SBTarget::GetExecutable() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBTarget, GetExecutable); + LLDB_INSTRUMENT_VA(this); SBFileSpec exe_file_spec; TargetSP target_sp(GetSP()); @@ -588,15 +562,13 @@ SBFileSpec SBTarget::GetExecutable() { } bool SBTarget::operator==(const SBTarget &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBTarget, operator==,(const lldb::SBTarget &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_sp.get() == rhs.m_opaque_sp.get(); } bool SBTarget::operator!=(const SBTarget &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBTarget, operator!=,(const lldb::SBTarget &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_sp.get() != rhs.m_opaque_sp.get(); } @@ -608,8 +580,7 @@ void SBTarget::SetSP(const lldb::TargetSP &target_sp) { } lldb::SBAddress SBTarget::ResolveLoadAddress(lldb::addr_t vm_addr) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolveLoadAddress, - (lldb::addr_t), vm_addr); + LLDB_INSTRUMENT_VA(this, vm_addr); lldb::SBAddress sb_addr; Address &addr = sb_addr.ref(); @@ -627,8 +598,7 @@ lldb::SBAddress SBTarget::ResolveLoadAddress(lldb::addr_t vm_addr) { } lldb::SBAddress SBTarget::ResolveFileAddress(lldb::addr_t file_addr) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolveFileAddress, - (lldb::addr_t), file_addr); + LLDB_INSTRUMENT_VA(this, file_addr); lldb::SBAddress sb_addr; Address &addr = sb_addr.ref(); @@ -645,8 +615,7 @@ lldb::SBAddress 
SBTarget::ResolveFileAddress(lldb::addr_t file_addr) { lldb::SBAddress SBTarget::ResolvePastLoadAddress(uint32_t stop_id, lldb::addr_t vm_addr) { - LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolvePastLoadAddress, - (uint32_t, lldb::addr_t), stop_id, vm_addr); + LLDB_INSTRUMENT_VA(this, stop_id, vm_addr); lldb::SBAddress sb_addr; Address &addr = sb_addr.ref(); @@ -666,9 +635,7 @@ lldb::SBAddress SBTarget::ResolvePastLoadAddress(uint32_t stop_id, SBSymbolContext SBTarget::ResolveSymbolContextForAddress(const SBAddress &addr, uint32_t resolve_scope) { - LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBTarget, - ResolveSymbolContextForAddress, - (const lldb::SBAddress &, uint32_t), addr, resolve_scope); + LLDB_INSTRUMENT_VA(this, addr, resolve_scope); SBSymbolContext sc; SymbolContextItem scope = static_cast(resolve_scope); @@ -683,9 +650,7 @@ SBTarget::ResolveSymbolContextForAddress(const SBAddress &addr, size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size, lldb::SBError &error) { - LLDB_RECORD_METHOD(size_t, SBTarget, ReadMemory, - (const lldb::SBAddress, void *, size_t, lldb::SBError &), - addr, buf, size, error); + LLDB_INSTRUMENT_VA(this, addr, buf, size, error); SBError sb_error; size_t bytes_read = 0; @@ -703,8 +668,7 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size, SBBreakpoint SBTarget::BreakpointCreateByLocation(const char *file, uint32_t line) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const char *, uint32_t), file, line); + LLDB_INSTRUMENT_VA(this, file, line); return SBBreakpoint( BreakpointCreateByLocation(SBFileSpec(file, false), line)); @@ -713,8 +677,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation(const char *file, SBBreakpoint SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec, uint32_t line) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const lldb::SBFileSpec &, uint32_t), sb_file_spec, line); + 
LLDB_INSTRUMENT_VA(this, sb_file_spec, line); return BreakpointCreateByLocation(sb_file_spec, line, 0); } @@ -722,9 +685,7 @@ SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec, SBBreakpoint SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec, uint32_t line, lldb::addr_t offset) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const lldb::SBFileSpec &, uint32_t, lldb::addr_t), - sb_file_spec, line, offset); + LLDB_INSTRUMENT_VA(this, sb_file_spec, line, offset); SBFileSpecList empty_list; return BreakpointCreateByLocation(sb_file_spec, line, offset, empty_list); @@ -734,10 +695,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec, uint32_t line, lldb::addr_t offset, SBFileSpecList &sb_module_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const lldb::SBFileSpec &, uint32_t, lldb::addr_t, - lldb::SBFileSpecList &), - sb_file_spec, line, offset, sb_module_list); + LLDB_INSTRUMENT_VA(this, sb_file_spec, line, offset, sb_module_list); return BreakpointCreateByLocation(sb_file_spec, line, 0, offset, sb_module_list); @@ -746,10 +704,7 @@ SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec, SBBreakpoint SBTarget::BreakpointCreateByLocation( const SBFileSpec &sb_file_spec, uint32_t line, uint32_t column, lldb::addr_t offset, SBFileSpecList &sb_module_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const lldb::SBFileSpec &, uint32_t, uint32_t, - lldb::addr_t, lldb::SBFileSpecList &), - sb_file_spec, line, column, offset, sb_module_list); + LLDB_INSTRUMENT_VA(this, sb_file_spec, line, column, offset, sb_module_list); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -777,10 +732,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation( const SBFileSpec &sb_file_spec, uint32_t line, uint32_t column, lldb::addr_t offset, SBFileSpecList &sb_module_list, bool move_to_nearest_code) 
{ - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation, - (const lldb::SBFileSpec &, uint32_t, uint32_t, - lldb::addr_t, lldb::SBFileSpecList &, bool), - sb_file_spec, line, column, offset, sb_module_list, + LLDB_INSTRUMENT_VA(this, sb_file_spec, line, column, offset, sb_module_list, move_to_nearest_code); SBBreakpoint sb_bp; @@ -807,8 +759,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation( SBBreakpoint SBTarget::BreakpointCreateByName(const char *symbol_name, const char *module_name) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName, - (const char *, const char *), symbol_name, module_name); + LLDB_INSTRUMENT_VA(this, symbol_name, module_name); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -839,10 +790,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByName(const char *symbol_name, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName, - (const char *, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_name, module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_name, module_list, comp_unit_list); lldb::FunctionNameType name_type_mask = eFunctionNameTypeAuto; return BreakpointCreateByName(symbol_name, name_type_mask, @@ -853,10 +801,8 @@ SBTarget::BreakpointCreateByName(const char *symbol_name, lldb::SBBreakpoint SBTarget::BreakpointCreateByName( const char *symbol_name, uint32_t name_type_mask, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName, - (const char *, uint32_t, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_name, name_type_mask, module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_name, name_type_mask, module_list, + comp_unit_list); return BreakpointCreateByName(symbol_name, name_type_mask, eLanguageTypeUnknown, module_list, @@ -867,12 
+813,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByName( const char *symbol_name, uint32_t name_type_mask, LanguageType symbol_language, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName, - (const char *, uint32_t, lldb::LanguageType, - const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_name, name_type_mask, symbol_language, module_list, - comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_name, name_type_mask, symbol_language, + module_list, comp_unit_list); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -893,11 +835,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByName( lldb::SBBreakpoint SBTarget::BreakpointCreateByNames( const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD( - lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames, - (const char **, uint32_t, uint32_t, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_names, num_names, name_type_mask, module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask, module_list, + comp_unit_list); return BreakpointCreateByNames(symbol_names, num_names, name_type_mask, eLanguageTypeUnknown, module_list, @@ -908,12 +847,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames( const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask, LanguageType symbol_language, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames, - (const char **, uint32_t, uint32_t, lldb::LanguageType, - const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_names, num_names, name_type_mask, symbol_language, - module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask, + 
symbol_language, module_list, comp_unit_list); return BreakpointCreateByNames(symbol_names, num_names, name_type_mask, eLanguageTypeUnknown, 0, module_list, @@ -924,12 +859,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames( const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask, LanguageType symbol_language, lldb::addr_t offset, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames, - (const char **, uint32_t, uint32_t, lldb::LanguageType, - lldb::addr_t, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_names, num_names, name_type_mask, symbol_language, - offset, module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask, + symbol_language, offset, module_list, comp_unit_list); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -949,9 +880,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames( SBBreakpoint SBTarget::BreakpointCreateByRegex(const char *symbol_name_regex, const char *module_name) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex, - (const char *, const char *), symbol_name_regex, - module_name); + LLDB_INSTRUMENT_VA(this, symbol_name_regex, module_name); SBFileSpecList module_spec_list; SBFileSpecList comp_unit_list; @@ -966,10 +895,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByRegex(const char *symbol_name_regex, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex, - (const char *, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_name_regex, module_list, comp_unit_list); + LLDB_INSTRUMENT_VA(this, symbol_name_regex, module_list, comp_unit_list); return BreakpointCreateByRegex(symbol_name_regex, eLanguageTypeUnknown, module_list, comp_unit_list); @@ -978,12 +904,8 @@ SBTarget::BreakpointCreateByRegex(const char 
*symbol_name_regex, lldb::SBBreakpoint SBTarget::BreakpointCreateByRegex( const char *symbol_name_regex, LanguageType symbol_language, const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) { - LLDB_RECORD_METHOD( - lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex, - (const char *, lldb::LanguageType, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - symbol_name_regex, symbol_language, module_list, comp_unit_list); - + LLDB_INSTRUMENT_VA(this, symbol_name_regex, symbol_language, module_list, + comp_unit_list); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1003,8 +925,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByRegex( } SBBreakpoint SBTarget::BreakpointCreateByAddress(addr_t address) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByAddress, - (lldb::addr_t), address); + LLDB_INSTRUMENT_VA(this, address); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1018,8 +939,7 @@ SBBreakpoint SBTarget::BreakpointCreateByAddress(addr_t address) { } SBBreakpoint SBTarget::BreakpointCreateBySBAddress(SBAddress &sb_address) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateBySBAddress, - (lldb::SBAddress &), sb_address); + LLDB_INSTRUMENT_VA(this, sb_address); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1040,10 +960,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex(const char *source_regex, const lldb::SBFileSpec &source_file, const char *module_name) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, - BreakpointCreateBySourceRegex, - (const char *, const lldb::SBFileSpec &, const char *), - source_regex, source_file, module_name); + LLDB_INSTRUMENT_VA(this, source_regex, source_file, module_name); SBFileSpecList module_spec_list; @@ -1063,11 +980,7 @@ SBTarget::BreakpointCreateBySourceRegex(const char *source_regex, lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex( const char *source_regex, const SBFileSpecList &module_list, const 
lldb::SBFileSpecList &source_file_list) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, - BreakpointCreateBySourceRegex, - (const char *, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &), - source_regex, module_list, source_file_list); + LLDB_INSTRUMENT_VA(this, source_regex, module_list, source_file_list); return BreakpointCreateBySourceRegex(source_regex, module_list, source_file_list, SBStringList()); @@ -1077,11 +990,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex( const char *source_regex, const SBFileSpecList &module_list, const lldb::SBFileSpecList &source_file_list, const SBStringList &func_names) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, - BreakpointCreateBySourceRegex, - (const char *, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &, const lldb::SBStringList &), - source_regex, module_list, source_file_list, func_names); + LLDB_INSTRUMENT_VA(this, source_regex, module_list, source_file_list, + func_names); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1106,9 +1016,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex( lldb::SBBreakpoint SBTarget::BreakpointCreateForException(lldb::LanguageType language, bool catch_bp, bool throw_bp) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateForException, - (lldb::LanguageType, bool, bool), language, catch_bp, - throw_bp); + LLDB_INSTRUMENT_VA(this, language, catch_bp, throw_bp); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1126,11 +1034,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateFromScript( const char *class_name, SBStructuredData &extra_args, const SBFileSpecList &module_list, const SBFileSpecList &file_list, bool request_hardware) { - LLDB_RECORD_METHOD( - lldb::SBBreakpoint, SBTarget, BreakpointCreateFromScript, - (const char *, lldb::SBStructuredData &, const lldb::SBFileSpecList &, - const lldb::SBFileSpecList &, bool), - class_name, extra_args, module_list, file_list, request_hardware); + 
LLDB_INSTRUMENT_VA(this, class_name, extra_args, module_list, file_list, + request_hardware); SBBreakpoint sb_bp; TargetSP target_sp(GetSP()); @@ -1153,7 +1058,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateFromScript( } uint32_t SBTarget::GetNumBreakpoints() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumBreakpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1164,8 +1069,7 @@ uint32_t SBTarget::GetNumBreakpoints() const { } SBBreakpoint SBTarget::GetBreakpointAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBBreakpoint, SBTarget, GetBreakpointAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBBreakpoint sb_breakpoint; TargetSP target_sp(GetSP()); @@ -1177,8 +1081,7 @@ SBBreakpoint SBTarget::GetBreakpointAtIndex(uint32_t idx) const { } bool SBTarget::BreakpointDelete(break_id_t bp_id) { - LLDB_RECORD_METHOD(bool, SBTarget, BreakpointDelete, (lldb::break_id_t), - bp_id); + LLDB_INSTRUMENT_VA(this, bp_id); bool result = false; TargetSP target_sp(GetSP()); @@ -1191,8 +1094,7 @@ bool SBTarget::BreakpointDelete(break_id_t bp_id) { } SBBreakpoint SBTarget::FindBreakpointByID(break_id_t bp_id) { - LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, FindBreakpointByID, - (lldb::break_id_t), bp_id); + LLDB_INSTRUMENT_VA(this, bp_id); SBBreakpoint sb_breakpoint; TargetSP target_sp(GetSP()); @@ -1206,8 +1108,7 @@ SBBreakpoint SBTarget::FindBreakpointByID(break_id_t bp_id) { bool SBTarget::FindBreakpointsByName(const char *name, SBBreakpointList &bkpts) { - LLDB_RECORD_METHOD(bool, SBTarget, FindBreakpointsByName, - (const char *, lldb::SBBreakpointList &), name, bkpts); + LLDB_INSTRUMENT_VA(this, name, bkpts); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1228,8 +1129,7 @@ bool SBTarget::FindBreakpointsByName(const char *name, } void SBTarget::GetBreakpointNames(SBStringList &names) { - LLDB_RECORD_METHOD(void, SBTarget, GetBreakpointNames, (lldb::SBStringList &), - names); + 
LLDB_INSTRUMENT_VA(this, names); names.Clear(); @@ -1245,8 +1145,7 @@ void SBTarget::GetBreakpointNames(SBStringList &names) { } void SBTarget::DeleteBreakpointName(const char *name) { - LLDB_RECORD_METHOD(void, SBTarget, DeleteBreakpointName, (const char *), - name); + LLDB_INSTRUMENT_VA(this, name); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1256,7 +1155,7 @@ void SBTarget::DeleteBreakpointName(const char *name) { } bool SBTarget::EnableAllBreakpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, EnableAllBreakpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1268,7 +1167,7 @@ bool SBTarget::EnableAllBreakpoints() { } bool SBTarget::DisableAllBreakpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DisableAllBreakpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1280,7 +1179,7 @@ bool SBTarget::DisableAllBreakpoints() { } bool SBTarget::DeleteAllBreakpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DeleteAllBreakpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1293,9 +1192,7 @@ bool SBTarget::DeleteAllBreakpoints() { lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file, SBBreakpointList &new_bps) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsCreateFromFile, - (lldb::SBFileSpec &, lldb::SBBreakpointList &), - source_file, new_bps); + LLDB_INSTRUMENT_VA(this, source_file, new_bps); SBStringList empty_name_list; return BreakpointsCreateFromFile(source_file, empty_name_list, new_bps); @@ -1304,10 +1201,7 @@ lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file, lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file, SBStringList &matching_names, SBBreakpointList &new_bps) { - LLDB_RECORD_METHOD( - lldb::SBError, SBTarget, BreakpointsCreateFromFile, - (lldb::SBFileSpec &, lldb::SBStringList &, lldb::SBBreakpointList &), - source_file, matching_names, new_bps); + 
LLDB_INSTRUMENT_VA(this, source_file, matching_names, new_bps); SBError sberr; TargetSP target_sp(GetSP()); @@ -1339,8 +1233,7 @@ lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file, } lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsWriteToFile, - (lldb::SBFileSpec &), dest_file); + LLDB_INSTRUMENT_VA(this, dest_file); SBError sberr; TargetSP target_sp(GetSP()); @@ -1355,9 +1248,7 @@ lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file) { lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file, SBBreakpointList &bkpt_list, bool append) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsWriteToFile, - (lldb::SBFileSpec &, lldb::SBBreakpointList &, bool), - dest_file, bkpt_list, append); + LLDB_INSTRUMENT_VA(this, dest_file, bkpt_list, append); SBError sberr; TargetSP target_sp(GetSP()); @@ -1375,7 +1266,7 @@ lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file, } uint32_t SBTarget::GetNumWatchpoints() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumWatchpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1386,8 +1277,7 @@ uint32_t SBTarget::GetNumWatchpoints() const { } SBWatchpoint SBTarget::GetWatchpointAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBWatchpoint, SBTarget, GetWatchpointAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBWatchpoint sb_watchpoint; TargetSP target_sp(GetSP()); @@ -1399,9 +1289,7 @@ SBWatchpoint SBTarget::GetWatchpointAtIndex(uint32_t idx) const { } bool SBTarget::DeleteWatchpoint(watch_id_t wp_id) { - LLDB_RECORD_METHOD(bool, SBTarget, DeleteWatchpoint, (lldb::watch_id_t), - wp_id); - + LLDB_INSTRUMENT_VA(this, wp_id); bool result = false; TargetSP target_sp(GetSP()); @@ -1416,9 +1304,7 @@ bool SBTarget::DeleteWatchpoint(watch_id_t wp_id) { } SBWatchpoint 
SBTarget::FindWatchpointByID(lldb::watch_id_t wp_id) { - LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBTarget, FindWatchpointByID, - (lldb::watch_id_t), wp_id); - + LLDB_INSTRUMENT_VA(this, wp_id); SBWatchpoint sb_watchpoint; lldb::WatchpointSP watchpoint_sp; @@ -1437,9 +1323,7 @@ SBWatchpoint SBTarget::FindWatchpointByID(lldb::watch_id_t wp_id) { lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size, bool read, bool write, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBTarget, WatchAddress, - (lldb::addr_t, size_t, bool, bool, lldb::SBError &), addr, - size, read, write, error); + LLDB_INSTRUMENT_VA(this, addr, size, read, write, error); SBWatchpoint sb_watchpoint; lldb::WatchpointSP watchpoint_sp; @@ -1472,7 +1356,7 @@ lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size, } bool SBTarget::EnableAllWatchpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, EnableAllWatchpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1486,7 +1370,7 @@ bool SBTarget::EnableAllWatchpoints() { } bool SBTarget::DisableAllWatchpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DisableAllWatchpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1501,9 +1385,7 @@ bool SBTarget::DisableAllWatchpoints() { SBValue SBTarget::CreateValueFromAddress(const char *name, SBAddress addr, SBType type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromAddress, - (const char *, lldb::SBAddress, lldb::SBType), name, addr, - type); + LLDB_INSTRUMENT_VA(this, name, addr, type); SBValue sb_value; lldb::ValueObjectSP new_value_sp; @@ -1521,9 +1403,7 @@ SBValue SBTarget::CreateValueFromAddress(const char *name, SBAddress addr, lldb::SBValue SBTarget::CreateValueFromData(const char *name, lldb::SBData data, lldb::SBType type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromData, - (const char *, lldb::SBData, lldb::SBType), name, data, - type); + 
LLDB_INSTRUMENT_VA(this, name, data, type); SBValue sb_value; lldb::ValueObjectSP new_value_sp; @@ -1541,8 +1421,7 @@ lldb::SBValue SBTarget::CreateValueFromData(const char *name, lldb::SBData data, lldb::SBValue SBTarget::CreateValueFromExpression(const char *name, const char *expr) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromExpression, - (const char *, const char *), name, expr); + LLDB_INSTRUMENT_VA(this, name, expr); SBValue sb_value; lldb::ValueObjectSP new_value_sp; @@ -1557,7 +1436,7 @@ lldb::SBValue SBTarget::CreateValueFromExpression(const char *name, } bool SBTarget::DeleteAllWatchpoints() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DeleteAllWatchpoints); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1572,9 +1451,7 @@ bool SBTarget::DeleteAllWatchpoints() { void SBTarget::AppendImageSearchPath(const char *from, const char *to, lldb::SBError &error) { - LLDB_RECORD_METHOD(void, SBTarget, AppendImageSearchPath, - (const char *, const char *, lldb::SBError &), from, to, - error); + LLDB_INSTRUMENT_VA(this, from, to, error); TargetSP target_sp(GetSP()); if (!target_sp) @@ -1591,18 +1468,14 @@ void SBTarget::AppendImageSearchPath(const char *from, const char *to, lldb::SBModule SBTarget::AddModule(const char *path, const char *triple, const char *uuid_cstr) { - LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule, - (const char *, const char *, const char *), path, triple, - uuid_cstr); + LLDB_INSTRUMENT_VA(this, path, triple, uuid_cstr); return AddModule(path, triple, uuid_cstr, nullptr); } lldb::SBModule SBTarget::AddModule(const char *path, const char *triple, const char *uuid_cstr, const char *symfile) { - LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule, - (const char *, const char *, const char *, const char *), - path, triple, uuid_cstr, symfile); + LLDB_INSTRUMENT_VA(this, path, triple, uuid_cstr, symfile); lldb::SBModule sb_module; TargetSP target_sp(GetSP()); @@ -1629,8 +1502,7 @@ 
lldb::SBModule SBTarget::AddModule(const char *path, const char *triple, } lldb::SBModule SBTarget::AddModule(const SBModuleSpec &module_spec) { - LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule, - (const lldb::SBModuleSpec &), module_spec); + LLDB_INSTRUMENT_VA(this, module_spec); lldb::SBModule sb_module; TargetSP target_sp(GetSP()); @@ -1641,7 +1513,7 @@ lldb::SBModule SBTarget::AddModule(const SBModuleSpec &module_spec) { } bool SBTarget::AddModule(lldb::SBModule &module) { - LLDB_RECORD_METHOD(bool, SBTarget, AddModule, (lldb::SBModule &), module); + LLDB_INSTRUMENT_VA(this, module); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1652,7 +1524,7 @@ bool SBTarget::AddModule(lldb::SBModule &module) { } uint32_t SBTarget::GetNumModules() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumModules); + LLDB_INSTRUMENT_VA(this); uint32_t num = 0; TargetSP target_sp(GetSP()); @@ -1665,14 +1537,13 @@ uint32_t SBTarget::GetNumModules() const { } void SBTarget::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBTarget, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); } SBModule SBTarget::FindModule(const SBFileSpec &sb_file_spec) { - LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, FindModule, - (const lldb::SBFileSpec &), sb_file_spec); + LLDB_INSTRUMENT_VA(this, sb_file_spec); SBModule sb_module; TargetSP target_sp(GetSP()); @@ -1685,8 +1556,7 @@ SBModule SBTarget::FindModule(const SBFileSpec &sb_file_spec) { } SBSymbolContextList SBTarget::FindCompileUnits(const SBFileSpec &sb_file_spec) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindCompileUnits, - (const lldb::SBFileSpec &), sb_file_spec); + LLDB_INSTRUMENT_VA(this, sb_file_spec); SBSymbolContextList sb_sc_list; const TargetSP target_sp(GetSP()); @@ -1696,7 +1566,7 @@ SBSymbolContextList SBTarget::FindCompileUnits(const SBFileSpec &sb_file_spec) { } lldb::ByteOrder SBTarget::GetByteOrder() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBTarget, GetByteOrder); + 
LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) @@ -1705,7 +1575,7 @@ lldb::ByteOrder SBTarget::GetByteOrder() { } const char *SBTarget::GetTriple() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTarget, GetTriple); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1720,7 +1590,7 @@ const char *SBTarget::GetTriple() { } uint32_t SBTarget::GetDataByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetDataByteSize); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1730,7 +1600,7 @@ uint32_t SBTarget::GetDataByteSize() { } uint32_t SBTarget::GetCodeByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetCodeByteSize); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1740,7 +1610,7 @@ uint32_t SBTarget::GetCodeByteSize() { } uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if(target_sp){ @@ -1750,7 +1620,7 @@ uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const { } uint32_t SBTarget::GetAddressByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetAddressByteSize); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) @@ -1759,8 +1629,7 @@ uint32_t SBTarget::GetAddressByteSize() { } SBModule SBTarget::GetModuleAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBModule sb_module; ModuleSP module_sp; @@ -1775,7 +1644,7 @@ SBModule SBTarget::GetModuleAtIndex(uint32_t idx) { } bool SBTarget::RemoveModule(lldb::SBModule module) { - LLDB_RECORD_METHOD(bool, SBTarget, RemoveModule, (lldb::SBModule), module); + LLDB_INSTRUMENT_VA(this, module); TargetSP target_sp(GetSP()); if (target_sp) @@ -1784,9 +1653,7 @@ bool SBTarget::RemoveModule(lldb::SBModule module) { 
} SBBroadcaster SBTarget::GetBroadcaster() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBTarget, - GetBroadcaster); - + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); SBBroadcaster broadcaster(target_sp.get(), false); @@ -1796,9 +1663,7 @@ SBBroadcaster SBTarget::GetBroadcaster() const { bool SBTarget::GetDescription(SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTarget, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); Stream &strm = description.ref(); @@ -1813,8 +1678,7 @@ bool SBTarget::GetDescription(SBStream &description, lldb::SBSymbolContextList SBTarget::FindFunctions(const char *name, uint32_t name_type_mask) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindFunctions, - (const char *, uint32_t), name, name_type_mask); + LLDB_INSTRUMENT_VA(this, name, name_type_mask); lldb::SBSymbolContextList sb_sc_list; if (!name || !name[0]) @@ -1837,9 +1701,7 @@ lldb::SBSymbolContextList SBTarget::FindFunctions(const char *name, lldb::SBSymbolContextList SBTarget::FindGlobalFunctions(const char *name, uint32_t max_matches, MatchType matchtype) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindGlobalFunctions, - (const char *, uint32_t, lldb::MatchType), name, - max_matches, matchtype); + LLDB_INSTRUMENT_VA(this, name, max_matches, matchtype); lldb::SBSymbolContextList sb_sc_list; if (name && name[0]) { @@ -1873,8 +1735,7 @@ lldb::SBSymbolContextList SBTarget::FindGlobalFunctions(const char *name, } lldb::SBType SBTarget::FindFirstType(const char *typename_cstr) { - LLDB_RECORD_METHOD(lldb::SBType, SBTarget, FindFirstType, (const char *), - typename_cstr); + LLDB_INSTRUMENT_VA(this, typename_cstr); TargetSP target_sp(GetSP()); if (typename_cstr && typename_cstr[0] && target_sp) { @@ -1915,8 +1776,7 @@ lldb::SBType SBTarget::FindFirstType(const char 
*typename_cstr) { } SBType SBTarget::GetBasicType(lldb::BasicType type) { - LLDB_RECORD_METHOD(lldb::SBType, SBTarget, GetBasicType, (lldb::BasicType), - type); + LLDB_INSTRUMENT_VA(this, type); TargetSP target_sp(GetSP()); if (target_sp) { @@ -1928,8 +1788,7 @@ SBType SBTarget::GetBasicType(lldb::BasicType type) { } lldb::SBTypeList SBTarget::FindTypes(const char *typename_cstr) { - LLDB_RECORD_METHOD(lldb::SBTypeList, SBTarget, FindTypes, (const char *), - typename_cstr); + LLDB_INSTRUMENT_VA(this, typename_cstr); SBTypeList sb_type_list; TargetSP target_sp(GetSP()); @@ -1973,8 +1832,7 @@ lldb::SBTypeList SBTarget::FindTypes(const char *typename_cstr) { SBValueList SBTarget::FindGlobalVariables(const char *name, uint32_t max_matches) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBTarget, FindGlobalVariables, - (const char *, uint32_t), name, max_matches); + LLDB_INSTRUMENT_VA(this, name, max_matches); SBValueList sb_value_list; @@ -2002,9 +1860,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name, SBValueList SBTarget::FindGlobalVariables(const char *name, uint32_t max_matches, MatchType matchtype) { - LLDB_RECORD_METHOD(lldb::SBValueList, SBTarget, FindGlobalVariables, - (const char *, uint32_t, lldb::MatchType), name, - max_matches, matchtype); + LLDB_INSTRUMENT_VA(this, name, max_matches, matchtype); SBValueList sb_value_list; @@ -2046,8 +1902,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name, } lldb::SBValue SBTarget::FindFirstGlobalVariable(const char *name) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, FindFirstGlobalVariable, - (const char *), name); + LLDB_INSTRUMENT_VA(this, name); SBValueList sb_value_list(FindGlobalVariables(name, 1)); if (sb_value_list.IsValid() && sb_value_list.GetSize() > 0) @@ -2056,7 +1911,7 @@ lldb::SBValue SBTarget::FindFirstGlobalVariable(const char *name) { } SBSourceManager SBTarget::GetSourceManager() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSourceManager, SBTarget, GetSourceManager); + 
LLDB_INSTRUMENT_VA(this); SBSourceManager source_manager(*this); return source_manager; @@ -2064,8 +1919,7 @@ SBSourceManager SBTarget::GetSourceManager() { lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, uint32_t count) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, ReadInstructions, - (lldb::SBAddress, uint32_t), base_addr, count); + LLDB_INSTRUMENT_VA(this, base_addr, count); return ReadInstructions(base_addr, count, nullptr); } @@ -2073,9 +1927,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, uint32_t count, const char *flavor_string) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, ReadInstructions, - (lldb::SBAddress, uint32_t, const char *), base_addr, - count, flavor_string); + LLDB_INSTRUMENT_VA(this, base_addr, count, flavor_string); SBInstructionList sb_instructions; @@ -2105,9 +1957,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr, lldb::SBInstructionList SBTarget::GetInstructions(lldb::SBAddress base_addr, const void *buf, size_t size) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, GetInstructions, - (lldb::SBAddress, const void *, size_t), base_addr, buf, - size); + LLDB_INSTRUMENT_VA(this, base_addr, buf, size); return GetInstructionsWithFlavor(base_addr, nullptr, buf, size); } @@ -2116,10 +1966,7 @@ lldb::SBInstructionList SBTarget::GetInstructionsWithFlavor(lldb::SBAddress base_addr, const char *flavor_string, const void *buf, size_t size) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, - GetInstructionsWithFlavor, - (lldb::SBAddress, const char *, const void *, size_t), - base_addr, flavor_string, buf, size); + LLDB_INSTRUMENT_VA(this, base_addr, flavor_string, buf, size); SBInstructionList sb_instructions; @@ -2143,9 +1990,7 @@ SBTarget::GetInstructionsWithFlavor(lldb::SBAddress base_addr, lldb::SBInstructionList 
SBTarget::GetInstructions(lldb::addr_t base_addr, const void *buf, size_t size) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, GetInstructions, - (lldb::addr_t, const void *, size_t), base_addr, buf, - size); + LLDB_INSTRUMENT_VA(this, base_addr, buf, size); return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), nullptr, buf, size); @@ -2155,10 +2000,7 @@ lldb::SBInstructionList SBTarget::GetInstructionsWithFlavor(lldb::addr_t base_addr, const char *flavor_string, const void *buf, size_t size) { - LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, - GetInstructionsWithFlavor, - (lldb::addr_t, const char *, const void *, size_t), - base_addr, flavor_string, buf, size); + LLDB_INSTRUMENT_VA(this, base_addr, flavor_string, buf, size); return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), flavor_string, buf, size); @@ -2166,9 +2008,7 @@ SBTarget::GetInstructionsWithFlavor(lldb::addr_t base_addr, SBError SBTarget::SetSectionLoadAddress(lldb::SBSection section, lldb::addr_t section_base_addr) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, SetSectionLoadAddress, - (lldb::SBSection, lldb::addr_t), section, - section_base_addr); + LLDB_INSTRUMENT_VA(this, section, section_base_addr); SBError sb_error; TargetSP target_sp(GetSP()); @@ -2204,8 +2044,7 @@ SBError SBTarget::SetSectionLoadAddress(lldb::SBSection section, } SBError SBTarget::ClearSectionLoadAddress(lldb::SBSection section) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, ClearSectionLoadAddress, - (lldb::SBSection), section); + LLDB_INSTRUMENT_VA(this, section); SBError sb_error; @@ -2240,8 +2079,7 @@ SBError SBTarget::ClearSectionLoadAddress(lldb::SBSection section) { SBError SBTarget::SetModuleLoadAddress(lldb::SBModule module, int64_t slide_offset) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, SetModuleLoadAddress, - (lldb::SBModule, int64_t), module, slide_offset); + LLDB_INSTRUMENT_VA(this, module, slide_offset); SBError sb_error; @@ -2274,8 +2112,7 @@ SBError 
SBTarget::SetModuleLoadAddress(lldb::SBModule module, } SBError SBTarget::ClearModuleLoadAddress(lldb::SBModule module) { - LLDB_RECORD_METHOD(lldb::SBError, SBTarget, ClearModuleLoadAddress, - (lldb::SBModule), module); + LLDB_INSTRUMENT_VA(this, module); SBError sb_error; @@ -2327,8 +2164,7 @@ SBError SBTarget::ClearModuleLoadAddress(lldb::SBModule module) { lldb::SBSymbolContextList SBTarget::FindSymbols(const char *name, lldb::SymbolType symbol_type) { - LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindSymbols, - (const char *, lldb::SymbolType), name, symbol_type); + LLDB_INSTRUMENT_VA(this, name, symbol_type); SBSymbolContextList sb_sc_list; if (name && name[0]) { @@ -2341,8 +2177,7 @@ lldb::SBSymbolContextList SBTarget::FindSymbols(const char *name, } lldb::SBValue SBTarget::EvaluateExpression(const char *expr) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, EvaluateExpression, - (const char *), expr); + LLDB_INSTRUMENT_VA(this, expr); TargetSP target_sp(GetSP()); if (!target_sp) @@ -2358,9 +2193,7 @@ lldb::SBValue SBTarget::EvaluateExpression(const char *expr) { lldb::SBValue SBTarget::EvaluateExpression(const char *expr, const SBExpressionOptions &options) { - LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, EvaluateExpression, - (const char *, const lldb::SBExpressionOptions &), expr, - options); + LLDB_INSTRUMENT_VA(this, expr, options); Log *expr_log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); SBValue expr_result; @@ -2393,7 +2226,7 @@ lldb::SBValue SBTarget::EvaluateExpression(const char *expr, } lldb::addr_t SBTarget::GetStackRedZoneSize() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBTarget, GetStackRedZoneSize); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -2410,8 +2243,7 @@ lldb::addr_t SBTarget::GetStackRedZoneSize() { } bool SBTarget::IsLoaded(const SBModule &module) const { - LLDB_RECORD_METHOD_CONST(bool, SBTarget, IsLoaded, (const lldb::SBModule &), - module); + LLDB_INSTRUMENT_VA(this, module); 
TargetSP target_sp(GetSP()); if (!target_sp) @@ -2425,7 +2257,7 @@ bool SBTarget::IsLoaded(const SBModule &module) const { } lldb::SBLaunchInfo SBTarget::GetLaunchInfo() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBLaunchInfo, SBTarget, GetLaunchInfo); + LLDB_INSTRUMENT_VA(this); lldb::SBLaunchInfo launch_info(nullptr); TargetSP target_sp(GetSP()); @@ -2435,8 +2267,7 @@ lldb::SBLaunchInfo SBTarget::GetLaunchInfo() const { } void SBTarget::SetLaunchInfo(const lldb::SBLaunchInfo &launch_info) { - LLDB_RECORD_METHOD(void, SBTarget, SetLaunchInfo, - (const lldb::SBLaunchInfo &), launch_info); + LLDB_INSTRUMENT_VA(this, launch_info); TargetSP target_sp(GetSP()); if (target_sp) @@ -2444,7 +2275,7 @@ void SBTarget::SetLaunchInfo(const lldb::SBLaunchInfo &launch_info) { } SBEnvironment SBTarget::GetEnvironment() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBTarget, GetEnvironment); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) { @@ -2455,7 +2286,7 @@ SBEnvironment SBTarget::GetEnvironment() { } lldb::SBTrace SBTarget::GetTrace() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTrace, SBTarget, GetTrace); + LLDB_INSTRUMENT_VA(this); TargetSP target_sp(GetSP()); if (target_sp) @@ -2465,8 +2296,7 @@ lldb::SBTrace SBTarget::GetTrace() { } lldb::SBTrace SBTarget::CreateTrace(lldb::SBError &error) { - LLDB_RECORD_METHOD(lldb::SBTrace, SBTarget, CreateTrace, (lldb::SBError &), - error); + LLDB_INSTRUMENT_VA(this, error); TargetSP target_sp(GetSP()); error.Clear(); diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index f10477c553705..46a6c2759140b 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBThread.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBDebugger.h" @@ -40,6 +39,7 @@ #include 
"lldb/Target/ThreadPlanStepInstruction.h" #include "lldb/Target/ThreadPlanStepOut.h" #include "lldb/Target/ThreadPlanStepRange.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StructuredData.h" @@ -51,24 +51,23 @@ using namespace lldb; using namespace lldb_private; const char *SBThread::GetBroadcasterClassName() { - LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBThread, - GetBroadcasterClassName); + LLDB_INSTRUMENT(); return Thread::GetStaticBroadcasterClass().AsCString(); } // Constructors SBThread::SBThread() : m_opaque_sp(new ExecutionContextRef()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThread); + LLDB_INSTRUMENT_VA(this); } SBThread::SBThread(const ThreadSP &lldb_object_sp) : m_opaque_sp(new ExecutionContextRef(lldb_object_sp)) { - LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::ThreadSP &), lldb_object_sp); + LLDB_INSTRUMENT_VA(this, lldb_object_sp); } SBThread::SBThread(const SBThread &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::SBThread &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = clone(rhs.m_opaque_sp); } @@ -76,8 +75,7 @@ SBThread::SBThread(const SBThread &rhs) { // Assignment operator const lldb::SBThread &SBThread::operator=(const SBThread &rhs) { - LLDB_RECORD_METHOD(const lldb::SBThread &, - SBThread, operator=,(const lldb::SBThread &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = clone(rhs.m_opaque_sp); @@ -88,7 +86,7 @@ const lldb::SBThread &SBThread::operator=(const SBThread &rhs) { SBThread::~SBThread() = default; lldb::SBQueue SBThread::GetQueue() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBQueue, SBThread, GetQueue); + LLDB_INSTRUMENT_VA(this); SBQueue sb_queue; QueueSP queue_sp; @@ -109,11 +107,11 @@ lldb::SBQueue SBThread::GetQueue() const { } bool SBThread::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThread, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } 
SBThread::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThread, operator bool); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -130,13 +128,13 @@ SBThread::operator bool() const { } void SBThread::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBThread, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp->Clear(); } StopReason SBThread::GetStopReason() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::StopReason, SBThread, GetStopReason); + LLDB_INSTRUMENT_VA(this); StopReason reason = eStopReasonInvalid; std::unique_lock lock; @@ -153,7 +151,7 @@ StopReason SBThread::GetStopReason() { } size_t SBThread::GetStopReasonDataCount() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThread, GetStopReasonDataCount); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -210,8 +208,7 @@ size_t SBThread::GetStopReasonDataCount() { } uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(uint64_t, SBThread, GetStopReasonDataAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -280,8 +277,7 @@ uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) { } bool SBThread::GetStopReasonExtendedInfoAsJSON(lldb::SBStream &stream) { - LLDB_RECORD_METHOD(bool, SBThread, GetStopReasonExtendedInfoAsJSON, - (lldb::SBStream &), stream); + LLDB_INSTRUMENT_VA(this, stream); Stream &strm = stream.ref(); @@ -303,9 +299,7 @@ bool SBThread::GetStopReasonExtendedInfoAsJSON(lldb::SBStream &stream) { SBThreadCollection SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) { - LLDB_RECORD_METHOD(lldb::SBThreadCollection, SBThread, - GetStopReasonExtendedBacktraces, - (lldb::InstrumentationRuntimeType), type); + LLDB_INSTRUMENT_VA(this, type); SBThreadCollection threads; @@ -328,8 +322,7 @@ SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) { 
} size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { - LLDB_RECORD_METHOD(size_t, SBThread, GetStopDescription, (char *, size_t), - dst, "", dst_len); + LLDB_INSTRUMENT_VA(this, dst, dst_len); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -357,7 +350,7 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { } SBValue SBThread::GetStopReturnValue() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBThread, GetStopReturnValue); + LLDB_INSTRUMENT_VA(this); ValueObjectSP return_valobj_sp; std::unique_lock lock; @@ -381,7 +374,7 @@ void SBThread::SetThread(const ThreadSP &lldb_object_sp) { } lldb::tid_t SBThread::GetThreadID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::tid_t, SBThread, GetThreadID); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (thread_sp) @@ -390,7 +383,7 @@ lldb::tid_t SBThread::GetThreadID() const { } uint32_t SBThread::GetIndexID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBThread, GetIndexID); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (thread_sp) @@ -399,7 +392,7 @@ uint32_t SBThread::GetIndexID() const { } const char *SBThread::GetName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; std::unique_lock lock; @@ -416,7 +409,7 @@ const char *SBThread::GetName() const { } const char *SBThread::GetQueueName() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetQueueName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; std::unique_lock lock; @@ -433,7 +426,7 @@ const char *SBThread::GetQueueName() const { } lldb::queue_id_t SBThread::GetQueueID() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::queue_id_t, SBThread, GetQueueID); + LLDB_INSTRUMENT_VA(this); queue_id_t id = LLDB_INVALID_QUEUE_ID; std::unique_lock lock; @@ -450,8 +443,7 @@ lldb::queue_id_t SBThread::GetQueueID() const { } bool 
SBThread::GetInfoItemByPathAsString(const char *path, SBStream &strm) { - LLDB_RECORD_METHOD(bool, SBThread, GetInfoItemByPathAsString, - (const char *, lldb::SBStream &), path, strm); + LLDB_INSTRUMENT_VA(this, path, strm); bool success = false; std::unique_lock lock; @@ -532,16 +524,14 @@ SBError SBThread::ResumeNewPlan(ExecutionContext &exe_ctx, } void SBThread::StepOver(lldb::RunMode stop_other_threads) { - LLDB_RECORD_METHOD(void, SBThread, StepOver, (lldb::RunMode), - stop_other_threads); + LLDB_INSTRUMENT_VA(this, stop_other_threads); SBError error; // Ignored StepOver(stop_other_threads, error); } void SBThread::StepOver(lldb::RunMode stop_other_threads, SBError &error) { - LLDB_RECORD_METHOD(void, SBThread, StepOver, (lldb::RunMode, lldb::SBError &), - stop_other_threads, error); + LLDB_INSTRUMENT_VA(this, stop_other_threads, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -573,16 +563,14 @@ void SBThread::StepOver(lldb::RunMode stop_other_threads, SBError &error) { } void SBThread::StepInto(lldb::RunMode stop_other_threads) { - LLDB_RECORD_METHOD(void, SBThread, StepInto, (lldb::RunMode), - stop_other_threads); + LLDB_INSTRUMENT_VA(this, stop_other_threads); StepInto(nullptr, stop_other_threads); } void SBThread::StepInto(const char *target_name, lldb::RunMode stop_other_threads) { - LLDB_RECORD_METHOD(void, SBThread, StepInto, (const char *, lldb::RunMode), - target_name, stop_other_threads); + LLDB_INSTRUMENT_VA(this, target_name, stop_other_threads); SBError error; // Ignored StepInto(target_name, LLDB_INVALID_LINE_NUMBER, error, stop_other_threads); @@ -590,10 +578,7 @@ void SBThread::StepInto(const char *target_name, void SBThread::StepInto(const char *target_name, uint32_t end_line, SBError &error, lldb::RunMode stop_other_threads) { - LLDB_RECORD_METHOD(void, SBThread, StepInto, - (const char *, uint32_t, lldb::SBError &, lldb::RunMode), - target_name, end_line, error, stop_other_threads); - + 
LLDB_INSTRUMENT_VA(this, target_name, end_line, error, stop_other_threads); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -640,14 +625,14 @@ void SBThread::StepInto(const char *target_name, uint32_t end_line, } void SBThread::StepOut() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBThread, StepOut); + LLDB_INSTRUMENT_VA(this); SBError error; // Ignored StepOut(error); } void SBThread::StepOut(SBError &error) { - LLDB_RECORD_METHOD(void, SBThread, StepOut, (lldb::SBError &), error); + LLDB_INSTRUMENT_VA(this, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -675,17 +660,14 @@ void SBThread::StepOut(SBError &error) { } void SBThread::StepOutOfFrame(SBFrame &sb_frame) { - LLDB_RECORD_METHOD(void, SBThread, StepOutOfFrame, (lldb::SBFrame &), - sb_frame); + LLDB_INSTRUMENT_VA(this, sb_frame); SBError error; // Ignored StepOutOfFrame(sb_frame, error); } void SBThread::StepOutOfFrame(SBFrame &sb_frame, SBError &error) { - LLDB_RECORD_METHOD(void, SBThread, StepOutOfFrame, - (lldb::SBFrame &, lldb::SBError &), sb_frame, error); - + LLDB_INSTRUMENT_VA(this, sb_frame, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -722,15 +704,14 @@ void SBThread::StepOutOfFrame(SBFrame &sb_frame, SBError &error) { } void SBThread::StepInstruction(bool step_over) { - LLDB_RECORD_METHOD(void, SBThread, StepInstruction, (bool), step_over); + LLDB_INSTRUMENT_VA(this, step_over); SBError error; // Ignored StepInstruction(step_over, error); } void SBThread::StepInstruction(bool step_over, SBError &error) { - LLDB_RECORD_METHOD(void, SBThread, StepInstruction, (bool, lldb::SBError &), - step_over, error); + LLDB_INSTRUMENT_VA(this, step_over, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -752,15 +733,14 @@ void SBThread::StepInstruction(bool step_over, SBError &error) { } void SBThread::RunToAddress(lldb::addr_t addr) { - LLDB_RECORD_METHOD(void, SBThread, 
RunToAddress, (lldb::addr_t), addr); + LLDB_INSTRUMENT_VA(this, addr); SBError error; // Ignored RunToAddress(addr, error); } void SBThread::RunToAddress(lldb::addr_t addr, SBError &error) { - LLDB_RECORD_METHOD(void, SBThread, RunToAddress, - (lldb::addr_t, lldb::SBError &), addr, error); + LLDB_INSTRUMENT_VA(this, addr, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -789,9 +769,7 @@ void SBThread::RunToAddress(lldb::addr_t addr, SBError &error) { SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, lldb::SBFileSpec &sb_file_spec, uint32_t line) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepOverUntil, - (lldb::SBFrame &, lldb::SBFileSpec &, uint32_t), sb_frame, - sb_file_spec, line); + LLDB_INSTRUMENT_VA(this, sb_frame, sb_file_spec, line); SBError sb_error; char path[PATH_MAX]; @@ -908,17 +886,14 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, } SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan, - (const char *), script_class_name); + LLDB_INSTRUMENT_VA(this, script_class_name); return StepUsingScriptedThreadPlan(script_class_name, true); } SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name, bool resume_immediately) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan, - (const char *, bool), script_class_name, - resume_immediately); + LLDB_INSTRUMENT_VA(this, script_class_name, resume_immediately); lldb::SBStructuredData no_data; return StepUsingScriptedThreadPlan(script_class_name, no_data, @@ -928,9 +903,7 @@ SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name, SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name, SBStructuredData &args_data, bool resume_immediately) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan, - (const char *, lldb::SBStructuredData &, bool), - 
script_class_name, args_data, resume_immediately); + LLDB_INSTRUMENT_VA(this, script_class_name, args_data, resume_immediately); SBError error; @@ -966,8 +939,7 @@ SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name, } SBError SBThread::JumpToLine(lldb::SBFileSpec &file_spec, uint32_t line) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, JumpToLine, - (lldb::SBFileSpec &, uint32_t), file_spec, line); + LLDB_INSTRUMENT_VA(this, file_spec, line); SBError sb_error; @@ -987,8 +959,7 @@ SBError SBThread::JumpToLine(lldb::SBFileSpec &file_spec, uint32_t line) { } SBError SBThread::ReturnFromFrame(SBFrame &frame, SBValue &return_value) { - LLDB_RECORD_METHOD(lldb::SBError, SBThread, ReturnFromFrame, - (lldb::SBFrame &, lldb::SBValue &), frame, return_value); + LLDB_INSTRUMENT_VA(this, frame, return_value); SBError sb_error; @@ -1005,8 +976,7 @@ SBError SBThread::ReturnFromFrame(SBFrame &frame, SBValue &return_value) { } SBError SBThread::UnwindInnermostExpression() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBThread, - UnwindInnermostExpression); + LLDB_INSTRUMENT_VA(this); SBError sb_error; @@ -1024,14 +994,14 @@ SBError SBThread::UnwindInnermostExpression() { } bool SBThread::Suspend() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, Suspend); + LLDB_INSTRUMENT_VA(this); SBError error; // Ignored return Suspend(error); } bool SBThread::Suspend(SBError &error) { - LLDB_RECORD_METHOD(bool, SBThread, Suspend, (lldb::SBError &), error); + LLDB_INSTRUMENT_VA(this, error); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1051,14 +1021,14 @@ bool SBThread::Suspend(SBError &error) { } bool SBThread::Resume() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, Resume); + LLDB_INSTRUMENT_VA(this); SBError error; // Ignored return Resume(error); } bool SBThread::Resume(SBError &error) { - LLDB_RECORD_METHOD(bool, SBThread, Resume, (lldb::SBError &), error); + LLDB_INSTRUMENT_VA(this, error); std::unique_lock lock; ExecutionContext 
exe_ctx(m_opaque_sp.get(), lock); @@ -1079,7 +1049,7 @@ bool SBThread::Resume(SBError &error) { } bool SBThread::IsSuspended() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, IsSuspended); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1090,7 +1060,7 @@ bool SBThread::IsSuspended() { } bool SBThread::IsStopped() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, IsStopped); + LLDB_INSTRUMENT_VA(this); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1101,7 +1071,7 @@ bool SBThread::IsStopped() { } SBProcess SBThread::GetProcess() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBThread, GetProcess); + LLDB_INSTRUMENT_VA(this); SBProcess sb_process; std::unique_lock lock; @@ -1117,7 +1087,7 @@ SBProcess SBThread::GetProcess() { } uint32_t SBThread::GetNumFrames() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBThread, GetNumFrames); + LLDB_INSTRUMENT_VA(this); uint32_t num_frames = 0; std::unique_lock lock; @@ -1134,7 +1104,7 @@ uint32_t SBThread::GetNumFrames() { } SBFrame SBThread::GetFrameAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBFrame, SBThread, GetFrameAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBFrame sb_frame; StackFrameSP frame_sp; @@ -1153,7 +1123,7 @@ SBFrame SBThread::GetFrameAtIndex(uint32_t idx) { } lldb::SBFrame SBThread::GetSelectedFrame() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFrame, SBThread, GetSelectedFrame); + LLDB_INSTRUMENT_VA(this); SBFrame sb_frame; StackFrameSP frame_sp; @@ -1172,8 +1142,7 @@ lldb::SBFrame SBThread::GetSelectedFrame() { } lldb::SBFrame SBThread::SetSelectedFrame(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBFrame, SBThread, SetSelectedFrame, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBFrame sb_frame; StackFrameSP frame_sp; @@ -1196,45 +1165,39 @@ lldb::SBFrame SBThread::SetSelectedFrame(uint32_t idx) { } bool SBThread::EventIsThreadEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBThread, 
EventIsThreadEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Thread::ThreadEventData::GetEventDataFromEvent(event.get()) != nullptr; } SBFrame SBThread::GetStackFrameFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBFrame, SBThread, GetStackFrameFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Thread::ThreadEventData::GetStackFrameFromEvent(event.get()); } SBThread SBThread::GetThreadFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBThread, SBThread, GetThreadFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Thread::ThreadEventData::GetThreadFromEvent(event.get()); } bool SBThread::operator==(const SBThread &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBThread, operator==,(const lldb::SBThread &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_sp->GetThreadSP().get() == rhs.m_opaque_sp->GetThreadSP().get(); } bool SBThread::operator!=(const SBThread &rhs) const { - LLDB_RECORD_METHOD_CONST(bool, SBThread, operator!=,(const lldb::SBThread &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); return m_opaque_sp->GetThreadSP().get() != rhs.m_opaque_sp->GetThreadSP().get(); } bool SBThread::GetStatus(SBStream &status) const { - LLDB_RECORD_METHOD_CONST(bool, SBThread, GetStatus, (lldb::SBStream &), - status); + LLDB_INSTRUMENT_VA(this, status); Stream &strm = status.ref(); @@ -1250,15 +1213,13 @@ bool SBThread::GetStatus(SBStream &status) const { } bool SBThread::GetDescription(SBStream &description) const { - LLDB_RECORD_METHOD_CONST(bool, SBThread, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); return GetDescription(description, false); } bool SBThread::GetDescription(SBStream &description, bool stop_format) const { - LLDB_RECORD_METHOD_CONST(bool, SBThread, GetDescription, - (lldb::SBStream &, bool), description, stop_format); + LLDB_INSTRUMENT_VA(this, description, stop_format); 
Stream &strm = description.ref(); @@ -1278,8 +1239,7 @@ bool SBThread::GetDescription(SBStream &description, bool stop_format) const { } SBThread SBThread::GetExtendedBacktraceThread(const char *type) { - LLDB_RECORD_METHOD(lldb::SBThread, SBThread, GetExtendedBacktraceThread, - (const char *), type); + LLDB_INSTRUMENT_VA(this, type); std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); @@ -1313,8 +1273,7 @@ SBThread SBThread::GetExtendedBacktraceThread(const char *type) { } uint32_t SBThread::GetExtendedBacktraceOriginatingIndexID() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBThread, - GetExtendedBacktraceOriginatingIndexID); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (thread_sp) @@ -1323,7 +1282,7 @@ uint32_t SBThread::GetExtendedBacktraceOriginatingIndexID() { } SBValue SBThread::GetCurrentException() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBThread, GetCurrentException); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (!thread_sp) @@ -1333,8 +1292,7 @@ SBValue SBThread::GetCurrentException() { } SBThread SBThread::GetCurrentExceptionBacktrace() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBThread, SBThread, - GetCurrentExceptionBacktrace); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (!thread_sp) @@ -1344,7 +1302,7 @@ SBThread SBThread::GetCurrentExceptionBacktrace() { } bool SBThread::SafeToCallFunctions() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, SafeToCallFunctions); + LLDB_INSTRUMENT_VA(this); ThreadSP thread_sp(m_opaque_sp->GetThreadSP()); if (thread_sp) diff --git a/lldb/source/API/SBThreadCollection.cpp b/lldb/source/API/SBThreadCollection.cpp index fcf066381c98e..9d688e0122397 100644 --- a/lldb/source/API/SBThreadCollection.cpp +++ b/lldb/source/API/SBThreadCollection.cpp @@ -7,28 +7,23 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBThreadCollection.h" -#include 
"lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBThread.h" #include "lldb/Target/ThreadList.h" +#include "lldb/Utility/Instrumentation.h" using namespace lldb; using namespace lldb_private; -SBThreadCollection::SBThreadCollection() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThreadCollection); -} +SBThreadCollection::SBThreadCollection() { LLDB_INSTRUMENT_VA(this); } SBThreadCollection::SBThreadCollection(const SBThreadCollection &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBThreadCollection, - (const lldb::SBThreadCollection &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBThreadCollection &SBThreadCollection:: operator=(const SBThreadCollection &rhs) { - LLDB_RECORD_METHOD( - const lldb::SBThreadCollection &, - SBThreadCollection, operator=,(const lldb::SBThreadCollection &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -61,17 +56,17 @@ const lldb::ThreadCollectionSP &SBThreadCollection::operator*() const { } bool SBThreadCollection::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadCollection, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBThreadCollection::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadCollection, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } size_t SBThreadCollection::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThreadCollection, GetSize); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) return m_opaque_sp->GetSize(); @@ -79,8 +74,7 @@ size_t SBThreadCollection::GetSize() { } SBThread SBThreadCollection::GetThreadAtIndex(size_t idx) { - LLDB_RECORD_METHOD(lldb::SBThread, SBThreadCollection, GetThreadAtIndex, - (size_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBThread thread; if (m_opaque_sp && idx < m_opaque_sp->GetSize()) diff --git a/lldb/source/API/SBThreadPlan.cpp b/lldb/source/API/SBThreadPlan.cpp index 26fcca5c2e23f..2e66ac1208392 100644 --- 
a/lldb/source/API/SBThreadPlan.cpp +++ b/lldb/source/API/SBThreadPlan.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBThread.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBFileSpec.h" #include "lldb/API/SBStream.h" @@ -50,22 +50,20 @@ using namespace lldb; using namespace lldb_private; // Constructors -SBThreadPlan::SBThreadPlan() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThreadPlan); } +SBThreadPlan::SBThreadPlan() { LLDB_INSTRUMENT_VA(this); } SBThreadPlan::SBThreadPlan(const ThreadPlanSP &lldb_object_sp) : m_opaque_wp(lldb_object_sp) { - LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (const lldb::ThreadPlanSP &), - lldb_object_sp); + LLDB_INSTRUMENT_VA(this, lldb_object_sp); } SBThreadPlan::SBThreadPlan(const SBThreadPlan &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (const lldb::SBThreadPlan &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name) { - LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (lldb::SBThread &, const char *), - sb_thread, class_name); + LLDB_INSTRUMENT_VA(this, sb_thread, class_name); Thread *thread = sb_thread.get(); if (thread) @@ -75,9 +73,7 @@ SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name) { SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name, lldb::SBStructuredData &args_data) { - LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (lldb::SBThread &, const char *, - SBStructuredData &), - sb_thread, class_name, args_data); + LLDB_INSTRUMENT_VA(this, sb_thread, class_name, args_data); Thread *thread = sb_thread.get(); if (thread) @@ -88,8 +84,7 @@ SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name, // Assignment operator const lldb::SBThreadPlan &SBThreadPlan::operator=(const SBThreadPlan &rhs) { - LLDB_RECORD_METHOD(const lldb::SBThreadPlan &, - 
SBThreadPlan, operator=,(const lldb::SBThreadPlan &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_wp = rhs.m_opaque_wp; @@ -99,42 +94,41 @@ const lldb::SBThreadPlan &SBThreadPlan::operator=(const SBThreadPlan &rhs) { SBThreadPlan::~SBThreadPlan() = default; bool SBThreadPlan::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadPlan, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBThreadPlan::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadPlan, operator bool); + LLDB_INSTRUMENT_VA(this); return static_cast(GetSP()); } void SBThreadPlan::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBThreadPlan, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_wp.reset(); } lldb::StopReason SBThreadPlan::GetStopReason() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::StopReason, SBThreadPlan, GetStopReason); + LLDB_INSTRUMENT_VA(this); return eStopReasonNone; } size_t SBThreadPlan::GetStopReasonDataCount() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThreadPlan, GetStopReasonDataCount); + LLDB_INSTRUMENT_VA(this); return 0; } uint64_t SBThreadPlan::GetStopReasonDataAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(uint64_t, SBThreadPlan, GetStopReasonDataAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); return 0; } SBThread SBThreadPlan::GetThread() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBThreadPlan, GetThread); + LLDB_INSTRUMENT_VA(this); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -144,8 +138,7 @@ SBThread SBThreadPlan::GetThread() const { } bool SBThreadPlan::GetDescription(lldb::SBStream &description) const { - LLDB_RECORD_METHOD_CONST(bool, SBThreadPlan, GetDescription, - (lldb::SBStream &), description); + LLDB_INSTRUMENT_VA(this, description); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -161,7 +154,7 @@ void SBThreadPlan::SetThreadPlan(const ThreadPlanSP &lldb_object_wp) { } void SBThreadPlan::SetPlanComplete(bool success) { - 
LLDB_RECORD_METHOD(void, SBThreadPlan, SetPlanComplete, (bool), success); + LLDB_INSTRUMENT_VA(this, success); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -169,7 +162,7 @@ void SBThreadPlan::SetPlanComplete(bool success) { } bool SBThreadPlan::IsPlanComplete() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsPlanComplete); + LLDB_INSTRUMENT_VA(this); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -178,7 +171,7 @@ bool SBThreadPlan::IsPlanComplete() { } bool SBThreadPlan::IsPlanStale() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsPlanStale); + LLDB_INSTRUMENT_VA(this); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -187,7 +180,7 @@ bool SBThreadPlan::IsPlanStale() { } bool SBThreadPlan::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsValid); + LLDB_INSTRUMENT_VA(this); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -196,7 +189,7 @@ bool SBThreadPlan::IsValid() { } bool SBThreadPlan::GetStopOthers() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, GetStopOthers); + LLDB_INSTRUMENT_VA(this); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -205,7 +198,7 @@ bool SBThreadPlan::GetStopOthers() { } void SBThreadPlan::SetStopOthers(bool stop_others) { - LLDB_RECORD_METHOD(void, SBThreadPlan, SetStopOthers, (bool), stop_others); + LLDB_INSTRUMENT_VA(this, stop_others); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) @@ -221,9 +214,7 @@ void SBThreadPlan::SetStopOthers(bool stop_others) { SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOverRange(SBAddress &sb_start_address, lldb::addr_t size) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepOverRange, - (lldb::SBAddress &, lldb::addr_t), sb_start_address, size); + LLDB_INSTRUMENT_VA(this, sb_start_address, size); SBError error; return QueueThreadPlanForStepOverRange(sb_start_address, size, error); @@ -231,10 +222,7 @@ SBThreadPlan::QueueThreadPlanForStepOverRange(SBAddress 
&sb_start_address, SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOverRange( SBAddress &sb_start_address, lldb::addr_t size, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepOverRange, - (lldb::SBAddress &, lldb::addr_t, lldb::SBError &), - sb_start_address, size, error); + LLDB_INSTRUMENT_VA(this, sb_start_address, size, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -265,9 +253,7 @@ SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOverRange( SBThreadPlan SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address, lldb::addr_t size) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepInRange, - (lldb::SBAddress &, lldb::addr_t), sb_start_address, size); + LLDB_INSTRUMENT_VA(this, sb_start_address, size); SBError error; return QueueThreadPlanForStepInRange(sb_start_address, size, error); @@ -276,10 +262,7 @@ SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address, SBThreadPlan SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address, lldb::addr_t size, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepInRange, - (lldb::SBAddress &, lldb::addr_t, lldb::SBError &), - sb_start_address, size, error); + LLDB_INSTRUMENT_VA(this, sb_start_address, size, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -310,9 +293,7 @@ SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address, SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to, bool first_insn) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepOut, (uint32_t, bool), - frame_idx_to_step_to, first_insn); + LLDB_INSTRUMENT_VA(this, frame_idx_to_step_to, first_insn); SBError error; return QueueThreadPlanForStepOut(frame_idx_to_step_to, first_insn, error); @@ -321,10 +302,7 @@ SBThreadPlan::QueueThreadPlanForStepOut(uint32_t 
frame_idx_to_step_to, SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to, bool first_insn, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepOut, - (uint32_t, bool, lldb::SBError &), frame_idx_to_step_to, - first_insn, error); + LLDB_INSTRUMENT_VA(this, frame_idx_to_step_to, first_insn, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -350,9 +328,7 @@ SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to, SBThreadPlan SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForRunToAddress, (lldb::SBAddress), - sb_address); + LLDB_INSTRUMENT_VA(this, sb_address); SBError error; return QueueThreadPlanForRunToAddress(sb_address, error); @@ -360,9 +336,7 @@ SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address) { SBThreadPlan SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForRunToAddress, - (lldb::SBAddress, lldb::SBError &), sb_address, error); + LLDB_INSTRUMENT_VA(this, sb_address, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -387,9 +361,7 @@ SBThreadPlan SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address, SBThreadPlan SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepScripted, (const char *), - script_class_name); + LLDB_INSTRUMENT_VA(this, script_class_name); SBError error; return QueueThreadPlanForStepScripted(script_class_name, error); @@ -398,9 +370,7 @@ SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name) { SBThreadPlan SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - 
QueueThreadPlanForStepScripted, - (const char *, lldb::SBError &), script_class_name, error); + LLDB_INSTRUMENT_VA(this, script_class_name, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { @@ -424,10 +394,7 @@ SBThreadPlan SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name, lldb::SBStructuredData &args_data, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan, - QueueThreadPlanForStepScripted, - (const char *, lldb::SBStructuredData &, lldb::SBError &), - script_class_name, args_data, error); + LLDB_INSTRUMENT_VA(this, script_class_name, args_data, error); ThreadPlanSP thread_plan_sp(GetSP()); if (thread_plan_sp) { diff --git a/lldb/source/API/SBTrace.cpp b/lldb/source/API/SBTrace.cpp index 4e13e774e366c..64a675e2e16cd 100644 --- a/lldb/source/API/SBTrace.cpp +++ b/lldb/source/API/SBTrace.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/Target/Process.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBThread.h" @@ -20,20 +20,19 @@ using namespace lldb; using namespace lldb_private; -SBTrace::SBTrace() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTrace); } +SBTrace::SBTrace() { LLDB_INSTRUMENT_VA(this); } SBTrace::SBTrace(const lldb::TraceSP &trace_sp) : m_opaque_sp(trace_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTrace, (const lldb::TraceSP &), trace_sp); + LLDB_INSTRUMENT_VA(this, trace_sp); } const char *SBTrace::GetStartConfigurationHelp() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTrace, GetStartConfigurationHelp); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp ? 
m_opaque_sp->GetStartConfigurationHelp() : nullptr; } SBError SBTrace::Start(const SBStructuredData &configuration) { - LLDB_RECORD_METHOD(SBError, SBTrace, Start, (const SBStructuredData &), - configuration); + LLDB_INSTRUMENT_VA(this, configuration); SBError error; if (!m_opaque_sp) error.SetErrorString("error: invalid trace"); @@ -45,9 +44,7 @@ SBError SBTrace::Start(const SBStructuredData &configuration) { SBError SBTrace::Start(const SBThread &thread, const SBStructuredData &configuration) { - LLDB_RECORD_METHOD(SBError, SBTrace, Start, - (const SBThread &, const SBStructuredData &), thread, - configuration); + LLDB_INSTRUMENT_VA(this, thread, configuration); SBError error; if (!m_opaque_sp) @@ -63,7 +60,7 @@ SBError SBTrace::Start(const SBThread &thread, } SBError SBTrace::Stop() { - LLDB_RECORD_METHOD_NO_ARGS(SBError, SBTrace, Stop); + LLDB_INSTRUMENT_VA(this); SBError error; if (!m_opaque_sp) error.SetErrorString("error: invalid trace"); @@ -73,7 +70,7 @@ SBError SBTrace::Stop() { } SBError SBTrace::Stop(const SBThread &thread) { - LLDB_RECORD_METHOD(SBError, SBTrace, Stop, (const SBThread &), thread); + LLDB_INSTRUMENT_VA(this, thread); SBError error; if (!m_opaque_sp) error.SetErrorString("error: invalid trace"); @@ -83,11 +80,11 @@ SBError SBTrace::Stop(const SBThread &thread) { } bool SBTrace::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTrace, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTrace::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTrace, operator bool); + LLDB_INSTRUMENT_VA(this); return (bool)m_opaque_sp; } diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp index bd759c8f6bf90..da9202bf9386b 100644 --- a/lldb/source/API/SBType.cpp +++ b/lldb/source/API/SBType.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBType.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBDefines.h" 
#include "lldb/API/SBModule.h" #include "lldb/API/SBStream.h" @@ -17,6 +16,7 @@ #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeSystem.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include "llvm/ADT/APSInt.h" @@ -26,7 +26,7 @@ using namespace lldb; using namespace lldb_private; -SBType::SBType() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBType); } +SBType::SBType() { LLDB_INSTRUMENT_VA(this); } SBType::SBType(const CompilerType &type) : m_opaque_sp(new TypeImpl( @@ -39,7 +39,7 @@ SBType::SBType(const lldb::TypeImplSP &type_impl_sp) : m_opaque_sp(type_impl_sp) {} SBType::SBType(const SBType &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBType, (const lldb::SBType &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -51,7 +51,7 @@ SBType::SBType(const SBType &rhs) { //{} // bool SBType::operator==(SBType &rhs) { - LLDB_RECORD_METHOD(bool, SBType, operator==,(lldb::SBType &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -63,7 +63,7 @@ bool SBType::operator==(SBType &rhs) { } bool SBType::operator!=(SBType &rhs) { - LLDB_RECORD_METHOD(bool, SBType, operator!=,(lldb::SBType &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return rhs.IsValid(); @@ -81,8 +81,7 @@ void SBType::SetSP(const lldb::TypeImplSP &type_impl_sp) { } SBType &SBType::operator=(const SBType &rhs) { - LLDB_RECORD_METHOD(lldb::SBType &, SBType, operator=,(const lldb::SBType &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -107,11 +106,11 @@ const TypeImpl &SBType::ref() const { } bool SBType::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBType, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBType::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBType, operator bool); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp.get() == nullptr) return false; @@ -120,7 
+119,7 @@ SBType::operator bool() const { } uint64_t SBType::GetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBType, GetByteSize); + LLDB_INSTRUMENT_VA(this); if (IsValid()) if (llvm::Optional size = @@ -130,7 +129,7 @@ uint64_t SBType::GetByteSize() { } bool SBType::IsPointerType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsPointerType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -138,7 +137,7 @@ bool SBType::IsPointerType() { } bool SBType::IsArrayType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsArrayType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -147,7 +146,7 @@ bool SBType::IsArrayType() { } bool SBType::IsVectorType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsVectorType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -155,7 +154,7 @@ bool SBType::IsVectorType() { } bool SBType::IsReferenceType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsReferenceType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -163,7 +162,7 @@ bool SBType::IsReferenceType() { } SBType SBType::GetPointerType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetPointerType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -172,7 +171,7 @@ SBType SBType::GetPointerType() { } SBType SBType::GetPointeeType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetPointeeType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -180,7 +179,7 @@ SBType SBType::GetPointeeType() { } SBType SBType::GetReferenceType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetReferenceType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -188,7 +187,7 @@ SBType SBType::GetReferenceType() { } SBType SBType::GetTypedefedType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetTypedefedType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -196,7 +195,7 @@ SBType SBType::GetTypedefedType() { } SBType SBType::GetDereferencedType() { - 
LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetDereferencedType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -204,7 +203,7 @@ SBType SBType::GetDereferencedType() { } SBType SBType::GetArrayElementType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetArrayElementType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -213,7 +212,7 @@ SBType SBType::GetArrayElementType() { } SBType SBType::GetArrayType(uint64_t size) { - LLDB_RECORD_METHOD(lldb::SBType, SBType, GetArrayType, (uint64_t), size); + LLDB_INSTRUMENT_VA(this, size); if (!IsValid()) return SBType(); @@ -222,7 +221,7 @@ SBType SBType::GetArrayType(uint64_t size) { } SBType SBType::GetVectorElementType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetVectorElementType); + LLDB_INSTRUMENT_VA(this); SBType type_sb; if (IsValid()) { @@ -235,7 +234,7 @@ SBType SBType::GetVectorElementType() { } bool SBType::IsFunctionType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsFunctionType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -243,7 +242,7 @@ bool SBType::IsFunctionType() { } bool SBType::IsPolymorphicClass() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsPolymorphicClass); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -251,7 +250,7 @@ bool SBType::IsPolymorphicClass() { } bool SBType::IsTypedefType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsTypedefType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -259,7 +258,7 @@ bool SBType::IsTypedefType() { } bool SBType::IsAnonymousType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsAnonymousType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -267,7 +266,7 @@ bool SBType::IsAnonymousType() { } bool SBType::IsScopedEnumerationType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsScopedEnumerationType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -275,7 +274,7 @@ bool SBType::IsScopedEnumerationType() { } lldb::SBType 
SBType::GetFunctionReturnType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetFunctionReturnType); + LLDB_INSTRUMENT_VA(this); if (IsValid()) { CompilerType return_type( @@ -287,8 +286,7 @@ lldb::SBType SBType::GetFunctionReturnType() { } lldb::SBTypeList SBType::GetFunctionArgumentTypes() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeList, SBType, - GetFunctionArgumentTypes); + LLDB_INSTRUMENT_VA(this); SBTypeList sb_type_list; if (IsValid()) { @@ -302,7 +300,7 @@ lldb::SBTypeList SBType::GetFunctionArgumentTypes() { } uint32_t SBType::GetNumberOfMemberFunctions() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfMemberFunctions); + LLDB_INSTRUMENT_VA(this); if (IsValid()) { return m_opaque_sp->GetCompilerType(true).GetNumMemberFunctions(); @@ -311,8 +309,7 @@ uint32_t SBType::GetNumberOfMemberFunctions() { } lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBTypeMemberFunction, SBType, - GetMemberFunctionAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBTypeMemberFunction sb_func_type; if (IsValid()) @@ -322,7 +319,7 @@ lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) { } lldb::SBType SBType::GetUnqualifiedType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetUnqualifiedType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -330,7 +327,7 @@ lldb::SBType SBType::GetUnqualifiedType() { } lldb::SBType SBType::GetCanonicalType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetCanonicalType); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetCanonicalType()))); @@ -338,7 +335,7 @@ lldb::SBType SBType::GetCanonicalType() { } SBType SBType::GetEnumerationIntegerType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetEnumerationIntegerType); + LLDB_INSTRUMENT_VA(this); if (IsValid()) { return SBType( @@ -348,7 +345,7 @@ SBType SBType::GetEnumerationIntegerType() { } 
lldb::BasicType SBType::GetBasicType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::BasicType, SBType, GetBasicType); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(false).GetBasicTypeEnumeration(); @@ -356,8 +353,7 @@ lldb::BasicType SBType::GetBasicType() { } SBType SBType::GetBasicType(lldb::BasicType basic_type) { - LLDB_RECORD_METHOD(lldb::SBType, SBType, GetBasicType, (lldb::BasicType), - basic_type); + LLDB_INSTRUMENT_VA(this, basic_type); if (IsValid() && m_opaque_sp->IsValid()) return SBType( @@ -366,7 +362,7 @@ SBType SBType::GetBasicType(lldb::BasicType basic_type) { } uint32_t SBType::GetNumberOfDirectBaseClasses() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfDirectBaseClasses); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(true).GetNumDirectBaseClasses(); @@ -374,7 +370,7 @@ uint32_t SBType::GetNumberOfDirectBaseClasses() { } uint32_t SBType::GetNumberOfVirtualBaseClasses() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfVirtualBaseClasses); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(true).GetNumVirtualBaseClasses(); @@ -382,7 +378,7 @@ uint32_t SBType::GetNumberOfVirtualBaseClasses() { } uint32_t SBType::GetNumberOfFields() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfFields); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(true).GetNumFields(); @@ -391,9 +387,7 @@ uint32_t SBType::GetNumberOfFields() { bool SBType::GetDescription(SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBType, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); Stream &strm = description.ref(); @@ -406,8 +400,7 @@ bool SBType::GetDescription(SBStream &description, } SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) { - 
LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetDirectBaseClassAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBTypeMember sb_type_member; if (IsValid()) { @@ -423,8 +416,7 @@ SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) { } SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetVirtualBaseClassAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); SBTypeMember sb_type_member; if (IsValid()) { @@ -440,8 +432,7 @@ SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) { } SBTypeEnumMemberList SBType::GetEnumMembers() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeEnumMemberList, SBType, - GetEnumMembers); + LLDB_INSTRUMENT_VA(this); SBTypeEnumMemberList sb_enum_member_list; if (IsValid()) { @@ -463,8 +454,7 @@ SBTypeEnumMemberList SBType::GetEnumMembers() { } SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetFieldAtIndex, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); SBTypeMember sb_type_member; if (IsValid()) { @@ -490,7 +480,7 @@ SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) { } bool SBType::IsTypeComplete() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsTypeComplete); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -498,7 +488,7 @@ bool SBType::IsTypeComplete() { } uint32_t SBType::GetTypeFlags() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetTypeFlags); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return 0; @@ -506,7 +496,7 @@ uint32_t SBType::GetTypeFlags() { } lldb::SBModule SBType::GetModule() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBType, GetModule); + LLDB_INSTRUMENT_VA(this); lldb::SBModule sb_module; if (!IsValid()) @@ -517,7 +507,7 @@ lldb::SBModule SBType::GetModule() { } const char *SBType::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBType, GetName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return ""; @@ -525,7 +515,7 @@ const char 
*SBType::GetName() { } const char *SBType::GetDisplayTypeName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBType, GetDisplayTypeName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return ""; @@ -533,7 +523,7 @@ const char *SBType::GetDisplayTypeName() { } lldb::TypeClass SBType::GetTypeClass() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::TypeClass, SBType, GetTypeClass); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(true).GetTypeClass(); @@ -541,7 +531,7 @@ lldb::TypeClass SBType::GetTypeClass() { } uint32_t SBType::GetNumberOfTemplateArguments() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfTemplateArguments); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(); @@ -549,8 +539,7 @@ uint32_t SBType::GetNumberOfTemplateArguments() { } lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBType, SBType, GetTemplateArgumentType, (uint32_t), - idx); + LLDB_INSTRUMENT_VA(this, idx); if (!IsValid()) return SBType(); @@ -574,8 +563,7 @@ lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) { } lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::TemplateArgumentKind, SBType, - GetTemplateArgumentKind, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx); @@ -583,12 +571,12 @@ lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) { } SBTypeList::SBTypeList() : m_opaque_up(new TypeListImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeList); + LLDB_INSTRUMENT_VA(this); } SBTypeList::SBTypeList(const SBTypeList &rhs) : m_opaque_up(new TypeListImpl()) { - LLDB_RECORD_CONSTRUCTOR(SBTypeList, (const lldb::SBTypeList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); for (uint32_t i = 0, rhs_size = const_cast(rhs).GetSize(); i < rhs_size; i++) @@ -596,18 +584,17 @@ SBTypeList::SBTypeList(const 
SBTypeList &rhs) } bool SBTypeList::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeList, operator bool); + LLDB_INSTRUMENT_VA(this); return (m_opaque_up != nullptr); } SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeList &, - SBTypeList, operator=,(const lldb::SBTypeList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_up = std::make_unique(); @@ -619,15 +606,14 @@ SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) { } void SBTypeList::Append(SBType type) { - LLDB_RECORD_METHOD(void, SBTypeList, Append, (lldb::SBType), type); + LLDB_INSTRUMENT_VA(this, type); if (type.IsValid()) m_opaque_up->Append(type.m_opaque_sp); } SBType SBTypeList::GetTypeAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBType, SBTypeList, GetTypeAtIndex, (uint32_t), - index); + LLDB_INSTRUMENT_VA(this, index); if (m_opaque_up) return SBType(m_opaque_up->GetTypeAtIndex(index)); @@ -635,19 +621,19 @@ SBType SBTypeList::GetTypeAtIndex(uint32_t index) { } uint32_t SBTypeList::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeList, GetSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSize(); } SBTypeList::~SBTypeList() = default; -SBTypeMember::SBTypeMember() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMember); } +SBTypeMember::SBTypeMember() { LLDB_INSTRUMENT_VA(this); } SBTypeMember::~SBTypeMember() = default; SBTypeMember::SBTypeMember(const SBTypeMember &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBTypeMember, (const lldb::SBTypeMember &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { if (rhs.IsValid()) @@ -656,8 +642,7 @@ SBTypeMember::SBTypeMember(const SBTypeMember &rhs) { } lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeMember &, - SBTypeMember, operator=,(const 
lldb::SBTypeMember &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { if (rhs.IsValid()) @@ -667,17 +652,17 @@ lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) { } bool SBTypeMember::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMember, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeMember::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMember, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up.get(); } const char *SBTypeMember::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMember, GetName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetName().GetCString(); @@ -685,7 +670,7 @@ const char *SBTypeMember::GetName() { } SBType SBTypeMember::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMember, GetType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; if (m_opaque_up) { @@ -695,7 +680,7 @@ SBType SBTypeMember::GetType() { } uint64_t SBTypeMember::GetOffsetInBytes() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeMember, GetOffsetInBytes); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetBitOffset() / 8u; @@ -703,7 +688,7 @@ uint64_t SBTypeMember::GetOffsetInBytes() { } uint64_t SBTypeMember::GetOffsetInBits() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeMember, GetOffsetInBits); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetBitOffset(); @@ -711,7 +696,7 @@ uint64_t SBTypeMember::GetOffsetInBits() { } bool SBTypeMember::IsBitfield() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeMember, IsBitfield); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetIsBitfield(); @@ -719,7 +704,7 @@ bool SBTypeMember::IsBitfield() { } uint32_t SBTypeMember::GetBitfieldSizeInBits() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeMember, GetBitfieldSizeInBits); + LLDB_INSTRUMENT_VA(this); if (m_opaque_up) return m_opaque_up->GetBitfieldBitSize(); @@ -728,9 +713,7 @@ 
uint32_t SBTypeMember::GetBitfieldSizeInBits() { bool SBTypeMember::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeMember, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); Stream &strm = description.ref(); @@ -771,24 +754,18 @@ TypeMemberImpl &SBTypeMember::ref() { const TypeMemberImpl &SBTypeMember::ref() const { return *m_opaque_up; } -SBTypeMemberFunction::SBTypeMemberFunction() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMemberFunction); -} +SBTypeMemberFunction::SBTypeMemberFunction() { LLDB_INSTRUMENT_VA(this); } SBTypeMemberFunction::~SBTypeMemberFunction() = default; SBTypeMemberFunction::SBTypeMemberFunction(const SBTypeMemberFunction &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeMemberFunction, - (const lldb::SBTypeMemberFunction &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } lldb::SBTypeMemberFunction &SBTypeMemberFunction:: operator=(const lldb::SBTypeMemberFunction &rhs) { - LLDB_RECORD_METHOD( - lldb::SBTypeMemberFunction &, - SBTypeMemberFunction, operator=,(const lldb::SBTypeMemberFunction &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = rhs.m_opaque_sp; @@ -796,17 +773,17 @@ operator=(const lldb::SBTypeMemberFunction &rhs) { } bool SBTypeMemberFunction::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMemberFunction, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeMemberFunction::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMemberFunction, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get(); } const char *SBTypeMemberFunction::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction, GetName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) return m_opaque_sp->GetName().GetCString(); @@ -814,8 +791,7 @@ const char 
*SBTypeMemberFunction::GetName() { } const char *SBTypeMemberFunction::GetDemangledName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction, - GetDemangledName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) { ConstString mangled_str = m_opaque_sp->GetMangledName(); @@ -828,8 +804,7 @@ const char *SBTypeMemberFunction::GetDemangledName() { } const char *SBTypeMemberFunction::GetMangledName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction, - GetMangledName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) return m_opaque_sp->GetMangledName().GetCString(); @@ -837,7 +812,7 @@ const char *SBTypeMemberFunction::GetMangledName() { } SBType SBTypeMemberFunction::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMemberFunction, GetType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; if (m_opaque_sp) { @@ -847,7 +822,7 @@ SBType SBTypeMemberFunction::GetType() { } lldb::SBType SBTypeMemberFunction::GetReturnType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMemberFunction, GetReturnType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; if (m_opaque_sp) { @@ -857,8 +832,7 @@ lldb::SBType SBTypeMemberFunction::GetReturnType() { } uint32_t SBTypeMemberFunction::GetNumberOfArguments() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeMemberFunction, - GetNumberOfArguments); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp) return m_opaque_sp->GetNumArguments(); @@ -866,8 +840,7 @@ uint32_t SBTypeMemberFunction::GetNumberOfArguments() { } lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) { - LLDB_RECORD_METHOD(lldb::SBType, SBTypeMemberFunction, GetArgumentTypeAtIndex, - (uint32_t), i); + LLDB_INSTRUMENT_VA(this, i); SBType sb_type; if (m_opaque_sp) { @@ -878,8 +851,7 @@ lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) { } lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::MemberFunctionKind, SBTypeMemberFunction, - GetKind); + LLDB_INSTRUMENT_VA(this); if 
(m_opaque_sp) return m_opaque_sp->GetKind(); @@ -888,9 +860,7 @@ lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() { bool SBTypeMemberFunction::GetDescription( lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeMemberFunction, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBTypeCategory.cpp b/lldb/source/API/SBTypeCategory.cpp index 17aca3267e676..7d929fe497954 100644 --- a/lldb/source/API/SBTypeCategory.cpp +++ b/lldb/source/API/SBTypeCategory.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeCategory.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBTypeFilter.h" @@ -26,9 +26,7 @@ using namespace lldb_private; typedef std::pair ImplType; -SBTypeCategory::SBTypeCategory() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeCategory); -} +SBTypeCategory::SBTypeCategory() { LLDB_INSTRUMENT_VA(this); } SBTypeCategory::SBTypeCategory(const char *name) { DataVisualization::Categories::GetCategory(ConstString(name), m_opaque_sp); @@ -36,23 +34,23 @@ SBTypeCategory::SBTypeCategory(const char *name) { SBTypeCategory::SBTypeCategory(const lldb::SBTypeCategory &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeCategory, (const lldb::SBTypeCategory &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeCategory::~SBTypeCategory() = default; bool SBTypeCategory::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeCategory, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeCategory::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeCategory, operator bool); + LLDB_INSTRUMENT_VA(this); return 
(m_opaque_sp.get() != nullptr); } bool SBTypeCategory::GetEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeCategory, GetEnabled); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -60,7 +58,7 @@ bool SBTypeCategory::GetEnabled() { } void SBTypeCategory::SetEnabled(bool enabled) { - LLDB_RECORD_METHOD(void, SBTypeCategory, SetEnabled, (bool), enabled); + LLDB_INSTRUMENT_VA(this, enabled); if (!IsValid()) return; @@ -71,7 +69,7 @@ void SBTypeCategory::SetEnabled(bool enabled) { } const char *SBTypeCategory::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeCategory, GetName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return nullptr; @@ -79,8 +77,7 @@ const char *SBTypeCategory::GetName() { } lldb::LanguageType SBTypeCategory::GetLanguageAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::LanguageType, SBTypeCategory, GetLanguageAtIndex, - (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); if (IsValid()) return m_opaque_sp->GetLanguageAtIndex(idx); @@ -88,7 +85,7 @@ lldb::LanguageType SBTypeCategory::GetLanguageAtIndex(uint32_t idx) { } uint32_t SBTypeCategory::GetNumLanguages() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumLanguages); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetNumLanguages(); @@ -96,15 +93,14 @@ uint32_t SBTypeCategory::GetNumLanguages() { } void SBTypeCategory::AddLanguage(lldb::LanguageType language) { - LLDB_RECORD_METHOD(void, SBTypeCategory, AddLanguage, (lldb::LanguageType), - language); + LLDB_INSTRUMENT_VA(this, language); if (IsValid()) m_opaque_sp->AddLanguage(language); } uint32_t SBTypeCategory::GetNumFormats() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumFormats); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return 0; @@ -114,7 +110,7 @@ uint32_t SBTypeCategory::GetNumFormats() { } uint32_t SBTypeCategory::GetNumSummaries() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumSummaries); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return 0; @@ 
-123,7 +119,7 @@ uint32_t SBTypeCategory::GetNumSummaries() { } uint32_t SBTypeCategory::GetNumFilters() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumFilters); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return 0; @@ -132,7 +128,7 @@ uint32_t SBTypeCategory::GetNumFilters() { } uint32_t SBTypeCategory::GetNumSynthetics() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumSynthetics); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return 0; @@ -142,8 +138,7 @@ uint32_t SBTypeCategory::GetNumSynthetics() { lldb::SBTypeNameSpecifier SBTypeCategory::GetTypeNameSpecifierForFilterAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory, - GetTypeNameSpecifierForFilterAtIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeNameSpecifier(); @@ -153,8 +148,7 @@ SBTypeCategory::GetTypeNameSpecifierForFilterAtIndex(uint32_t index) { lldb::SBTypeNameSpecifier SBTypeCategory::GetTypeNameSpecifierForFormatAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory, - GetTypeNameSpecifierForFormatAtIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeNameSpecifier(); @@ -164,8 +158,7 @@ SBTypeCategory::GetTypeNameSpecifierForFormatAtIndex(uint32_t index) { lldb::SBTypeNameSpecifier SBTypeCategory::GetTypeNameSpecifierForSummaryAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory, - GetTypeNameSpecifierForSummaryAtIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeNameSpecifier(); @@ -175,9 +168,7 @@ SBTypeCategory::GetTypeNameSpecifierForSummaryAtIndex(uint32_t index) { lldb::SBTypeNameSpecifier SBTypeCategory::GetTypeNameSpecifierForSyntheticAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory, - GetTypeNameSpecifierForSyntheticAtIndex, (uint32_t), - index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) 
return SBTypeNameSpecifier(); @@ -186,8 +177,7 @@ SBTypeCategory::GetTypeNameSpecifierForSyntheticAtIndex(uint32_t index) { } SBTypeFilter SBTypeCategory::GetFilterForType(SBTypeNameSpecifier spec) { - LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBTypeCategory, GetFilterForType, - (lldb::SBTypeNameSpecifier), spec); + LLDB_INSTRUMENT_VA(this, spec); if (!IsValid()) return SBTypeFilter(); @@ -213,8 +203,7 @@ SBTypeFilter SBTypeCategory::GetFilterForType(SBTypeNameSpecifier spec) { return lldb::SBTypeFilter(filter_sp); } SBTypeFormat SBTypeCategory::GetFormatForType(SBTypeNameSpecifier spec) { - LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBTypeCategory, GetFormatForType, - (lldb::SBTypeNameSpecifier), spec); + LLDB_INSTRUMENT_VA(this, spec); if (!IsValid()) return SBTypeFormat(); @@ -238,8 +227,7 @@ SBTypeFormat SBTypeCategory::GetFormatForType(SBTypeNameSpecifier spec) { } SBTypeSummary SBTypeCategory::GetSummaryForType(SBTypeNameSpecifier spec) { - LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBTypeCategory, GetSummaryForType, - (lldb::SBTypeNameSpecifier), spec); + LLDB_INSTRUMENT_VA(this, spec); if (!IsValid()) return SBTypeSummary(); @@ -263,8 +251,7 @@ SBTypeSummary SBTypeCategory::GetSummaryForType(SBTypeNameSpecifier spec) { } SBTypeSynthetic SBTypeCategory::GetSyntheticForType(SBTypeNameSpecifier spec) { - LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBTypeCategory, GetSyntheticForType, - (lldb::SBTypeNameSpecifier), spec); + LLDB_INSTRUMENT_VA(this, spec); if (!IsValid()) return SBTypeSynthetic(); @@ -291,8 +278,7 @@ SBTypeSynthetic SBTypeCategory::GetSyntheticForType(SBTypeNameSpecifier spec) { } SBTypeFilter SBTypeCategory::GetFilterAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBTypeCategory, GetFilterAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeFilter(); @@ -309,8 +295,7 @@ SBTypeFilter SBTypeCategory::GetFilterAtIndex(uint32_t index) { } SBTypeFormat SBTypeCategory::GetFormatAtIndex(uint32_t index) 
{ - LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBTypeCategory, GetFormatAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeFormat(); @@ -318,8 +303,7 @@ SBTypeFormat SBTypeCategory::GetFormatAtIndex(uint32_t index) { } SBTypeSummary SBTypeCategory::GetSummaryAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBTypeCategory, GetSummaryAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeSummary(); @@ -327,8 +311,7 @@ SBTypeSummary SBTypeCategory::GetSummaryAtIndex(uint32_t index) { } SBTypeSynthetic SBTypeCategory::GetSyntheticAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBTypeCategory, GetSyntheticAtIndex, - (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (!IsValid()) return SBTypeSynthetic(); @@ -346,9 +329,7 @@ SBTypeSynthetic SBTypeCategory::GetSyntheticAtIndex(uint32_t index) { bool SBTypeCategory::AddTypeFormat(SBTypeNameSpecifier type_name, SBTypeFormat format) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeFormat, - (lldb::SBTypeNameSpecifier, lldb::SBTypeFormat), type_name, - format); + LLDB_INSTRUMENT_VA(this, type_name, format); if (!IsValid()) return false; @@ -370,8 +351,7 @@ bool SBTypeCategory::AddTypeFormat(SBTypeNameSpecifier type_name, } bool SBTypeCategory::DeleteTypeFormat(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeFormat, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!IsValid()) return false; @@ -389,9 +369,7 @@ bool SBTypeCategory::DeleteTypeFormat(SBTypeNameSpecifier type_name) { bool SBTypeCategory::AddTypeSummary(SBTypeNameSpecifier type_name, SBTypeSummary summary) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeSummary, - (lldb::SBTypeNameSpecifier, lldb::SBTypeSummary), - type_name, summary); + LLDB_INSTRUMENT_VA(this, type_name, summary); if (!IsValid()) return false; @@ -447,8 +425,7 @@ bool 
SBTypeCategory::AddTypeSummary(SBTypeNameSpecifier type_name, } bool SBTypeCategory::DeleteTypeSummary(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeSummary, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!IsValid()) return false; @@ -466,9 +443,7 @@ bool SBTypeCategory::DeleteTypeSummary(SBTypeNameSpecifier type_name) { bool SBTypeCategory::AddTypeFilter(SBTypeNameSpecifier type_name, SBTypeFilter filter) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeFilter, - (lldb::SBTypeNameSpecifier, lldb::SBTypeFilter), type_name, - filter); + LLDB_INSTRUMENT_VA(this, type_name, filter); if (!IsValid()) return false; @@ -490,8 +465,7 @@ bool SBTypeCategory::AddTypeFilter(SBTypeNameSpecifier type_name, } bool SBTypeCategory::DeleteTypeFilter(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeFilter, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!IsValid()) return false; @@ -509,9 +483,7 @@ bool SBTypeCategory::DeleteTypeFilter(SBTypeNameSpecifier type_name) { bool SBTypeCategory::AddTypeSynthetic(SBTypeNameSpecifier type_name, SBTypeSynthetic synth) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeSynthetic, - (lldb::SBTypeNameSpecifier, lldb::SBTypeSynthetic), - type_name, synth); + LLDB_INSTRUMENT_VA(this, type_name, synth); if (!IsValid()) return false; @@ -567,8 +539,7 @@ bool SBTypeCategory::AddTypeSynthetic(SBTypeNameSpecifier type_name, } bool SBTypeCategory::DeleteTypeSynthetic(SBTypeNameSpecifier type_name) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeSynthetic, - (lldb::SBTypeNameSpecifier), type_name); + LLDB_INSTRUMENT_VA(this, type_name); if (!IsValid()) return false; @@ -586,9 +557,7 @@ bool SBTypeCategory::DeleteTypeSynthetic(SBTypeNameSpecifier type_name) { bool SBTypeCategory::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, 
SBTypeCategory, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (!IsValid()) return false; @@ -598,9 +567,7 @@ bool SBTypeCategory::GetDescription(lldb::SBStream &description, lldb::SBTypeCategory &SBTypeCategory:: operator=(const lldb::SBTypeCategory &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeCategory &, - SBTypeCategory, operator=,(const lldb::SBTypeCategory &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -609,8 +576,7 @@ operator=(const lldb::SBTypeCategory &rhs) { } bool SBTypeCategory::operator==(lldb::SBTypeCategory &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, operator==,(lldb::SBTypeCategory &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -619,8 +585,7 @@ bool SBTypeCategory::operator==(lldb::SBTypeCategory &rhs) { } bool SBTypeCategory::operator!=(lldb::SBTypeCategory &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeCategory, operator!=,(lldb::SBTypeCategory &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return rhs.IsValid(); diff --git a/lldb/source/API/SBTypeEnumMember.cpp b/lldb/source/API/SBTypeEnumMember.cpp index d5fc382a35e8e..a3d99bd57e310 100644 --- a/lldb/source/API/SBTypeEnumMember.cpp +++ b/lldb/source/API/SBTypeEnumMember.cpp @@ -7,13 +7,13 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeEnumMember.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBType.h" #include "lldb/Symbol/CompilerType.h" #include "lldb/Symbol/Type.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Stream.h" #include @@ -21,9 +21,7 @@ using namespace lldb; using namespace lldb_private; -SBTypeEnumMember::SBTypeEnumMember() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeEnumMember); -} 
+SBTypeEnumMember::SBTypeEnumMember() { LLDB_INSTRUMENT_VA(this); } SBTypeEnumMember::~SBTypeEnumMember() = default; @@ -32,16 +30,13 @@ SBTypeEnumMember::SBTypeEnumMember( : m_opaque_sp(enum_member_sp) {} SBTypeEnumMember::SBTypeEnumMember(const SBTypeEnumMember &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBTypeEnumMember, (const lldb::SBTypeEnumMember &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_sp = clone(rhs.m_opaque_sp); } SBTypeEnumMember &SBTypeEnumMember::operator=(const SBTypeEnumMember &rhs) { - LLDB_RECORD_METHOD( - SBTypeEnumMember &, - SBTypeEnumMember, operator=,(const lldb::SBTypeEnumMember &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_sp = clone(rhs.m_opaque_sp); @@ -49,17 +44,17 @@ SBTypeEnumMember &SBTypeEnumMember::operator=(const SBTypeEnumMember &rhs) { } bool SBTypeEnumMember::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMember, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeEnumMember::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMember, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get(); } const char *SBTypeEnumMember::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeEnumMember, GetName); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp.get()) return m_opaque_sp->GetName().GetCString(); @@ -67,7 +62,7 @@ const char *SBTypeEnumMember::GetName() { } int64_t SBTypeEnumMember::GetValueAsSigned() { - LLDB_RECORD_METHOD_NO_ARGS(int64_t, SBTypeEnumMember, GetValueAsSigned); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp.get()) return m_opaque_sp->GetValueAsSigned(); @@ -75,7 +70,7 @@ int64_t SBTypeEnumMember::GetValueAsSigned() { } uint64_t SBTypeEnumMember::GetValueAsUnsigned() { - LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeEnumMember, GetValueAsUnsigned); + LLDB_INSTRUMENT_VA(this); if (m_opaque_sp.get()) return m_opaque_sp->GetValueAsUnsigned(); @@ -83,7 +78,7 @@ uint64_t SBTypeEnumMember::GetValueAsUnsigned() { } 
SBType SBTypeEnumMember::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeEnumMember, GetType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; if (m_opaque_sp.get()) { @@ -108,13 +103,12 @@ const TypeEnumMemberImpl &SBTypeEnumMember::ref() const { SBTypeEnumMemberList::SBTypeEnumMemberList() : m_opaque_up(new TypeEnumMemberListImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeEnumMemberList); + LLDB_INSTRUMENT_VA(this); } SBTypeEnumMemberList::SBTypeEnumMemberList(const SBTypeEnumMemberList &rhs) : m_opaque_up(new TypeEnumMemberListImpl()) { - LLDB_RECORD_CONSTRUCTOR(SBTypeEnumMemberList, - (const lldb::SBTypeEnumMemberList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); for (uint32_t i = 0, rhs_size = const_cast(rhs).GetSize(); @@ -123,21 +117,18 @@ SBTypeEnumMemberList::SBTypeEnumMemberList(const SBTypeEnumMemberList &rhs) } bool SBTypeEnumMemberList::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeEnumMemberList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeEnumMemberList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMemberList, operator bool); + LLDB_INSTRUMENT_VA(this); return (m_opaque_up != nullptr); } SBTypeEnumMemberList &SBTypeEnumMemberList:: operator=(const SBTypeEnumMemberList &rhs) { - LLDB_RECORD_METHOD( - lldb::SBTypeEnumMemberList &, - SBTypeEnumMemberList, operator=,(const lldb::SBTypeEnumMemberList &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_up = std::make_unique(); @@ -151,8 +142,7 @@ operator=(const SBTypeEnumMemberList &rhs) { } void SBTypeEnumMemberList::Append(SBTypeEnumMember enum_member) { - LLDB_RECORD_METHOD(void, SBTypeEnumMemberList, Append, - (lldb::SBTypeEnumMember), enum_member); + LLDB_INSTRUMENT_VA(this, enum_member); if (enum_member.IsValid()) m_opaque_up->Append(enum_member.m_opaque_sp); @@ -160,8 +150,7 @@ void SBTypeEnumMemberList::Append(SBTypeEnumMember enum_member) { SBTypeEnumMember 
SBTypeEnumMemberList::GetTypeEnumMemberAtIndex(uint32_t index) { - LLDB_RECORD_METHOD(lldb::SBTypeEnumMember, SBTypeEnumMemberList, - GetTypeEnumMemberAtIndex, (uint32_t), index); + LLDB_INSTRUMENT_VA(this, index); if (m_opaque_up) return SBTypeEnumMember(m_opaque_up->GetTypeEnumMemberAtIndex(index)); @@ -169,7 +158,7 @@ SBTypeEnumMemberList::GetTypeEnumMemberAtIndex(uint32_t index) { } uint32_t SBTypeEnumMemberList::GetSize() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeEnumMemberList, GetSize); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetSize(); } @@ -178,9 +167,7 @@ SBTypeEnumMemberList::~SBTypeEnumMemberList() = default; bool SBTypeEnumMember::GetDescription( lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeEnumMember, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); Stream &strm = description.ref(); diff --git a/lldb/source/API/SBTypeFilter.cpp b/lldb/source/API/SBTypeFilter.cpp index a36d93854098f..94f222b254b22 100644 --- a/lldb/source/API/SBTypeFilter.cpp +++ b/lldb/source/API/SBTypeFilter.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeFilter.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" @@ -16,32 +16,32 @@ using namespace lldb; using namespace lldb_private; -SBTypeFilter::SBTypeFilter() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFilter); } +SBTypeFilter::SBTypeFilter() { LLDB_INSTRUMENT_VA(this); } SBTypeFilter::SBTypeFilter(uint32_t options) : m_opaque_sp(TypeFilterImplSP(new TypeFilterImpl(options))) { - LLDB_RECORD_CONSTRUCTOR(SBTypeFilter, (uint32_t), options); + LLDB_INSTRUMENT_VA(this, options); } SBTypeFilter::SBTypeFilter(const lldb::SBTypeFilter &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - 
LLDB_RECORD_CONSTRUCTOR(SBTypeFilter, (const lldb::SBTypeFilter &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeFilter::~SBTypeFilter() = default; bool SBTypeFilter::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFilter, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeFilter::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFilter, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } uint32_t SBTypeFilter::GetOptions() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFilter, GetOptions); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetOptions(); @@ -49,7 +49,7 @@ uint32_t SBTypeFilter::GetOptions() { } void SBTypeFilter::SetOptions(uint32_t value) { - LLDB_RECORD_METHOD(void, SBTypeFilter, SetOptions, (uint32_t), value); + LLDB_INSTRUMENT_VA(this, value); if (CopyOnWrite_Impl()) m_opaque_sp->SetOptions(value); @@ -57,9 +57,7 @@ void SBTypeFilter::SetOptions(uint32_t value) { bool SBTypeFilter::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeFilter, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (!IsValid()) return false; @@ -70,15 +68,14 @@ bool SBTypeFilter::GetDescription(lldb::SBStream &description, } void SBTypeFilter::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBTypeFilter, Clear); + LLDB_INSTRUMENT_VA(this); if (CopyOnWrite_Impl()) m_opaque_sp->Clear(); } uint32_t SBTypeFilter::GetNumberOfExpressionPaths() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFilter, - GetNumberOfExpressionPaths); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetCount(); @@ -86,8 +83,7 @@ uint32_t SBTypeFilter::GetNumberOfExpressionPaths() { } const char *SBTypeFilter::GetExpressionPathAtIndex(uint32_t i) { - LLDB_RECORD_METHOD(const char *, SBTypeFilter, 
GetExpressionPathAtIndex, - (uint32_t), i); + LLDB_INSTRUMENT_VA(this, i); if (IsValid()) { const char *item = m_opaque_sp->GetExpressionPathAtIndex(i); @@ -99,8 +95,7 @@ const char *SBTypeFilter::GetExpressionPathAtIndex(uint32_t i) { } bool SBTypeFilter::ReplaceExpressionPathAtIndex(uint32_t i, const char *item) { - LLDB_RECORD_METHOD(bool, SBTypeFilter, ReplaceExpressionPathAtIndex, - (uint32_t, const char *), i, item); + LLDB_INSTRUMENT_VA(this, i, item); if (CopyOnWrite_Impl()) return m_opaque_sp->SetExpressionPathAtIndex(i, item); @@ -109,16 +104,14 @@ bool SBTypeFilter::ReplaceExpressionPathAtIndex(uint32_t i, const char *item) { } void SBTypeFilter::AppendExpressionPath(const char *item) { - LLDB_RECORD_METHOD(void, SBTypeFilter, AppendExpressionPath, (const char *), - item); + LLDB_INSTRUMENT_VA(this, item); if (CopyOnWrite_Impl()) m_opaque_sp->AddExpressionPath(item); } lldb::SBTypeFilter &SBTypeFilter::operator=(const lldb::SBTypeFilter &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeFilter &, - SBTypeFilter, operator=,(const lldb::SBTypeFilter &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -127,8 +120,7 @@ lldb::SBTypeFilter &SBTypeFilter::operator=(const lldb::SBTypeFilter &rhs) { } bool SBTypeFilter::operator==(lldb::SBTypeFilter &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeFilter, operator==,(lldb::SBTypeFilter &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -137,8 +129,7 @@ bool SBTypeFilter::operator==(lldb::SBTypeFilter &rhs) { } bool SBTypeFilter::IsEqualTo(lldb::SBTypeFilter &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeFilter, IsEqualTo, (lldb::SBTypeFilter &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -155,8 +146,7 @@ bool SBTypeFilter::IsEqualTo(lldb::SBTypeFilter &rhs) { } bool SBTypeFilter::operator!=(lldb::SBTypeFilter &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeFilter, operator!=,(lldb::SBTypeFilter &), - rhs); + 
LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); diff --git a/lldb/source/API/SBTypeFormat.cpp b/lldb/source/API/SBTypeFormat.cpp index 139bbebaa3f43..86e11e8b8fde6 100644 --- a/lldb/source/API/SBTypeFormat.cpp +++ b/lldb/source/API/SBTypeFormat.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeFormat.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" @@ -16,41 +16,39 @@ using namespace lldb; using namespace lldb_private; -SBTypeFormat::SBTypeFormat() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFormat); } +SBTypeFormat::SBTypeFormat() { LLDB_INSTRUMENT_VA(this); } SBTypeFormat::SBTypeFormat(lldb::Format format, uint32_t options) : m_opaque_sp( TypeFormatImplSP(new TypeFormatImpl_Format(format, options))) { - LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (lldb::Format, uint32_t), format, - options); + LLDB_INSTRUMENT_VA(this, format, options); } SBTypeFormat::SBTypeFormat(const char *type, uint32_t options) : m_opaque_sp(TypeFormatImplSP(new TypeFormatImpl_EnumType( ConstString(type ? 
type : ""), options))) { - LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (const char *, uint32_t), type, - options); + LLDB_INSTRUMENT_VA(this, type, options); } SBTypeFormat::SBTypeFormat(const lldb::SBTypeFormat &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (const lldb::SBTypeFormat &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeFormat::~SBTypeFormat() = default; bool SBTypeFormat::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFormat, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeFormat::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFormat, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } lldb::Format SBTypeFormat::GetFormat() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::Format, SBTypeFormat, GetFormat); + LLDB_INSTRUMENT_VA(this); if (IsValid() && m_opaque_sp->GetType() == TypeFormatImpl::Type::eTypeFormat) return ((TypeFormatImpl_Format *)m_opaque_sp.get())->GetFormat(); @@ -58,7 +56,7 @@ lldb::Format SBTypeFormat::GetFormat() { } const char *SBTypeFormat::GetTypeName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeFormat, GetTypeName); + LLDB_INSTRUMENT_VA(this); if (IsValid() && m_opaque_sp->GetType() == TypeFormatImpl::Type::eTypeEnum) return ((TypeFormatImpl_EnumType *)m_opaque_sp.get()) @@ -68,7 +66,7 @@ const char *SBTypeFormat::GetTypeName() { } uint32_t SBTypeFormat::GetOptions() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFormat, GetOptions); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_sp->GetOptions(); @@ -76,14 +74,14 @@ uint32_t SBTypeFormat::GetOptions() { } void SBTypeFormat::SetFormat(lldb::Format fmt) { - LLDB_RECORD_METHOD(void, SBTypeFormat, SetFormat, (lldb::Format), fmt); + LLDB_INSTRUMENT_VA(this, fmt); if (CopyOnWrite_Impl(Type::eTypeFormat)) ((TypeFormatImpl_Format *)m_opaque_sp.get())->SetFormat(fmt); } void SBTypeFormat::SetTypeName(const char *type) { - LLDB_RECORD_METHOD(void, 
SBTypeFormat, SetTypeName, (const char *), type); + LLDB_INSTRUMENT_VA(this, type); if (CopyOnWrite_Impl(Type::eTypeEnum)) ((TypeFormatImpl_EnumType *)m_opaque_sp.get()) @@ -91,7 +89,7 @@ void SBTypeFormat::SetTypeName(const char *type) { } void SBTypeFormat::SetOptions(uint32_t value) { - LLDB_RECORD_METHOD(void, SBTypeFormat, SetOptions, (uint32_t), value); + LLDB_INSTRUMENT_VA(this, value); if (CopyOnWrite_Impl(Type::eTypeKeepSame)) m_opaque_sp->SetOptions(value); @@ -99,9 +97,7 @@ void SBTypeFormat::SetOptions(uint32_t value) { bool SBTypeFormat::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeFormat, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (!IsValid()) return false; @@ -112,8 +108,7 @@ bool SBTypeFormat::GetDescription(lldb::SBStream &description, } lldb::SBTypeFormat &SBTypeFormat::operator=(const lldb::SBTypeFormat &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeFormat &, - SBTypeFormat, operator=,(const lldb::SBTypeFormat &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -122,8 +117,7 @@ lldb::SBTypeFormat &SBTypeFormat::operator=(const lldb::SBTypeFormat &rhs) { } bool SBTypeFormat::operator==(lldb::SBTypeFormat &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeFormat, operator==,(lldb::SBTypeFormat &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -131,8 +125,7 @@ bool SBTypeFormat::operator==(lldb::SBTypeFormat &rhs) { } bool SBTypeFormat::IsEqualTo(lldb::SBTypeFormat &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeFormat, IsEqualTo, (lldb::SBTypeFormat &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -144,8 +137,7 @@ bool SBTypeFormat::IsEqualTo(lldb::SBTypeFormat &rhs) { } bool SBTypeFormat::operator!=(lldb::SBTypeFormat &rhs) { - LLDB_RECORD_METHOD(bool, 
SBTypeFormat, operator!=,(lldb::SBTypeFormat &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); diff --git a/lldb/source/API/SBTypeNameSpecifier.cpp b/lldb/source/API/SBTypeNameSpecifier.cpp index 3b9dd8300c873..bc83a1d664d0d 100644 --- a/lldb/source/API/SBTypeNameSpecifier.cpp +++ b/lldb/source/API/SBTypeNameSpecifier.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeNameSpecifier.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBType.h" @@ -17,21 +17,18 @@ using namespace lldb; using namespace lldb_private; -SBTypeNameSpecifier::SBTypeNameSpecifier() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeNameSpecifier); -} +SBTypeNameSpecifier::SBTypeNameSpecifier() { LLDB_INSTRUMENT_VA(this); } SBTypeNameSpecifier::SBTypeNameSpecifier(const char *name, bool is_regex) : m_opaque_sp(new TypeNameSpecifierImpl(name, is_regex)) { - LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (const char *, bool), name, - is_regex); + LLDB_INSTRUMENT_VA(this, name, is_regex); if (name == nullptr || (*name) == 0) m_opaque_sp.reset(); } SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) { - LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (lldb::SBType), type); + LLDB_INSTRUMENT_VA(this, type); if (type.IsValid()) m_opaque_sp = TypeNameSpecifierImplSP( @@ -40,24 +37,23 @@ SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) { SBTypeNameSpecifier::SBTypeNameSpecifier(const lldb::SBTypeNameSpecifier &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, - (const lldb::SBTypeNameSpecifier &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeNameSpecifier::~SBTypeNameSpecifier() = default; bool SBTypeNameSpecifier::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeNameSpecifier, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } 
SBTypeNameSpecifier::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeNameSpecifier, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } const char *SBTypeNameSpecifier::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeNameSpecifier, GetName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return nullptr; @@ -66,7 +62,7 @@ const char *SBTypeNameSpecifier::GetName() { } SBType SBTypeNameSpecifier::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeNameSpecifier, GetType); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return SBType(); @@ -77,7 +73,7 @@ SBType SBTypeNameSpecifier::GetType() { } bool SBTypeNameSpecifier::IsRegex() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeNameSpecifier, IsRegex); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -87,9 +83,7 @@ bool SBTypeNameSpecifier::IsRegex() { bool SBTypeNameSpecifier::GetDescription( lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeNameSpecifier, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (!IsValid()) return false; @@ -100,9 +94,7 @@ bool SBTypeNameSpecifier::GetDescription( lldb::SBTypeNameSpecifier &SBTypeNameSpecifier:: operator=(const lldb::SBTypeNameSpecifier &rhs) { - LLDB_RECORD_METHOD( - lldb::SBTypeNameSpecifier &, - SBTypeNameSpecifier, operator=,(const lldb::SBTypeNameSpecifier &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -111,8 +103,7 @@ operator=(const lldb::SBTypeNameSpecifier &rhs) { } bool SBTypeNameSpecifier::operator==(lldb::SBTypeNameSpecifier &rhs) { - LLDB_RECORD_METHOD( - bool, SBTypeNameSpecifier, operator==,(lldb::SBTypeNameSpecifier &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -120,8 +111,7 @@ bool 
SBTypeNameSpecifier::operator==(lldb::SBTypeNameSpecifier &rhs) { } bool SBTypeNameSpecifier::IsEqualTo(lldb::SBTypeNameSpecifier &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeNameSpecifier, IsEqualTo, - (lldb::SBTypeNameSpecifier &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -135,8 +125,7 @@ bool SBTypeNameSpecifier::IsEqualTo(lldb::SBTypeNameSpecifier &rhs) { } bool SBTypeNameSpecifier::operator!=(lldb::SBTypeNameSpecifier &rhs) { - LLDB_RECORD_METHOD( - bool, SBTypeNameSpecifier, operator!=,(lldb::SBTypeNameSpecifier &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); diff --git a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp index af0888078d8fc..a65dfc987ad21 100644 --- a/lldb/source/API/SBTypeSummary.cpp +++ b/lldb/source/API/SBTypeSummary.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeSummary.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "Utils.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBValue.h" #include "lldb/DataFormatters/DataVisualization.h" +#include "lldb/Utility/Instrumentation.h" #include "llvm/Support/Casting.h" @@ -19,15 +19,14 @@ using namespace lldb; using namespace lldb_private; SBTypeSummaryOptions::SBTypeSummaryOptions() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSummaryOptions); + LLDB_INSTRUMENT_VA(this); m_opaque_up = std::make_unique(); } SBTypeSummaryOptions::SBTypeSummaryOptions( const lldb::SBTypeSummaryOptions &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb::SBTypeSummaryOptions &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_up = clone(rhs.m_opaque_up); } @@ -35,18 +34,17 @@ SBTypeSummaryOptions::SBTypeSummaryOptions( SBTypeSummaryOptions::~SBTypeSummaryOptions() = default; bool SBTypeSummaryOptions::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummaryOptions, IsValid); + LLDB_INSTRUMENT_VA(this); return 
this->operator bool(); } SBTypeSummaryOptions::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummaryOptions, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up.get(); } lldb::LanguageType SBTypeSummaryOptions::GetLanguage() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBTypeSummaryOptions, - GetLanguage); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_up->GetLanguage(); @@ -54,8 +52,7 @@ lldb::LanguageType SBTypeSummaryOptions::GetLanguage() { } lldb::TypeSummaryCapping SBTypeSummaryOptions::GetCapping() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::TypeSummaryCapping, SBTypeSummaryOptions, - GetCapping); + LLDB_INSTRUMENT_VA(this); if (IsValid()) return m_opaque_up->GetCapping(); @@ -63,16 +60,14 @@ lldb::TypeSummaryCapping SBTypeSummaryOptions::GetCapping() { } void SBTypeSummaryOptions::SetLanguage(lldb::LanguageType l) { - LLDB_RECORD_METHOD(void, SBTypeSummaryOptions, SetLanguage, - (lldb::LanguageType), l); + LLDB_INSTRUMENT_VA(this, l); if (IsValid()) m_opaque_up->SetLanguage(l); } void SBTypeSummaryOptions::SetCapping(lldb::TypeSummaryCapping c) { - LLDB_RECORD_METHOD(void, SBTypeSummaryOptions, SetCapping, - (lldb::TypeSummaryCapping), c); + LLDB_INSTRUMENT_VA(this, c); if (IsValid()) m_opaque_up->SetCapping(c); @@ -102,20 +97,14 @@ const lldb_private::TypeSummaryOptions &SBTypeSummaryOptions::ref() const { SBTypeSummaryOptions::SBTypeSummaryOptions( const lldb_private::TypeSummaryOptions &lldb_object) : m_opaque_up(std::make_unique(lldb_object)) { - LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb_private::TypeSummaryOptions &), - lldb_object); + LLDB_INSTRUMENT_VA(this, lldb_object); } -SBTypeSummary::SBTypeSummary() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSummary); -} +SBTypeSummary::SBTypeSummary() { LLDB_INSTRUMENT_VA(this); } SBTypeSummary SBTypeSummary::CreateWithSummaryString(const char *data, uint32_t options) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary, - 
CreateWithSummaryString, (const char *, uint32_t), - data, options); + LLDB_INSTRUMENT_VA(data, options); if (!data || data[0] == 0) return SBTypeSummary(); @@ -126,9 +115,7 @@ SBTypeSummary SBTypeSummary::CreateWithSummaryString(const char *data, SBTypeSummary SBTypeSummary::CreateWithFunctionName(const char *data, uint32_t options) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary, - CreateWithFunctionName, (const char *, uint32_t), - data, options); + LLDB_INSTRUMENT_VA(data, options); if (!data || data[0] == 0) return SBTypeSummary(); @@ -139,9 +126,7 @@ SBTypeSummary SBTypeSummary::CreateWithFunctionName(const char *data, SBTypeSummary SBTypeSummary::CreateWithScriptCode(const char *data, uint32_t options) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary, - CreateWithScriptCode, (const char *, uint32_t), - data, options); + LLDB_INSTRUMENT_VA(data, options); if (!data || data[0] == 0) return SBTypeSummary(); @@ -153,10 +138,7 @@ SBTypeSummary SBTypeSummary::CreateWithScriptCode(const char *data, SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb, uint32_t options, const char *description) { - LLDB_RECORD_STATIC_METHOD( - lldb::SBTypeSummary, SBTypeSummary, CreateWithCallback, - (lldb::SBTypeSummary::FormatCallback, uint32_t, const char *), cb, - options, description); + LLDB_INSTRUMENT_VA(cb, options, description); SBTypeSummary retval; if (cb) { @@ -180,23 +162,23 @@ SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb, SBTypeSummary::SBTypeSummary(const lldb::SBTypeSummary &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeSummary, (const lldb::SBTypeSummary &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeSummary::~SBTypeSummary() = default; bool SBTypeSummary::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummary, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeSummary::operator bool() const { - 
LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummary, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } bool SBTypeSummary::IsFunctionCode() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsFunctionCode); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -209,7 +191,7 @@ bool SBTypeSummary::IsFunctionCode() { } bool SBTypeSummary::IsFunctionName() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsFunctionName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -222,7 +204,7 @@ bool SBTypeSummary::IsFunctionName() { } bool SBTypeSummary::IsSummaryString() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsSummaryString); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -231,7 +213,7 @@ bool SBTypeSummary::IsSummaryString() { } const char *SBTypeSummary::GetData() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSummary, GetData); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return nullptr; @@ -249,7 +231,7 @@ const char *SBTypeSummary::GetData() { } uint32_t SBTypeSummary::GetOptions() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeSummary, GetOptions); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return lldb::eTypeOptionNone; @@ -257,7 +239,7 @@ uint32_t SBTypeSummary::GetOptions() { } void SBTypeSummary::SetOptions(uint32_t value) { - LLDB_RECORD_METHOD(void, SBTypeSummary, SetOptions, (uint32_t), value); + LLDB_INSTRUMENT_VA(this, value); if (!CopyOnWrite_Impl()) return; @@ -265,8 +247,7 @@ void SBTypeSummary::SetOptions(uint32_t value) { } void SBTypeSummary::SetSummaryString(const char *data) { - LLDB_RECORD_METHOD(void, SBTypeSummary, SetSummaryString, (const char *), - data); + LLDB_INSTRUMENT_VA(this, data); if (!IsValid()) return; @@ -278,8 +259,7 @@ void SBTypeSummary::SetSummaryString(const char *data) { } void SBTypeSummary::SetFunctionName(const char *data) { - LLDB_RECORD_METHOD(void, SBTypeSummary, SetFunctionName, (const char *), - data); + LLDB_INSTRUMENT_VA(this, data); 
if (!IsValid()) return; @@ -291,8 +271,7 @@ void SBTypeSummary::SetFunctionName(const char *data) { } void SBTypeSummary::SetFunctionCode(const char *data) { - LLDB_RECORD_METHOD(void, SBTypeSummary, SetFunctionCode, (const char *), - data); + LLDB_INSTRUMENT_VA(this, data); if (!IsValid()) return; @@ -305,9 +284,7 @@ void SBTypeSummary::SetFunctionCode(const char *data) { bool SBTypeSummary::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeSummary, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (!CopyOnWrite_Impl()) return false; @@ -318,8 +295,7 @@ bool SBTypeSummary::GetDescription(lldb::SBStream &description, } bool SBTypeSummary::DoesPrintValue(lldb::SBValue value) { - LLDB_RECORD_METHOD(bool, SBTypeSummary, DoesPrintValue, (lldb::SBValue), - value); + LLDB_INSTRUMENT_VA(this, value); if (!IsValid()) return false; @@ -328,9 +304,7 @@ bool SBTypeSummary::DoesPrintValue(lldb::SBValue value) { } lldb::SBTypeSummary &SBTypeSummary::operator=(const lldb::SBTypeSummary &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeSummary &, - SBTypeSummary, operator=,(const lldb::SBTypeSummary &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -339,8 +313,7 @@ lldb::SBTypeSummary &SBTypeSummary::operator=(const lldb::SBTypeSummary &rhs) { } bool SBTypeSummary::operator==(lldb::SBTypeSummary &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeSummary, operator==,(lldb::SBTypeSummary &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -348,8 +321,7 @@ bool SBTypeSummary::operator==(lldb::SBTypeSummary &rhs) { } bool SBTypeSummary::IsEqualTo(lldb::SBTypeSummary &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeSummary, IsEqualTo, (lldb::SBTypeSummary &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (IsValid()) { // valid and invalid are different 
@@ -389,8 +361,7 @@ bool SBTypeSummary::IsEqualTo(lldb::SBTypeSummary &rhs) { } bool SBTypeSummary::operator!=(lldb::SBTypeSummary &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeSummary, operator!=,(lldb::SBTypeSummary &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); diff --git a/lldb/source/API/SBTypeSynthetic.cpp b/lldb/source/API/SBTypeSynthetic.cpp index 5b8a03b27077d..7258ff04745de 100644 --- a/lldb/source/API/SBTypeSynthetic.cpp +++ b/lldb/source/API/SBTypeSynthetic.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBTypeSynthetic.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBStream.h" @@ -16,15 +16,11 @@ using namespace lldb; using namespace lldb_private; -SBTypeSynthetic::SBTypeSynthetic() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSynthetic); -} +SBTypeSynthetic::SBTypeSynthetic() { LLDB_INSTRUMENT_VA(this); } SBTypeSynthetic SBTypeSynthetic::CreateWithClassName(const char *data, uint32_t options) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSynthetic, SBTypeSynthetic, - CreateWithClassName, (const char *, uint32_t), data, - options); + LLDB_INSTRUMENT_VA(data, options); if (!data || data[0] == 0) return SBTypeSynthetic(); @@ -34,9 +30,7 @@ SBTypeSynthetic SBTypeSynthetic::CreateWithClassName(const char *data, SBTypeSynthetic SBTypeSynthetic::CreateWithScriptCode(const char *data, uint32_t options) { - LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSynthetic, SBTypeSynthetic, - CreateWithScriptCode, (const char *, uint32_t), - data, options); + LLDB_INSTRUMENT_VA(data, options); if (!data || data[0] == 0) return SBTypeSynthetic(); @@ -46,24 +40,23 @@ SBTypeSynthetic SBTypeSynthetic::CreateWithScriptCode(const char *data, SBTypeSynthetic::SBTypeSynthetic(const lldb::SBTypeSynthetic &rhs) : m_opaque_sp(rhs.m_opaque_sp) { - LLDB_RECORD_CONSTRUCTOR(SBTypeSynthetic, (const lldb::SBTypeSynthetic &), - 
rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBTypeSynthetic::~SBTypeSynthetic() = default; bool SBTypeSynthetic::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSynthetic, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBTypeSynthetic::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSynthetic, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_sp.get() != nullptr; } bool SBTypeSynthetic::IsClassCode() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSynthetic, IsClassCode); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -72,7 +65,7 @@ bool SBTypeSynthetic::IsClassCode() { } bool SBTypeSynthetic::IsClassName() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSynthetic, IsClassName); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -80,7 +73,7 @@ bool SBTypeSynthetic::IsClassName() { } const char *SBTypeSynthetic::GetData() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSynthetic, GetData); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return nullptr; @@ -91,21 +84,21 @@ const char *SBTypeSynthetic::GetData() { } void SBTypeSynthetic::SetClassName(const char *data) { - LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetClassName, (const char *), data); + LLDB_INSTRUMENT_VA(this, data); if (IsValid() && data && *data) m_opaque_sp->SetPythonClassName(data); } void SBTypeSynthetic::SetClassCode(const char *data) { - LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetClassCode, (const char *), data); + LLDB_INSTRUMENT_VA(this, data); if (IsValid() && data && *data) m_opaque_sp->SetPythonCode(data); } uint32_t SBTypeSynthetic::GetOptions() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeSynthetic, GetOptions); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return lldb::eTypeOptionNone; @@ -113,7 +106,7 @@ uint32_t SBTypeSynthetic::GetOptions() { } void SBTypeSynthetic::SetOptions(uint32_t value) { - LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetOptions, (uint32_t), value); + LLDB_INSTRUMENT_VA(this, 
value); if (!CopyOnWrite_Impl()) return; @@ -122,9 +115,7 @@ void SBTypeSynthetic::SetOptions(uint32_t value) { bool SBTypeSynthetic::GetDescription(lldb::SBStream &description, lldb::DescriptionLevel description_level) { - LLDB_RECORD_METHOD(bool, SBTypeSynthetic, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - description_level); + LLDB_INSTRUMENT_VA(this, description, description_level); if (m_opaque_sp) { description.Printf("%s\n", m_opaque_sp->GetDescription().c_str()); @@ -135,9 +126,7 @@ bool SBTypeSynthetic::GetDescription(lldb::SBStream &description, lldb::SBTypeSynthetic &SBTypeSynthetic:: operator=(const lldb::SBTypeSynthetic &rhs) { - LLDB_RECORD_METHOD(lldb::SBTypeSynthetic &, - SBTypeSynthetic, operator=,(const lldb::SBTypeSynthetic &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { m_opaque_sp = rhs.m_opaque_sp; @@ -146,8 +135,7 @@ operator=(const lldb::SBTypeSynthetic &rhs) { } bool SBTypeSynthetic::operator==(lldb::SBTypeSynthetic &rhs) { - LLDB_RECORD_METHOD( - bool, SBTypeSynthetic, operator==,(lldb::SBTypeSynthetic &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -155,8 +143,7 @@ bool SBTypeSynthetic::operator==(lldb::SBTypeSynthetic &rhs) { } bool SBTypeSynthetic::IsEqualTo(lldb::SBTypeSynthetic &rhs) { - LLDB_RECORD_METHOD(bool, SBTypeSynthetic, IsEqualTo, - (lldb::SBTypeSynthetic &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); @@ -174,8 +161,7 @@ bool SBTypeSynthetic::IsEqualTo(lldb::SBTypeSynthetic &rhs) { } bool SBTypeSynthetic::operator!=(lldb::SBTypeSynthetic &rhs) { - LLDB_RECORD_METHOD( - bool, SBTypeSynthetic, operator!=,(lldb::SBTypeSynthetic &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (!IsValid()) return !rhs.IsValid(); diff --git a/lldb/source/API/SBUnixSignals.cpp b/lldb/source/API/SBUnixSignals.cpp index 6ee7afb04e7b6..dc7a68255d131 100644 --- a/lldb/source/API/SBUnixSignals.cpp +++ b/lldb/source/API/SBUnixSignals.cpp 
@@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Process.h" #include "lldb/Target/UnixSignals.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/lldb-defines.h" #include "lldb/API/SBUnixSignals.h" @@ -17,13 +17,11 @@ using namespace lldb; using namespace lldb_private; -SBUnixSignals::SBUnixSignals() { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBUnixSignals); -} +SBUnixSignals::SBUnixSignals() { LLDB_INSTRUMENT_VA(this); } SBUnixSignals::SBUnixSignals(const SBUnixSignals &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBUnixSignals, (const lldb::SBUnixSignals &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } SBUnixSignals::SBUnixSignals(ProcessSP &process_sp) @@ -33,9 +31,7 @@ SBUnixSignals::SBUnixSignals(PlatformSP &platform_sp) : m_opaque_wp(platform_sp ? platform_sp->GetUnixSignals() : nullptr) {} const SBUnixSignals &SBUnixSignals::operator=(const SBUnixSignals &rhs) { - LLDB_RECORD_METHOD(const lldb::SBUnixSignals &, - SBUnixSignals, operator=,(const lldb::SBUnixSignals &), - rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) m_opaque_wp = rhs.m_opaque_wp; @@ -51,24 +47,23 @@ void SBUnixSignals::SetSP(const UnixSignalsSP &signals_sp) { } void SBUnixSignals::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBUnixSignals, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_wp.reset(); } bool SBUnixSignals::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBUnixSignals, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBUnixSignals::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBUnixSignals, operator bool); + LLDB_INSTRUMENT_VA(this); return static_cast(GetSP()); } const char *SBUnixSignals::GetSignalAsCString(int32_t signo) const { - LLDB_RECORD_METHOD_CONST(const char *, SBUnixSignals, GetSignalAsCString, - (int32_t), signo); + 
LLDB_INSTRUMENT_VA(this, signo); if (auto signals_sp = GetSP()) return signals_sp->GetSignalAsCString(signo); @@ -77,8 +72,7 @@ const char *SBUnixSignals::GetSignalAsCString(int32_t signo) const { } int32_t SBUnixSignals::GetSignalNumberFromName(const char *name) const { - LLDB_RECORD_METHOD_CONST(int32_t, SBUnixSignals, GetSignalNumberFromName, - (const char *), name); + LLDB_INSTRUMENT_VA(this, name); if (auto signals_sp = GetSP()) return signals_sp->GetSignalNumberFromName(name); @@ -87,8 +81,7 @@ int32_t SBUnixSignals::GetSignalNumberFromName(const char *name) const { } bool SBUnixSignals::GetShouldSuppress(int32_t signo) const { - LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, GetShouldSuppress, (int32_t), - signo); + LLDB_INSTRUMENT_VA(this, signo); if (auto signals_sp = GetSP()) return signals_sp->GetShouldSuppress(signo); @@ -97,8 +90,7 @@ bool SBUnixSignals::GetShouldSuppress(int32_t signo) const { } bool SBUnixSignals::SetShouldSuppress(int32_t signo, bool value) { - LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldSuppress, (int32_t, bool), - signo, value); + LLDB_INSTRUMENT_VA(this, signo, value); auto signals_sp = GetSP(); @@ -109,8 +101,7 @@ bool SBUnixSignals::SetShouldSuppress(int32_t signo, bool value) { } bool SBUnixSignals::GetShouldStop(int32_t signo) const { - LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, GetShouldStop, (int32_t), - signo); + LLDB_INSTRUMENT_VA(this, signo); if (auto signals_sp = GetSP()) return signals_sp->GetShouldStop(signo); @@ -119,8 +110,7 @@ bool SBUnixSignals::GetShouldStop(int32_t signo) const { } bool SBUnixSignals::SetShouldStop(int32_t signo, bool value) { - LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldStop, (int32_t, bool), signo, - value); + LLDB_INSTRUMENT_VA(this, signo, value); auto signals_sp = GetSP(); @@ -131,8 +121,7 @@ bool SBUnixSignals::SetShouldStop(int32_t signo, bool value) { } bool SBUnixSignals::GetShouldNotify(int32_t signo) const { - LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, 
GetShouldNotify, (int32_t), - signo); + LLDB_INSTRUMENT_VA(this, signo); if (auto signals_sp = GetSP()) return signals_sp->GetShouldNotify(signo); @@ -141,8 +130,7 @@ bool SBUnixSignals::GetShouldNotify(int32_t signo) const { } bool SBUnixSignals::SetShouldNotify(int32_t signo, bool value) { - LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldNotify, (int32_t, bool), - signo, value); + LLDB_INSTRUMENT_VA(this, signo, value); auto signals_sp = GetSP(); @@ -153,7 +141,7 @@ bool SBUnixSignals::SetShouldNotify(int32_t signo, bool value) { } int32_t SBUnixSignals::GetNumSignals() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(int32_t, SBUnixSignals, GetNumSignals); + LLDB_INSTRUMENT_VA(this); if (auto signals_sp = GetSP()) return signals_sp->GetNumSignals(); @@ -162,8 +150,7 @@ int32_t SBUnixSignals::GetNumSignals() const { } int32_t SBUnixSignals::GetSignalAtIndex(int32_t index) const { - LLDB_RECORD_METHOD_CONST(int32_t, SBUnixSignals, GetSignalAtIndex, (int32_t), - index); + LLDB_INSTRUMENT_VA(this, index); if (auto signals_sp = GetSP()) return signals_sp->GetSignalAtIndex(index); diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 151ccba8381e1..20581cfabdd66 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBValue.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBDeclaration.h" #include "lldb/API/SBStream.h" @@ -215,23 +215,22 @@ class ValueLocker { Status m_lock_error; }; -SBValue::SBValue() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValue); } +SBValue::SBValue() { LLDB_INSTRUMENT_VA(this); } SBValue::SBValue(const lldb::ValueObjectSP &value_sp) { - LLDB_RECORD_CONSTRUCTOR(SBValue, (const lldb::ValueObjectSP &), value_sp); + LLDB_INSTRUMENT_VA(this, value_sp); SetSP(value_sp); } SBValue::SBValue(const SBValue &rhs) { - 
LLDB_RECORD_CONSTRUCTOR(SBValue, (const lldb::SBValue &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); SetSP(rhs.m_opaque_sp); } SBValue &SBValue::operator=(const SBValue &rhs) { - LLDB_RECORD_METHOD(lldb::SBValue &, - SBValue, operator=,(const lldb::SBValue &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { SetSP(rhs.m_opaque_sp); @@ -242,11 +241,11 @@ SBValue &SBValue::operator=(const SBValue &rhs) { SBValue::~SBValue() = default; bool SBValue::IsValid() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBValue::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValue, operator bool); + LLDB_INSTRUMENT_VA(this); // If this function ever changes to anything that does more than just check // if the opaque shared pointer is non NULL, then we need to update all "if @@ -256,13 +255,13 @@ SBValue::operator bool() const { } void SBValue::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBValue, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_sp.reset(); } SBError SBValue::GetError() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBValue, GetError); + LLDB_INSTRUMENT_VA(this); SBError sb_error; @@ -278,7 +277,7 @@ SBError SBValue::GetError() { } user_id_t SBValue::GetID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::user_id_t, SBValue, GetID); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -288,7 +287,7 @@ user_id_t SBValue::GetID() { } const char *SBValue::GetName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; ValueLocker locker; @@ -300,7 +299,7 @@ const char *SBValue::GetName() { } const char *SBValue::GetTypeName() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetTypeName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; ValueLocker locker; @@ -313,7 +312,7 @@ const char *SBValue::GetTypeName() { } const char *SBValue::GetDisplayTypeName() { - 
LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetDisplayTypeName); + LLDB_INSTRUMENT_VA(this); const char *name = nullptr; ValueLocker locker; @@ -326,7 +325,7 @@ const char *SBValue::GetDisplayTypeName() { } size_t SBValue::GetByteSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBValue, GetByteSize); + LLDB_INSTRUMENT_VA(this); size_t result = 0; @@ -340,7 +339,7 @@ size_t SBValue::GetByteSize() { } bool SBValue::IsInScope() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsInScope); + LLDB_INSTRUMENT_VA(this); bool result = false; @@ -354,7 +353,7 @@ bool SBValue::IsInScope() { } const char *SBValue::GetValue() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetValue); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; ValueLocker locker; @@ -367,7 +366,7 @@ const char *SBValue::GetValue() { } ValueType SBValue::GetValueType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::ValueType, SBValue, GetValueType); + LLDB_INSTRUMENT_VA(this); ValueType result = eValueTypeInvalid; ValueLocker locker; @@ -379,7 +378,7 @@ ValueType SBValue::GetValueType() { } const char *SBValue::GetObjectDescription() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetObjectDescription); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; ValueLocker locker; @@ -392,7 +391,7 @@ const char *SBValue::GetObjectDescription() { } SBType SBValue::GetType() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBValue, GetType); + LLDB_INSTRUMENT_VA(this); SBType sb_type; ValueLocker locker; @@ -407,7 +406,7 @@ SBType SBValue::GetType() { } bool SBValue::GetValueDidChange() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, GetValueDidChange); + LLDB_INSTRUMENT_VA(this); bool result = false; ValueLocker locker; @@ -421,7 +420,7 @@ bool SBValue::GetValueDidChange() { } const char *SBValue::GetSummary() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetSummary); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; ValueLocker locker; @@ -435,9 +434,7 @@ const char *SBValue::GetSummary() { 
const char *SBValue::GetSummary(lldb::SBStream &stream, lldb::SBTypeSummaryOptions &options) { - LLDB_RECORD_METHOD(const char *, SBValue, GetSummary, - (lldb::SBStream &, lldb::SBTypeSummaryOptions &), stream, - options); + LLDB_INSTRUMENT_VA(this, stream, options); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -451,7 +448,7 @@ const char *SBValue::GetSummary(lldb::SBStream &stream, } const char *SBValue::GetLocation() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetLocation); + LLDB_INSTRUMENT_VA(this); const char *cstr = nullptr; ValueLocker locker; @@ -464,16 +461,14 @@ const char *SBValue::GetLocation() { // Deprecated - use the one that takes an lldb::SBError bool SBValue::SetValueFromCString(const char *value_str) { - LLDB_RECORD_METHOD(bool, SBValue, SetValueFromCString, (const char *), - value_str); + LLDB_INSTRUMENT_VA(this, value_str); lldb::SBError dummy; return SetValueFromCString(value_str, dummy); } bool SBValue::SetValueFromCString(const char *value_str, lldb::SBError &error) { - LLDB_RECORD_METHOD(bool, SBValue, SetValueFromCString, - (const char *, lldb::SBError &), value_str, error); + LLDB_INSTRUMENT_VA(this, value_str, error); bool success = false; ValueLocker locker; @@ -488,7 +483,7 @@ bool SBValue::SetValueFromCString(const char *value_str, lldb::SBError &error) { } lldb::SBTypeFormat SBValue::GetTypeFormat() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeFormat, SBValue, GetTypeFormat); + LLDB_INSTRUMENT_VA(this); lldb::SBTypeFormat format; ValueLocker locker; @@ -504,7 +499,7 @@ lldb::SBTypeFormat SBValue::GetTypeFormat() { } lldb::SBTypeSummary SBValue::GetTypeSummary() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeSummary, SBValue, GetTypeSummary); + LLDB_INSTRUMENT_VA(this); lldb::SBTypeSummary summary; ValueLocker locker; @@ -520,7 +515,7 @@ lldb::SBTypeSummary SBValue::GetTypeSummary() { } lldb::SBTypeFilter SBValue::GetTypeFilter() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeFilter, SBValue, GetTypeFilter); + 
LLDB_INSTRUMENT_VA(this); lldb::SBTypeFilter filter; ValueLocker locker; @@ -540,7 +535,7 @@ lldb::SBTypeFilter SBValue::GetTypeFilter() { } lldb::SBTypeSynthetic SBValue::GetTypeSynthetic() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeSynthetic, SBValue, GetTypeSynthetic); + LLDB_INSTRUMENT_VA(this); lldb::SBTypeSynthetic synthetic; ValueLocker locker; @@ -561,9 +556,7 @@ lldb::SBTypeSynthetic SBValue::GetTypeSynthetic() { lldb::SBValue SBValue::CreateChildAtOffset(const char *name, uint32_t offset, SBType type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateChildAtOffset, - (const char *, uint32_t, lldb::SBType), name, offset, - type); + LLDB_INSTRUMENT_VA(this, name, offset, type); lldb::SBValue sb_value; ValueLocker locker; @@ -581,7 +574,7 @@ lldb::SBValue SBValue::CreateChildAtOffset(const char *name, uint32_t offset, } lldb::SBValue SBValue::Cast(SBType type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Cast, (lldb::SBType), type); + LLDB_INSTRUMENT_VA(this, type); lldb::SBValue sb_value; ValueLocker locker; @@ -595,8 +588,7 @@ lldb::SBValue SBValue::Cast(SBType type) { lldb::SBValue SBValue::CreateValueFromExpression(const char *name, const char *expression) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromExpression, - (const char *, const char *), name, expression); + LLDB_INSTRUMENT_VA(this, name, expression); SBExpressionOptions options; options.ref().SetKeepInMemory(true); @@ -606,9 +598,7 @@ lldb::SBValue SBValue::CreateValueFromExpression(const char *name, lldb::SBValue SBValue::CreateValueFromExpression(const char *name, const char *expression, SBExpressionOptions &options) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromExpression, - (const char *, const char *, lldb::SBExpressionOptions &), - name, expression, options); + LLDB_INSTRUMENT_VA(this, name, expression, options); lldb::SBValue sb_value; ValueLocker locker; @@ -628,9 +618,7 @@ lldb::SBValue SBValue::CreateValueFromExpression(const char *name, 
lldb::SBValue SBValue::CreateValueFromAddress(const char *name, lldb::addr_t address, SBType sb_type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromAddress, - (const char *, lldb::addr_t, lldb::SBType), name, address, - sb_type); + LLDB_INSTRUMENT_VA(this, name, address, sb_type); lldb::SBValue sb_value; ValueLocker locker; @@ -649,9 +637,7 @@ lldb::SBValue SBValue::CreateValueFromAddress(const char *name, lldb::SBValue SBValue::CreateValueFromData(const char *name, SBData data, SBType sb_type) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromData, - (const char *, lldb::SBData, lldb::SBType), name, data, - sb_type); + LLDB_INSTRUMENT_VA(this, name, data, sb_type); lldb::SBValue sb_value; lldb::ValueObjectSP new_value_sp; @@ -669,7 +655,7 @@ lldb::SBValue SBValue::CreateValueFromData(const char *name, SBData data, } SBValue SBValue::GetChildAtIndex(uint32_t idx) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildAtIndex, (uint32_t), idx); + LLDB_INSTRUMENT_VA(this, idx); const bool can_create_synthetic = false; lldb::DynamicValueType use_dynamic = eNoDynamicValues; @@ -686,9 +672,7 @@ SBValue SBValue::GetChildAtIndex(uint32_t idx) { SBValue SBValue::GetChildAtIndex(uint32_t idx, lldb::DynamicValueType use_dynamic, bool can_create_synthetic) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildAtIndex, - (uint32_t, lldb::DynamicValueType, bool), idx, use_dynamic, - can_create_synthetic); + LLDB_INSTRUMENT_VA(this, idx, use_dynamic, can_create_synthetic); lldb::ValueObjectSP child_sp; @@ -709,8 +693,7 @@ SBValue SBValue::GetChildAtIndex(uint32_t idx, } uint32_t SBValue::GetIndexOfChildWithName(const char *name) { - LLDB_RECORD_METHOD(uint32_t, SBValue, GetIndexOfChildWithName, (const char *), - name); + LLDB_INSTRUMENT_VA(this, name); uint32_t idx = UINT32_MAX; ValueLocker locker; @@ -722,8 +705,7 @@ uint32_t SBValue::GetIndexOfChildWithName(const char *name) { } SBValue SBValue::GetChildMemberWithName(const char *name) { - 
LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildMemberWithName, - (const char *), name); + LLDB_INSTRUMENT_VA(this, name); lldb::DynamicValueType use_dynamic_value = eNoDynamicValues; TargetSP target_sp; @@ -738,9 +720,7 @@ SBValue SBValue::GetChildMemberWithName(const char *name) { SBValue SBValue::GetChildMemberWithName(const char *name, lldb::DynamicValueType use_dynamic_value) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildMemberWithName, - (const char *, lldb::DynamicValueType), name, - use_dynamic_value); + LLDB_INSTRUMENT_VA(this, name, use_dynamic_value); lldb::ValueObjectSP child_sp; const ConstString str_name(name); @@ -758,8 +738,7 @@ SBValue::GetChildMemberWithName(const char *name, } lldb::SBValue SBValue::GetDynamicValue(lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetDynamicValue, - (lldb::DynamicValueType), use_dynamic); + LLDB_INSTRUMENT_VA(this, use_dynamic); SBValue value_sb; if (IsValid()) { @@ -771,7 +750,7 @@ lldb::SBValue SBValue::GetDynamicValue(lldb::DynamicValueType use_dynamic) { } lldb::SBValue SBValue::GetStaticValue() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, GetStaticValue); + LLDB_INSTRUMENT_VA(this); SBValue value_sb; if (IsValid()) { @@ -784,7 +763,7 @@ lldb::SBValue SBValue::GetStaticValue() { } lldb::SBValue SBValue::GetNonSyntheticValue() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, GetNonSyntheticValue); + LLDB_INSTRUMENT_VA(this); SBValue value_sb; if (IsValid()) { @@ -796,8 +775,7 @@ lldb::SBValue SBValue::GetNonSyntheticValue() { } lldb::DynamicValueType SBValue::GetPreferDynamicValue() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::DynamicValueType, SBValue, - GetPreferDynamicValue); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return eNoDynamicValues; @@ -805,15 +783,14 @@ lldb::DynamicValueType SBValue::GetPreferDynamicValue() { } void SBValue::SetPreferDynamicValue(lldb::DynamicValueType use_dynamic) { - LLDB_RECORD_METHOD(void, SBValue, SetPreferDynamicValue, 
- (lldb::DynamicValueType), use_dynamic); + LLDB_INSTRUMENT_VA(this, use_dynamic); if (IsValid()) return m_opaque_sp->SetUseDynamic(use_dynamic); } bool SBValue::GetPreferSyntheticValue() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, GetPreferSyntheticValue); + LLDB_INSTRUMENT_VA(this); if (!IsValid()) return false; @@ -821,15 +798,14 @@ bool SBValue::GetPreferSyntheticValue() { } void SBValue::SetPreferSyntheticValue(bool use_synthetic) { - LLDB_RECORD_METHOD(void, SBValue, SetPreferSyntheticValue, (bool), - use_synthetic); + LLDB_INSTRUMENT_VA(this, use_synthetic); if (IsValid()) return m_opaque_sp->SetUseSynthetic(use_synthetic); } bool SBValue::IsDynamic() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsDynamic); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -839,7 +815,7 @@ bool SBValue::IsDynamic() { } bool SBValue::IsSynthetic() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsSynthetic); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -849,7 +825,7 @@ bool SBValue::IsSynthetic() { } bool SBValue::IsSyntheticChildrenGenerated() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsSyntheticChildrenGenerated); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -859,7 +835,7 @@ bool SBValue::IsSyntheticChildrenGenerated() { } void SBValue::SetSyntheticChildrenGenerated(bool is) { - LLDB_RECORD_METHOD(void, SBValue, SetSyntheticChildrenGenerated, (bool), is); + LLDB_INSTRUMENT_VA(this, is); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -868,8 +844,7 @@ void SBValue::SetSyntheticChildrenGenerated(bool is) { } lldb::SBValue SBValue::GetValueForExpressionPath(const char *expr_path) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetValueForExpressionPath, - (const char *), expr_path); + LLDB_INSTRUMENT_VA(this, expr_path); lldb::ValueObjectSP child_sp; ValueLocker locker; @@ -886,8 +861,7 @@ lldb::SBValue 
SBValue::GetValueForExpressionPath(const char *expr_path) { } int64_t SBValue::GetValueAsSigned(SBError &error, int64_t fail_value) { - LLDB_RECORD_METHOD(int64_t, SBValue, GetValueAsSigned, - (lldb::SBError &, int64_t), error, fail_value); + LLDB_INSTRUMENT_VA(this, error, fail_value); error.Clear(); ValueLocker locker; @@ -907,8 +881,7 @@ int64_t SBValue::GetValueAsSigned(SBError &error, int64_t fail_value) { } uint64_t SBValue::GetValueAsUnsigned(SBError &error, uint64_t fail_value) { - LLDB_RECORD_METHOD(uint64_t, SBValue, GetValueAsUnsigned, - (lldb::SBError &, uint64_t), error, fail_value); + LLDB_INSTRUMENT_VA(this, error, fail_value); error.Clear(); ValueLocker locker; @@ -928,7 +901,7 @@ uint64_t SBValue::GetValueAsUnsigned(SBError &error, uint64_t fail_value) { } int64_t SBValue::GetValueAsSigned(int64_t fail_value) { - LLDB_RECORD_METHOD(int64_t, SBValue, GetValueAsSigned, (int64_t), fail_value); + LLDB_INSTRUMENT_VA(this, fail_value); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -939,8 +912,7 @@ int64_t SBValue::GetValueAsSigned(int64_t fail_value) { } uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) { - LLDB_RECORD_METHOD(uint64_t, SBValue, GetValueAsUnsigned, (uint64_t), - fail_value); + LLDB_INSTRUMENT_VA(this, fail_value); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -951,7 +923,7 @@ uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) { } bool SBValue::MightHaveChildren() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, MightHaveChildren); + LLDB_INSTRUMENT_VA(this); bool has_children = false; ValueLocker locker; @@ -963,7 +935,7 @@ bool SBValue::MightHaveChildren() { } bool SBValue::IsRuntimeSupportValue() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsRuntimeSupportValue); + LLDB_INSTRUMENT_VA(this); bool is_support = false; ValueLocker locker; @@ -975,13 +947,13 @@ bool SBValue::IsRuntimeSupportValue() { } uint32_t SBValue::GetNumChildren() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, 
SBValue, GetNumChildren); + LLDB_INSTRUMENT_VA(this); return GetNumChildren(UINT32_MAX); } uint32_t SBValue::GetNumChildren(uint32_t max) { - LLDB_RECORD_METHOD(uint32_t, SBValue, GetNumChildren, (uint32_t), max); + LLDB_INSTRUMENT_VA(this, max); uint32_t num_children = 0; @@ -994,7 +966,7 @@ uint32_t SBValue::GetNumChildren(uint32_t max) { } SBValue SBValue::Dereference() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, Dereference); + LLDB_INSTRUMENT_VA(this); SBValue sb_value; ValueLocker locker; @@ -1009,13 +981,13 @@ SBValue SBValue::Dereference() { // Deprecated - please use GetType().IsPointerType() instead. bool SBValue::TypeIsPointerType() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, TypeIsPointerType); + LLDB_INSTRUMENT_VA(this); return GetType().IsPointerType(); } void *SBValue::GetOpaqueType() { - LLDB_RECORD_METHOD_NO_ARGS(void *, SBValue, GetOpaqueType); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1025,7 +997,7 @@ void *SBValue::GetOpaqueType() { } lldb::SBTarget SBValue::GetTarget() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBValue, GetTarget); + LLDB_INSTRUMENT_VA(this); SBTarget sb_target; TargetSP target_sp; @@ -1038,7 +1010,7 @@ lldb::SBTarget SBValue::GetTarget() { } lldb::SBProcess SBValue::GetProcess() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBValue, GetProcess); + LLDB_INSTRUMENT_VA(this); SBProcess sb_process; ProcessSP process_sp; @@ -1051,7 +1023,7 @@ lldb::SBProcess SBValue::GetProcess() { } lldb::SBThread SBValue::GetThread() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBThread, SBValue, GetThread); + LLDB_INSTRUMENT_VA(this); SBThread sb_thread; ThreadSP thread_sp; @@ -1064,7 +1036,7 @@ lldb::SBThread SBValue::GetThread() { } lldb::SBFrame SBValue::GetFrame() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFrame, SBValue, GetFrame); + LLDB_INSTRUMENT_VA(this); SBFrame sb_frame; StackFrameSP frame_sp; @@ -1085,7 +1057,7 @@ lldb::ValueObjectSP SBValue::GetSP(ValueLocker &locker) 
const { } lldb::ValueObjectSP SBValue::GetSP() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ValueObjectSP, SBValue, GetSP); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; return GetSP(locker); @@ -1146,8 +1118,7 @@ void SBValue::SetSP(const lldb::ValueObjectSP &sp, } bool SBValue::GetExpressionPath(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBValue, GetExpressionPath, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1160,8 +1131,7 @@ bool SBValue::GetExpressionPath(SBStream &description) { bool SBValue::GetExpressionPath(SBStream &description, bool qualify_cxx_base_classes) { - LLDB_RECORD_METHOD(bool, SBValue, GetExpressionPath, (lldb::SBStream &, bool), - description, qualify_cxx_base_classes); + LLDB_INSTRUMENT_VA(this, description, qualify_cxx_base_classes); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1173,8 +1143,7 @@ bool SBValue::GetExpressionPath(SBStream &description, } lldb::SBValue SBValue::EvaluateExpression(const char *expr) const { - LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValue, EvaluateExpression, - (const char *), expr); + LLDB_INSTRUMENT_VA(this, expr); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1196,9 +1165,7 @@ lldb::SBValue SBValue::EvaluateExpression(const char *expr) const { lldb::SBValue SBValue::EvaluateExpression(const char *expr, const SBExpressionOptions &options) const { - LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValue, EvaluateExpression, - (const char *, const lldb::SBExpressionOptions &), - expr, options); + LLDB_INSTRUMENT_VA(this, expr, options); return EvaluateExpression(expr, options, nullptr); } @@ -1206,11 +1173,7 @@ SBValue::EvaluateExpression(const char *expr, lldb::SBValue SBValue::EvaluateExpression(const char *expr, const SBExpressionOptions &options, const char *name) const { - LLDB_RECORD_METHOD_CONST( - lldb::SBValue, SBValue, EvaluateExpression, - (const 
char *, const lldb::SBExpressionOptions &, const char *), expr, - options, name); - + LLDB_INSTRUMENT_VA(this, expr, options, name); if (!expr || expr[0] == '\0') { return SBValue(); @@ -1249,8 +1212,7 @@ lldb::SBValue SBValue::EvaluateExpression(const char *expr, } bool SBValue::GetDescription(SBStream &description) { - LLDB_RECORD_METHOD(bool, SBValue, GetDescription, (lldb::SBStream &), - description); + LLDB_INSTRUMENT_VA(this, description); Stream &strm = description.ref(); @@ -1265,7 +1227,7 @@ bool SBValue::GetDescription(SBStream &description) { } lldb::Format SBValue::GetFormat() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::Format, SBValue, GetFormat); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1275,7 +1237,7 @@ lldb::Format SBValue::GetFormat() { } void SBValue::SetFormat(lldb::Format format) { - LLDB_RECORD_METHOD(void, SBValue, SetFormat, (lldb::Format), format); + LLDB_INSTRUMENT_VA(this, format); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1284,7 +1246,7 @@ void SBValue::SetFormat(lldb::Format format) { } lldb::SBValue SBValue::AddressOf() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, AddressOf); + LLDB_INSTRUMENT_VA(this); SBValue sb_value; ValueLocker locker; @@ -1299,7 +1261,7 @@ lldb::SBValue SBValue::AddressOf() { } lldb::addr_t SBValue::GetLoadAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBValue, GetLoadAddress); + LLDB_INSTRUMENT_VA(this); lldb::addr_t value = LLDB_INVALID_ADDRESS; ValueLocker locker; @@ -1329,7 +1291,7 @@ lldb::addr_t SBValue::GetLoadAddress() { } lldb::SBAddress SBValue::GetAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBValue, GetAddress); + LLDB_INSTRUMENT_VA(this); Address addr; ValueLocker locker; @@ -1358,8 +1320,7 @@ lldb::SBAddress SBValue::GetAddress() { } lldb::SBData SBValue::GetPointeeData(uint32_t item_idx, uint32_t item_count) { - LLDB_RECORD_METHOD(lldb::SBData, SBValue, GetPointeeData, - (uint32_t, uint32_t), 
item_idx, item_count); + LLDB_INSTRUMENT_VA(this, item_idx, item_count); lldb::SBData sb_data; ValueLocker locker; @@ -1378,7 +1339,7 @@ lldb::SBData SBValue::GetPointeeData(uint32_t item_idx, uint32_t item_count) { } lldb::SBData SBValue::GetData() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBData, SBValue, GetData); + LLDB_INSTRUMENT_VA(this); lldb::SBData sb_data; ValueLocker locker; @@ -1395,8 +1356,7 @@ lldb::SBData SBValue::GetData() { } bool SBValue::SetData(lldb::SBData &data, SBError &error) { - LLDB_RECORD_METHOD(bool, SBValue, SetData, (lldb::SBData &, lldb::SBError &), - data, error); + LLDB_INSTRUMENT_VA(this, data, error); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1430,7 +1390,7 @@ bool SBValue::SetData(lldb::SBData &data, SBError &error) { } lldb::SBValue SBValue::Clone(const char *new_name) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Clone, (const char *), new_name); + LLDB_INSTRUMENT_VA(this, new_name); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1442,7 +1402,7 @@ lldb::SBValue SBValue::Clone(const char *new_name) { } lldb::SBDeclaration SBValue::GetDeclaration() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDeclaration, SBValue, GetDeclaration); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); @@ -1457,9 +1417,7 @@ lldb::SBDeclaration SBValue::GetDeclaration() { lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, bool write, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, Watch, - (bool, bool, bool, lldb::SBError &), resolve_location, - read, write, error); + LLDB_INSTRUMENT_VA(this, resolve_location, read, write, error); SBWatchpoint sb_watchpoint; @@ -1522,8 +1480,7 @@ lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, bool write, // Backward compatibility fix in the interim. 
lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, bool write) { - LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, Watch, (bool, bool, bool), - resolve_location, read, write); + LLDB_INSTRUMENT_VA(this, resolve_location, read, write); SBError error; return Watch(resolve_location, read, write, error); @@ -1531,9 +1488,7 @@ lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, lldb::SBWatchpoint SBValue::WatchPointee(bool resolve_location, bool read, bool write, SBError &error) { - LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, WatchPointee, - (bool, bool, bool, lldb::SBError &), resolve_location, - read, write, error); + LLDB_INSTRUMENT_VA(this, resolve_location, read, write, error); SBWatchpoint sb_watchpoint; if (IsInScope() && GetType().IsPointerType()) @@ -1542,7 +1497,7 @@ lldb::SBWatchpoint SBValue::WatchPointee(bool resolve_location, bool read, } lldb::SBValue SBValue::Persist() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, Persist); + LLDB_INSTRUMENT_VA(this); ValueLocker locker; lldb::ValueObjectSP value_sp(GetSP(locker)); diff --git a/lldb/source/API/SBValueList.cpp b/lldb/source/API/SBValueList.cpp index 797c615ce585a..a67030c506f41 100644 --- a/lldb/source/API/SBValueList.cpp +++ b/lldb/source/API/SBValueList.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBValueList.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBStream.h" #include "lldb/API/SBValue.h" #include "lldb/Core/ValueObjectList.h" +#include "lldb/Utility/Instrumentation.h" #include @@ -67,10 +67,10 @@ class ValueListImpl { std::vector m_values; }; -SBValueList::SBValueList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValueList); } +SBValueList::SBValueList() { LLDB_INSTRUMENT_VA(this); } SBValueList::SBValueList(const SBValueList &rhs) { - LLDB_RECORD_CONSTRUCTOR(SBValueList, (const lldb::SBValueList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if 
(rhs.IsValid()) m_opaque_up = std::make_unique(*rhs); @@ -84,24 +84,23 @@ SBValueList::SBValueList(const ValueListImpl *lldb_object_ptr) { SBValueList::~SBValueList() = default; bool SBValueList::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValueList, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBValueList::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValueList, operator bool); + LLDB_INSTRUMENT_VA(this); return (m_opaque_up != nullptr); } void SBValueList::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBValueList, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_up.reset(); } const SBValueList &SBValueList::operator=(const SBValueList &rhs) { - LLDB_RECORD_METHOD(const lldb::SBValueList &, - SBValueList, operator=,(const lldb::SBValueList &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); if (this != &rhs) { if (rhs.IsValid()) @@ -123,8 +122,7 @@ const ValueListImpl *SBValueList::operator->() const { const ValueListImpl &SBValueList::operator*() const { return *m_opaque_up; } void SBValueList::Append(const SBValue &val_obj) { - LLDB_RECORD_METHOD(void, SBValueList, Append, (const lldb::SBValue &), - val_obj); + LLDB_INSTRUMENT_VA(this, val_obj); CreateIfNeeded(); m_opaque_up->Append(val_obj); @@ -138,8 +136,7 @@ void SBValueList::Append(lldb::ValueObjectSP &val_obj_sp) { } void SBValueList::Append(const lldb::SBValueList &value_list) { - LLDB_RECORD_METHOD(void, SBValueList, Append, (const lldb::SBValueList &), - value_list); + LLDB_INSTRUMENT_VA(this, value_list); if (value_list.IsValid()) { CreateIfNeeded(); @@ -148,9 +145,7 @@ void SBValueList::Append(const lldb::SBValueList &value_list) { } SBValue SBValueList::GetValueAtIndex(uint32_t idx) const { - LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValueList, GetValueAtIndex, - (uint32_t), idx); - + LLDB_INSTRUMENT_VA(this, idx); SBValue sb_value; if (m_opaque_up) @@ -160,7 +155,7 @@ SBValue SBValueList::GetValueAtIndex(uint32_t idx) const { } uint32_t 
SBValueList::GetSize() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBValueList, GetSize); + LLDB_INSTRUMENT_VA(this); uint32_t size = 0; if (m_opaque_up) @@ -175,8 +170,7 @@ void SBValueList::CreateIfNeeded() { } SBValue SBValueList::FindValueObjectByUID(lldb::user_id_t uid) { - LLDB_RECORD_METHOD(lldb::SBValue, SBValueList, FindValueObjectByUID, - (lldb::user_id_t), uid); + LLDB_INSTRUMENT_VA(this, uid); SBValue sb_value; if (m_opaque_up) @@ -185,8 +179,7 @@ SBValue SBValueList::FindValueObjectByUID(lldb::user_id_t uid) { } SBValue SBValueList::GetFirstValueByName(const char *name) const { - LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValueList, GetFirstValueByName, - (const char *), name); + LLDB_INSTRUMENT_VA(this, name); SBValue sb_value; if (m_opaque_up) diff --git a/lldb/source/API/SBVariablesOptions.cpp b/lldb/source/API/SBVariablesOptions.cpp index 4057e283e2b20..989d159139cca 100644 --- a/lldb/source/API/SBVariablesOptions.cpp +++ b/lldb/source/API/SBVariablesOptions.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBVariablesOptions.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBTarget.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/lldb-private.h" @@ -80,21 +80,17 @@ class VariablesOptionsImpl { SBVariablesOptions::SBVariablesOptions() : m_opaque_up(new VariablesOptionsImpl()) { - LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBVariablesOptions); + LLDB_INSTRUMENT_VA(this); } SBVariablesOptions::SBVariablesOptions(const SBVariablesOptions &options) : m_opaque_up(new VariablesOptionsImpl(options.ref())) { - LLDB_RECORD_CONSTRUCTOR(SBVariablesOptions, - (const lldb::SBVariablesOptions &), options); + LLDB_INSTRUMENT_VA(this, options); } SBVariablesOptions &SBVariablesOptions:: operator=(const SBVariablesOptions &options) { - LLDB_RECORD_METHOD( - lldb::SBVariablesOptions &, - SBVariablesOptions, operator=,(const 
lldb::SBVariablesOptions &), - options); + LLDB_INSTRUMENT_VA(this, options); m_opaque_up = std::make_unique(options.ref()); return *this; @@ -103,109 +99,97 @@ operator=(const SBVariablesOptions &options) { SBVariablesOptions::~SBVariablesOptions() = default; bool SBVariablesOptions::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBVariablesOptions::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, operator bool); + LLDB_INSTRUMENT_VA(this); return m_opaque_up != nullptr; } bool SBVariablesOptions::GetIncludeArguments() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, - GetIncludeArguments); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetIncludeArguments(); } void SBVariablesOptions::SetIncludeArguments(bool arguments) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeArguments, (bool), - arguments); + LLDB_INSTRUMENT_VA(this, arguments); m_opaque_up->SetIncludeArguments(arguments); } bool SBVariablesOptions::GetIncludeRecognizedArguments( const lldb::SBTarget &target) const { - LLDB_RECORD_METHOD_CONST(bool, SBVariablesOptions, - GetIncludeRecognizedArguments, - (const lldb::SBTarget &), target); + LLDB_INSTRUMENT_VA(this, target); return m_opaque_up->GetIncludeRecognizedArguments(target.GetSP()); } void SBVariablesOptions::SetIncludeRecognizedArguments(bool arguments) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeRecognizedArguments, - (bool), arguments); + LLDB_INSTRUMENT_VA(this, arguments); m_opaque_up->SetIncludeRecognizedArguments(arguments); } bool SBVariablesOptions::GetIncludeLocals() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetIncludeLocals); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetIncludeLocals(); } void SBVariablesOptions::SetIncludeLocals(bool locals) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeLocals, (bool), - 
locals); + LLDB_INSTRUMENT_VA(this, locals); m_opaque_up->SetIncludeLocals(locals); } bool SBVariablesOptions::GetIncludeStatics() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetIncludeStatics); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetIncludeStatics(); } void SBVariablesOptions::SetIncludeStatics(bool statics) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeStatics, (bool), - statics); + LLDB_INSTRUMENT_VA(this, statics); m_opaque_up->SetIncludeStatics(statics); } bool SBVariablesOptions::GetInScopeOnly() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetInScopeOnly); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetInScopeOnly(); } void SBVariablesOptions::SetInScopeOnly(bool in_scope_only) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetInScopeOnly, (bool), - in_scope_only); + LLDB_INSTRUMENT_VA(this, in_scope_only); m_opaque_up->SetInScopeOnly(in_scope_only); } bool SBVariablesOptions::GetIncludeRuntimeSupportValues() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, - GetIncludeRuntimeSupportValues); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetIncludeRuntimeSupportValues(); } void SBVariablesOptions::SetIncludeRuntimeSupportValues( bool runtime_support_values) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeRuntimeSupportValues, - (bool), runtime_support_values); + LLDB_INSTRUMENT_VA(this, runtime_support_values); m_opaque_up->SetIncludeRuntimeSupportValues(runtime_support_values); } lldb::DynamicValueType SBVariablesOptions::GetUseDynamic() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::DynamicValueType, SBVariablesOptions, - GetUseDynamic); + LLDB_INSTRUMENT_VA(this); return m_opaque_up->GetUseDynamic(); } void SBVariablesOptions::SetUseDynamic(lldb::DynamicValueType dynamic) { - LLDB_RECORD_METHOD(void, SBVariablesOptions, SetUseDynamic, - (lldb::DynamicValueType), dynamic); + LLDB_INSTRUMENT_VA(this, dynamic); 
m_opaque_up->SetUseDynamic(dynamic); } diff --git a/lldb/source/API/SBWatchpoint.cpp b/lldb/source/API/SBWatchpoint.cpp index 207141f5dca6d..f5bd9cd1a9467 100644 --- a/lldb/source/API/SBWatchpoint.cpp +++ b/lldb/source/API/SBWatchpoint.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "lldb/API/SBWatchpoint.h" -#include "lldb/Utility/ReproducerInstrumentation.h" #include "lldb/API/SBAddress.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBDefines.h" #include "lldb/API/SBEvent.h" #include "lldb/API/SBStream.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Breakpoint/Watchpoint.h" #include "lldb/Breakpoint/WatchpointList.h" @@ -26,21 +26,20 @@ using namespace lldb; using namespace lldb_private; -SBWatchpoint::SBWatchpoint() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBWatchpoint); } +SBWatchpoint::SBWatchpoint() { LLDB_INSTRUMENT_VA(this); } SBWatchpoint::SBWatchpoint(const lldb::WatchpointSP &wp_sp) : m_opaque_wp(wp_sp) { - LLDB_RECORD_CONSTRUCTOR(SBWatchpoint, (const lldb::WatchpointSP &), wp_sp); + LLDB_INSTRUMENT_VA(this, wp_sp); } SBWatchpoint::SBWatchpoint(const SBWatchpoint &rhs) : m_opaque_wp(rhs.m_opaque_wp) { - LLDB_RECORD_CONSTRUCTOR(SBWatchpoint, (const lldb::SBWatchpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); } const SBWatchpoint &SBWatchpoint::operator=(const SBWatchpoint &rhs) { - LLDB_RECORD_METHOD(const lldb::SBWatchpoint &, - SBWatchpoint, operator=,(const lldb::SBWatchpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); m_opaque_wp = rhs.m_opaque_wp; return *this; @@ -49,8 +48,7 @@ const SBWatchpoint &SBWatchpoint::operator=(const SBWatchpoint &rhs) { SBWatchpoint::~SBWatchpoint() = default; watch_id_t SBWatchpoint::GetID() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::watch_id_t, SBWatchpoint, GetID); - + LLDB_INSTRUMENT_VA(this); watch_id_t watch_id = LLDB_INVALID_WATCH_ID; lldb::WatchpointSP watchpoint_sp(GetSP()); @@ -61,31 +59,29 @@ watch_id_t SBWatchpoint::GetID() { } bool 
SBWatchpoint::IsValid() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBWatchpoint, IsValid); + LLDB_INSTRUMENT_VA(this); return this->operator bool(); } SBWatchpoint::operator bool() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBWatchpoint, operator bool); + LLDB_INSTRUMENT_VA(this); return bool(m_opaque_wp.lock()); } bool SBWatchpoint::operator==(const SBWatchpoint &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBWatchpoint, operator==,(const SBWatchpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return GetSP() == rhs.GetSP(); } bool SBWatchpoint::operator!=(const SBWatchpoint &rhs) const { - LLDB_RECORD_METHOD_CONST( - bool, SBWatchpoint, operator!=,(const SBWatchpoint &), rhs); + LLDB_INSTRUMENT_VA(this, rhs); return !(*this == rhs); } SBError SBWatchpoint::GetError() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBWatchpoint, GetError); + LLDB_INSTRUMENT_VA(this); SBError sb_error; lldb::WatchpointSP watchpoint_sp(GetSP()); @@ -96,7 +92,7 @@ SBError SBWatchpoint::GetError() { } int32_t SBWatchpoint::GetHardwareIndex() { - LLDB_RECORD_METHOD_NO_ARGS(int32_t, SBWatchpoint, GetHardwareIndex); + LLDB_INSTRUMENT_VA(this); int32_t hw_index = -1; @@ -111,7 +107,7 @@ int32_t SBWatchpoint::GetHardwareIndex() { } addr_t SBWatchpoint::GetWatchAddress() { - LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBWatchpoint, GetWatchAddress); + LLDB_INSTRUMENT_VA(this); addr_t ret_addr = LLDB_INVALID_ADDRESS; @@ -126,7 +122,7 @@ addr_t SBWatchpoint::GetWatchAddress() { } size_t SBWatchpoint::GetWatchSize() { - LLDB_RECORD_METHOD_NO_ARGS(size_t, SBWatchpoint, GetWatchSize); + LLDB_INSTRUMENT_VA(this); size_t watch_size = 0; @@ -141,7 +137,7 @@ size_t SBWatchpoint::GetWatchSize() { } void SBWatchpoint::SetEnabled(bool enabled) { - LLDB_RECORD_METHOD(void, SBWatchpoint, SetEnabled, (bool), enabled); + LLDB_INSTRUMENT_VA(this, enabled); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -161,7 +157,7 @@ void SBWatchpoint::SetEnabled(bool enabled) { } bool 
SBWatchpoint::IsEnabled() { - LLDB_RECORD_METHOD_NO_ARGS(bool, SBWatchpoint, IsEnabled); + LLDB_INSTRUMENT_VA(this); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -173,7 +169,7 @@ bool SBWatchpoint::IsEnabled() { } uint32_t SBWatchpoint::GetHitCount() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBWatchpoint, GetHitCount); + LLDB_INSTRUMENT_VA(this); uint32_t count = 0; lldb::WatchpointSP watchpoint_sp(GetSP()); @@ -187,7 +183,7 @@ uint32_t SBWatchpoint::GetHitCount() { } uint32_t SBWatchpoint::GetIgnoreCount() { - LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBWatchpoint, GetIgnoreCount); + LLDB_INSTRUMENT_VA(this); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -199,7 +195,7 @@ uint32_t SBWatchpoint::GetIgnoreCount() { } void SBWatchpoint::SetIgnoreCount(uint32_t n) { - LLDB_RECORD_METHOD(void, SBWatchpoint, SetIgnoreCount, (uint32_t), n); + LLDB_INSTRUMENT_VA(this, n); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -210,7 +206,7 @@ void SBWatchpoint::SetIgnoreCount(uint32_t n) { } const char *SBWatchpoint::GetCondition() { - LLDB_RECORD_METHOD_NO_ARGS(const char *, SBWatchpoint, GetCondition); + LLDB_INSTRUMENT_VA(this); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -222,8 +218,7 @@ const char *SBWatchpoint::GetCondition() { } void SBWatchpoint::SetCondition(const char *condition) { - LLDB_RECORD_METHOD(void, SBWatchpoint, SetCondition, (const char *), - condition); + LLDB_INSTRUMENT_VA(this, condition); lldb::WatchpointSP watchpoint_sp(GetSP()); if (watchpoint_sp) { @@ -235,9 +230,7 @@ void SBWatchpoint::SetCondition(const char *condition) { bool SBWatchpoint::GetDescription(SBStream &description, DescriptionLevel level) { - LLDB_RECORD_METHOD(bool, SBWatchpoint, GetDescription, - (lldb::SBStream &, lldb::DescriptionLevel), description, - level); + LLDB_INSTRUMENT_VA(this, description, level); Stream &strm = description.ref(); @@ -254,27 +247,25 @@ bool SBWatchpoint::GetDescription(SBStream 
&description, } void SBWatchpoint::Clear() { - LLDB_RECORD_METHOD_NO_ARGS(void, SBWatchpoint, Clear); + LLDB_INSTRUMENT_VA(this); m_opaque_wp.reset(); } lldb::WatchpointSP SBWatchpoint::GetSP() const { - LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::WatchpointSP, SBWatchpoint, GetSP); + LLDB_INSTRUMENT_VA(this); return m_opaque_wp.lock(); } void SBWatchpoint::SetSP(const lldb::WatchpointSP &sp) { - LLDB_RECORD_METHOD(void, SBWatchpoint, SetSP, (const lldb::WatchpointSP &), - sp); + LLDB_INSTRUMENT_VA(this, sp); m_opaque_wp = sp; } bool SBWatchpoint::EventIsWatchpointEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(bool, SBWatchpoint, EventIsWatchpointEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); return Watchpoint::WatchpointEventData::GetEventDataFromEvent(event.get()) != nullptr; @@ -282,9 +273,7 @@ bool SBWatchpoint::EventIsWatchpointEvent(const lldb::SBEvent &event) { WatchpointEventType SBWatchpoint::GetWatchpointEventTypeFromEvent(const SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::WatchpointEventType, SBWatchpoint, - GetWatchpointEventTypeFromEvent, - (const lldb::SBEvent &), event); + LLDB_INSTRUMENT_VA(event); if (event.IsValid()) return Watchpoint::WatchpointEventData::GetWatchpointEventTypeFromEvent( @@ -293,9 +282,7 @@ SBWatchpoint::GetWatchpointEventTypeFromEvent(const SBEvent &event) { } SBWatchpoint SBWatchpoint::GetWatchpointFromEvent(const lldb::SBEvent &event) { - LLDB_RECORD_STATIC_METHOD(lldb::SBWatchpoint, SBWatchpoint, - GetWatchpointFromEvent, (const lldb::SBEvent &), - event); + LLDB_INSTRUMENT_VA(event); SBWatchpoint sb_watchpoint; if (event.IsValid()) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 80e986f18c618..1bf647e4acfc6 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -37,7 +37,7 @@ #include "lldb/Interpreter/CommandReturnObject.h" #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadPlan.h" -#include "lldb/Utility/ReproducerInstrumentation.h" +#include "lldb/Utility/Instrumentation.h" #include "lldb/Utility/Timer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt index 6790e47d69f25..cc0bf5fdb61ac 100644 --- a/lldb/source/Utility/CMakeLists.txt +++ b/lldb/source/Utility/CMakeLists.txt @@ -49,7 +49,7 @@ add_lldb_library(lldbUtility RegisterValue.cpp RegularExpression.cpp Reproducer.cpp - ReproducerInstrumentation.cpp + Instrumentation.cpp ReproducerProvider.cpp Scalar.cpp SelectHelper.cpp diff --git a/lldb/source/Utility/ReproducerInstrumentation.cpp b/lldb/source/Utility/Instrumentation.cpp similarity index 52% rename from lldb/source/Utility/ReproducerInstrumentation.cpp rename to lldb/source/Utility/Instrumentation.cpp index 8365701f8e3be..d375fcea58c02 100644 --- a/lldb/source/Utility/ReproducerInstrumentation.cpp +++ b/lldb/source/Utility/Instrumentation.cpp @@ -1,46 +1,37 @@ -//===-- ReproducerInstrumentation.cpp -------------------------------------===// -// +//===-- Instrumentation.cpp -----------------------------------------------===// // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "lldb/Utility/ReproducerInstrumentation.h" -#include "lldb/Utility/Reproducer.h" +#include "lldb/Utility/Instrumentation.h" #include #include #include #include using namespace lldb_private; -using namespace lldb_private::repro; +using namespace lldb_private::instrumentation; // Whether we're currently across the API boundary. static thread_local bool g_global_boundary = false; -Recorder::Recorder() { - if (!g_global_boundary) { - g_global_boundary = true; - m_local_boundary = true; - } -} - -Recorder::Recorder(llvm::StringRef pretty_func, std::string &&pretty_args) - : m_local_boundary(false) { +Instrumenter::Instrumenter(llvm::StringRef pretty_func, + std::string &&pretty_args) + : m_local_boundary(false) { if (!g_global_boundary) { g_global_boundary = true; m_local_boundary = true; - LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "{0} ({1})", - pretty_func, pretty_args); } + LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "[{0}] {1} ({2})", + m_local_boundary ? "external" : "internal", pretty_func, + pretty_args); } -Recorder::~Recorder() { - UpdateBoundary(); -} +Instrumenter::~Instrumenter() { UpdateBoundary(); } -void Recorder::UpdateBoundary() { +void Instrumenter::UpdateBoundary() { if (m_local_boundary) g_global_boundary = false; } From 156b997251db6d87636fa300d7654989caa01dea Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 20 Jan 2022 15:50:27 -0800 Subject: [PATCH 117/946] [lldb] Instrument SB API with signposts Instrument the SB API with signposts on Darwin. This gives us a time profile on whose behalf LLDB spends time (particularly when run via the SBAPI from an IDE). 
Differential revision: https://reviews.llvm.org/D117632 --- lldb/include/lldb/Utility/Instrumentation.h | 2 ++ lldb/source/Utility/Instrumentation.cpp | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lldb/include/lldb/Utility/Instrumentation.h b/lldb/include/lldb/Utility/Instrumentation.h index 6962270bb89db..ff6591d634378 100644 --- a/lldb/include/lldb/Utility/Instrumentation.h +++ b/lldb/include/lldb/Utility/Instrumentation.h @@ -86,6 +86,8 @@ class Instrumenter { private: void UpdateBoundary(); + llvm::StringRef m_pretty_func; + /// Whether this function call was the one crossing the API boundary. bool m_local_boundary = false; }; diff --git a/lldb/source/Utility/Instrumentation.cpp b/lldb/source/Utility/Instrumentation.cpp index d375fcea58c02..861789810e1a1 100644 --- a/lldb/source/Utility/Instrumentation.cpp +++ b/lldb/source/Utility/Instrumentation.cpp @@ -6,6 +6,8 @@ //===----------------------------------------------------------------------===// #include "lldb/Utility/Instrumentation.h" +#include "llvm/Support/Signposts.h" + #include #include #include @@ -17,21 +19,25 @@ using namespace lldb_private::instrumentation; // Whether we're currently across the API boundary. static thread_local bool g_global_boundary = false; +// Instrument SB API calls with singposts when supported. +static llvm::ManagedStatic g_api_signposts; + Instrumenter::Instrumenter(llvm::StringRef pretty_func, std::string &&pretty_args) - : m_local_boundary(false) { + : m_pretty_func(pretty_func), m_local_boundary(false) { if (!g_global_boundary) { g_global_boundary = true; m_local_boundary = true; + g_api_signposts->startInterval(this, m_pretty_func); } LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "[{0}] {1} ({2})", - m_local_boundary ? "external" : "internal", pretty_func, + m_local_boundary ? 
"external" : "internal", m_pretty_func, pretty_args); } -Instrumenter::~Instrumenter() { UpdateBoundary(); } - -void Instrumenter::UpdateBoundary() { - if (m_local_boundary) +Instrumenter::~Instrumenter() { + if (m_local_boundary) { g_global_boundary = false; + g_api_signposts->endInterval(this, m_pretty_func); + } } From 0dfe953294ba1c0fc43fb710518b9b5a608b223a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 20 Jan 2022 12:06:47 -0500 Subject: [PATCH 118/946] [OpenMP] Change default visibility to protected for device declarations This patch changes the special-case handling of visibility when compiling for an OpenMP target offloading device. This was orignally added as a precaution against the bug encountered in PR41826 when symbols in the device were being preempted by shared library symbols. This should instead be done by making the visibility protected by default. With protected visibility we are asserting that the symbols on the device will never be preempted or preempt another symbol pending a shared library load. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D117806 --- clang/lib/AST/Decl.cpp | 13 -------- clang/lib/Driver/ToolChains/Clang.cpp | 6 ++++ clang/test/OpenMP/declare_target_codegen.cpp | 33 ++++++++++--------- ...x_declare_target_var_ctor_dtor_codegen.cpp | 28 ++++++++-------- .../nvptx_target_pure_deleted_codegen.cpp | 10 +++--- .../OpenMP/nvptx_unsupported_type_codegen.cpp | 16 ++++----- .../OpenMP/target_attribute_convergent.cpp | 6 ++-- 7 files changed, 53 insertions(+), 59 deletions(-) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 232376e4e05db..b2ee34f20cf73 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -787,10 +787,6 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D, // Note that we don't want to make the variable non-external // because of this, but unique-external linkage suits us. 
- // We need variables inside OpenMP declare target directives to be visible. - if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var)) - return LinkageInfo::external(); - if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Var) && !IgnoreVarTypeLinkage) { LinkageInfo TypeLV = getLVForType(*Var->getType(), computation); @@ -917,10 +913,6 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D, if (!isExternallyVisible(LV.getLinkage())) return LinkageInfo(LV.getLinkage(), DefaultVisibility, false); - // Mark the symbols as hidden when compiling for the device. - if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsDevice) - LV.mergeVisibility(HiddenVisibility, /*newExplicit=*/false); - return LV; } @@ -1075,11 +1067,6 @@ LinkageComputer::getLVForClassMember(const NamedDecl *D, // Finally, merge in information from the class. LV.mergeMaybeWithVisibility(classLV, considerClassVisibility); - // We need variables inside OpenMP declare target directives to be visible. - if (const VarDecl *VD = dyn_cast(D)) - if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) - return LinkageInfo(LV.getLinkage(), DefaultVisibility, false); - return LV; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a22f03a488486..dd2570132ddf7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5822,6 +5822,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-ftype-visibility"); CmdArgs.push_back("default"); } + } else if (IsOpenMPDevice) { + // When compiling for the OpenMP device we want protected visibility by + // default. This prevents the device from accidenally preempting code on the + // host, makes the system more robust, and improves performance. 
+ CmdArgs.push_back("-fvisibility"); + CmdArgs.push_back("protected"); } if (!RawTriple.isPS4()) diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp index 6ea9f181b5cbd..991d6aa67f539 100644 --- a/clang/test/OpenMP/declare_target_codegen.cpp +++ b/clang/test/OpenMP/declare_target_codegen.cpp @@ -1,22 +1,22 @@ // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DLOAD -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DLOAD | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -DLOAD | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DLOAD | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -DLOAD | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -DOMP5 | FileCheck 
%s --check-prefix HOST5 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DOMP5 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix DEV5 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix DEV5 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - -DOMP5 | FileCheck %s --check-prefix KMPC-ONLY // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DOMP5 -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45 -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 
-verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=45 -// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=45 +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY // expected-no-diagnostics @@ -26,26 +26,26 @@ // CHECK-NOT: define {{.*}}{{baz1|baz4|maini1|Base|virtual_}} // CHECK-DAG: Bake // CHECK-NOT: @{{hhh|ggg|fff|eee}} = -// CHECK-DAG: @flag = global i8 undef, +// CHECK-DAG: @flag = protected global i8 undef, // CHECK-DAG: @aaa = external global i32, -// CHECK-DAG: @bbb = global i32 0, +// CHECK-DAG: @bbb ={{ protected | }}global i32 0, // CHECK-DAG: weak constant %struct.__tgt_offload_entry { i8* bitcast (i32* @bbb to i8*), // CHECK-DAG: @ccc = external global i32, -// CHECK-DAG: @ddd = global i32 0, +// CHECK-DAG: @ddd ={{ protected | }}global i32 0, // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @eee_decl_tgt_ref_ptr = weak global 
i32* null // CHECK-DAG: @{{.*}}maini1{{.*}}aaa = internal global i64 23, // CHECK-DAG: @pair = {{.*}}addrspace(3) global %struct.PAIR undef -// CHECK-DAG: @_ZN2SS3SSSE = global i32 1, -// CHECK-DAG: @b = global i32 15, -// CHECK-DAG: @d = global i32 0, +// CHECK-DAG: @_ZN2SS3SSSE ={{ protected | }}global i32 1, +// CHECK-DAG: @b ={{ protected | }}global i32 15, +// CHECK-DAG: @d ={{ protected | }}global i32 0, // CHECK-DAG: @c = external global i32, -// CHECK-DAG: @globals = global %struct.S zeroinitializer, +// CHECK-DAG: @globals ={{ protected | }}global %struct.S zeroinitializer, // CHECK-DAG: [[STAT:@.+stat]] = internal global %struct.S zeroinitializer, // CHECK-DAG: [[STAT_REF:@.+]] = internal constant %struct.S* [[STAT]] -// CHECK-DAG: @out_decl_target = global i32 0, +// CHECK-DAG: @out_decl_target ={{ protected | }}global i32 0, // CHECK-DAG: @llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__{{.+}}_globals_l[[@LINE+84]]_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__{{.+}}_stat_l[[@LINE+85]]_ctor to i8*)], // CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (%struct.S** [[STAT_REF]] to i8*)], @@ -291,4 +291,5 @@ struct SS { #pragma omp end declare target }; int SS::SSS = 1; + #endif diff --git a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp index 975015cc58276..0b8c17f1d3279 100644 --- a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp +++ b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp @@ -1,14 +1,14 @@ // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST --check-prefix CHECK // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -fopenmp -x c++ -triple 
nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK -// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t +// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o - | FileCheck %s --check-prefix SIMD-ONLY // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o -| FileCheck %s --check-prefix SIMD-ONLY -// RUN: %clang_cc1 -fopenmp-simd -x c++ 
-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o -| FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix SIMD-ONLY #ifndef HEADER #define HEADER @@ -16,9 +16,9 @@ // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // DEVICE-DAG: [[C_ADDR:.+]] = internal global i32 0, -// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ hidden | }}global %struct.S zeroinitializer, +// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ protected | }}global %struct.S zeroinitializer, // HOST-DAG: @[[C_ADDR:.+]] = internal global i32 0, -// HOST-DAG: @[[CD_ADDR:.+]] ={{( hidden | dso_local)?}} global %struct.S zeroinitializer, +// HOST-DAG: @[[CD_ADDR:.+]] ={{( protected | dso_local)?}} global %struct.S zeroinitializer, #pragma omp declare target int foo() { return 0; } @@ -34,12 +34,12 @@ int car() { return 0; } #pragma omp declare target (bar) int caz() { return 0; } -// DEVICE-DAG: define{{ hidden | }}noundef i32 [[FOO:@.*foo.*]]() -// DEVICE-DAG: define{{ hidden | }}noundef i32 [[BAR:@.*bar.*]]() 
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[BAZ:@.*baz.*]]() -// DEVICE-DAG: define{{ hidden | }}noundef i32 [[DOO:@.*doo.*]]() -// DEVICE-DAG: define{{ hidden | }}noundef i32 [[CAR:@.*car.*]]() -// DEVICE-DAG: define{{ hidden | }}noundef i32 [[CAZ:@.*caz.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[FOO:@.*foo.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[BAR:@.*bar.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[BAZ:@.*baz.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[DOO:@.*doo.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[CAR:@.*car.*]]() +// DEVICE-DAG: define{{ protected | }}noundef i32 [[CAZ:@.*caz.*]]() static int c = foo() + bar() + baz(); #pragma omp declare target (c) diff --git a/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp b/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp index e38fcfb080007..781d0ae3d4142 100644 --- a/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp @@ -1,17 +1,17 @@ // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s // expected-no-diagnostics #ifndef HEADER #define HEADER // CHECK-NOT: class_type_info -// CHECK-DAG: @_ZTV7Derived = linkonce_odr hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%class.Derived*)* @_ZN7Derived3fooEv to i8*)] } -// CHECK-DAG: @_ZTV4Base = 
linkonce_odr hidden unnamed_addr constant { [3 x i8*] } zeroinitializer +// CHECK-DAG: @_ZTV7Derived = linkonce_odr protected unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%class.Derived*)* @_ZN7Derived3fooEv to i8*)] } +// CHECK-DAG: @_ZTV4Base = linkonce_odr protected unnamed_addr constant { [3 x i8*] } zeroinitializer // CHECK-NOT: class_type_info class Base { public: diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp index 43c47dbc5a973..9e3f5a56bcbca 100644 --- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp +++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp @@ -1,8 +1,8 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device 
-fvisibility protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s // expected-no-diagnostics // CHECK-DAG: [[T:%.+]] = type {{.+}}, {{fp128|ppc_fp128}}, @@ -34,18 +34,18 @@ struct T1 { #pragma omp declare target T a = T(); T f = a; -// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}}) +// CHECK: define{{ protected | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}}) void foo(T a = T()) { return; } -// CHECK: define{{ hidden | }}[6 x i64] @{{.+}}bar{{.+}}() +// CHECK: define{{ protected | }}[6 x i64] @{{.+}}bar{{.+}}() T bar() { // CHECK: bitcast [[T]]* %{{.+}} to [6 x i64]* // CHECK-NEXT: load [6 x i64], [6 x i64]* %{{.+}}, // CHECK-NEXT: ret [6 x i64] return T(); } -// CHECK: define{{ hidden | }}void @{{.+}}baz{{.+}}() +// CHECK: define{{ protected | }}void @{{.+}}baz{{.+}}() void baz() { // CHECK: call [6 x i64] @{{.+}}bar{{.+}}() // CHECK-NEXT: bitcast [[T]]* %{{.+}} to [6 x i64]* @@ -54,17 +54,17 @@ void baz() { } T1 a1 = T1(); T1 f1 = a1; -// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}}) +// CHECK: define{{ protected | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}}) void foo1(T1 a = T1()) { return; } -// CHECK: define{{ hidden | }}[[T1]] @{{.+}}bar1{{.+}}() +// CHECK: define{{ protected | }}[[T1]] @{{.+}}bar1{{.+}}() T1 bar1() { // CHECK: load [[T1]], [[T1]]* // CHECK-NEXT: ret [[T1]] return T1(); } -// CHECK: define{{ hidden | }}void @{{.+}}baz1{{.+}}() +// CHECK: define{{ protected | }}void @{{.+}}baz1{{.+}}() void baz1() { // CHECK: call [[T1]] @{{.+}}bar1{{.+}}() T1 t = bar1(); diff --git a/clang/test/OpenMP/target_attribute_convergent.cpp b/clang/test/OpenMP/target_attribute_convergent.cpp index 932214e987c86..9bea28789d3ba 100644 --- a/clang/test/OpenMP/target_attribute_convergent.cpp +++ b/clang/test/OpenMP/target_attribute_convergent.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp 
-x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s -// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -o - | FileCheck %s +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -o - | FileCheck %s // expected-no-diagnostics #pragma omp declare target @@ -9,5 +9,5 @@ void foo() {} #pragma omp end declare target // CHECK: Function Attrs: {{.*}}convergent{{.*}} -// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]] +// CHECK: define protected void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]] // CHECK: attributes [[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} } From 26feef084616a18e9b61acb9c78dfca40f4c6f97 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 20 Jan 2022 13:06:22 -0500 Subject: [PATCH 119/946] [Libomptarget] Change visibility to hidden for device RTL This patch changes the visibility for all constructs in the new device RTL to be hidden by default. This is done after the changes introduced in D117806 changed the visibility from being hidden by default for all device compilations. This asserts that the visibility for the device runtime library will be hidden except for the internal environment variable. This is done to aid optimization and linking of the device library.
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D117807 --- openmp/libomptarget/DeviceRTL/CMakeLists.txt | 4 ++-- openmp/libomptarget/DeviceRTL/include/Types.h | 3 +-- openmp/libomptarget/DeviceRTL/src/Configuration.cpp | 4 +++- openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 2 +- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 4 ++-- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 1 + 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 8185727ab84c2..2e52bdf4d90bf 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -130,7 +130,7 @@ set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS} list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I") # Set flags for LLVM Bitcode compilation. -set(bc_flags -S -x c++ -std=c++17 +set(bc_flags -S -x c++ -std=c++17 -fvisibility=hidden ${clang_opt_flags} -Xclang -emit-llvm-bc -Xclang -aux-triple -Xclang ${aux_triple} @@ -231,5 +231,5 @@ foreach(sm ${nvptx_sm_list}) endforeach() foreach(mcpu ${amdgpu_mcpus}) - compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib) + compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -nogpulib) endforeach() diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h index 8d9b48a0f1352..0ff0cee66b3f3 100644 --- a/openmp/libomptarget/DeviceRTL/include/Types.h +++ b/openmp/libomptarget/DeviceRTL/include/Types.h @@ -193,8 +193,7 @@ enum OMPTgtExecModeFlags : int8_t { // TODO: clang should use address space 5 for omp_thread_mem_alloc, but right // now that's not the case. 
#define THREAD_LOCAL(NAME) \ - NAME [[clang::loader_uninitialized, clang::address_space(5), \ - gnu::visibility("hidden")]] + NAME [[clang::loader_uninitialized, clang::address_space(5)]] // TODO: clang should use address space 4 for omp_const_mem_alloc, maybe it // does? diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp index ee8a98d703618..349f93a08701c 100644 --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -23,8 +23,10 @@ using namespace _OMP; extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU // TODO: We want to change the name as soon as the old runtime is gone. +// This variable should be visibile to the plugin so we override the default +// hidden visibility. DeviceEnvironmentTy CONSTANT(omptarget_device_environment) - __attribute__((used, retain, weak)); + __attribute__((used, retain, weak, visibility("protected"))); uint32_t config::getDebugKind() { return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind; diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt index 0207da1c8b5db..5ff5dde1c45e7 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -132,7 +132,7 @@ macro(add_cuda_bc_library) -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device -D__AMDGCN__ -Xclang -target-cpu -Xclang ${mcpu} - -fvisibility=default + -fvisibility=hidden -Wno-unused-value -nogpulib -O${optimization_level} diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index 1303258f17494..3ea7cdeb5b2bc 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -34,8 +34,8 @@ typedef uint64_t __kmpc_impl_lanemask_t; 
__attribute__((used)) /* Don't discard values the plugin reads */ \ __attribute__((weak)) /* We may have multiple definitions */ \ __attribute__((retain)) /* Also needed to keep values alive */ \ - __attribute__((visibility("default"))) /* Access via SHT_HASH */ \ - __attribute__((section(".data"))) /* Not .bss, can write before load */ + __attribute__((visibility("protected"))) /* Access via SHT_HASH */ \ + __attribute__((section(".data"))) /* Not .bss, can write before load */ #include "llvm/Frontend/OpenMP/OMPGridValues.h" diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 3f1c4e75cbc16..16126891b6521 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -165,6 +165,7 @@ set(bc_flags -S -x c++ -O1 -std=c++14 -mllvm -openmp-opt-disable -ffreestanding -target nvptx64 + -fvisibility=hidden -Xclang -emit-llvm-bc -Xclang -aux-triple -Xclang ${aux_triple} -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device From 58580e922a69d94859a2506c3053d8c066a1e38c Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 20 Jan 2022 22:20:24 +0000 Subject: [PATCH 120/946] [llvm][cmake] Make `llvm_install_symlink` robust to absolute dirs. If `CMAKE_INSTALL_BINDIR` is a different absolute path per project, as it is with NixOS when we install every package to its own prefix, the old way fails when the absolute path gets prepended. There are still some issues with downstream packages using `LLVM_TOOLS_INSTALL_DIR` which also may be absolute and just for LLVM proper, but that will be addressed in a future commit.
Differential Revision: https://reviews.llvm.org/D101070 --- llvm/cmake/modules/LLVMInstallSymlink.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake index b5c35f706cb7e..ef7f5ab5288de 100644 --- a/llvm/cmake/modules/LLVMInstallSymlink.cmake +++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake @@ -6,7 +6,8 @@ include(GNUInstallDirs) function(install_symlink name target outdir) set(DESTDIR $ENV{DESTDIR}) - set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}") + GNUInstallDirs_get_absolute_install_dir(bindir "${outdir}" BINDIR) + set(bindir "${DESTDIR}${bindir}") message(STATUS "Creating ${name}") From a65934241c22ac834578c73f528b062fc53d80bf Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 21 Jan 2022 02:11:31 +0000 Subject: [PATCH 121/946] [gn build] Port 1755f5b1d7b7 --- llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn index 15c1262f53ae3..7aff2ae2a22a0 100644 --- a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn @@ -22,6 +22,7 @@ static_library("Utility") { "FileSpec.cpp", "GDBRemote.cpp", "IOObject.cpp", + "Instrumentation.cpp", "LLDBAssert.cpp", "Listener.cpp", "Log.cpp", @@ -31,7 +32,6 @@ static_library("Utility") { "RegisterValue.cpp", "RegularExpression.cpp", "Reproducer.cpp", - "ReproducerInstrumentation.cpp", "ReproducerProvider.cpp", "Scalar.cpp", "SelectHelper.cpp", From cac164ff9cb671fae7a00d9a3adf1834ebca8bd9 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 20 Jan 2022 18:28:17 -0800 Subject: [PATCH 122/946] [lldb] Update the modulemap --- lldb/include/lldb/module.modulemap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/include/lldb/module.modulemap 
b/lldb/include/lldb/module.modulemap index c0d467a6505eb..303d6b15e808c 100644 --- a/lldb/include/lldb/module.modulemap +++ b/lldb/include/lldb/module.modulemap @@ -2,7 +2,7 @@ module lldb_API { requires cplusplus - textual header "Utility/ReproducerInstrumentation.h" + textual header "Utility/Instrumentation.h" umbrella "API" module * { export * } From d93a11c138bb57f0ecca234867dc65eed35ccba3 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 21 Jan 2022 02:48:10 +0000 Subject: [PATCH 123/946] Revert "[llvm][cmake] Make `llvm_install_symlink` robust to absolute dirs." https://lab.llvm.org/buildbot/#/builders/36/builds/16668 was the sort of thing I saw before when this was part of D99484, and it makes some sense now this would have something to do with it. This reverts commit 58580e922a69d94859a2506c3053d8c066a1e38c. --- llvm/cmake/modules/LLVMInstallSymlink.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake index ef7f5ab5288de..b5c35f706cb7e 100644 --- a/llvm/cmake/modules/LLVMInstallSymlink.cmake +++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake @@ -6,8 +6,7 @@ include(GNUInstallDirs) function(install_symlink name target outdir) set(DESTDIR $ENV{DESTDIR}) - GNUInstallDirs_get_absolute_install_dir(bindir "${outdir}" BINDIR) - set(bindir "${DESTDIR}${bindir}") + set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}") message(STATUS "Creating ${name}") From 82af95029ec947fed8b9c516f04d4f217bd87930 Mon Sep 17 00:00:00 2001 From: Joao Moreira Date: Fri, 21 Jan 2022 09:31:21 +0800 Subject: [PATCH 124/946] [X86] Enable ibt-seal optimization when LTO is used in Kernel Intel's CET/IBT requires every indirect branch target to be an ENDBR instruction. Because of that, the compiler needs to correctly emit these instruction on function's prologues. 
Because this is a security feature, it is desirable that only actual indirect-branch-targeted functions are emitted with ENDBRs. While it is possible to identify address-taken functions through LTO, minimizing these ENDBR instructions remains a hard task for user-space binaries because exported functions may end up being reachable through PLT entries, which use an indirect branch to reach them. Because this cannot be determined at compile time, the compiler currently emits an ENDBR in every non-local-linkage function. Despite the challenge presented for user-space, the kernel landscape is different as no PLTs are used. With the intent of providing the most fit ENDBR emission for the kernel, kernel developers proposed an optimization named "ibt-seal" which replaces superfluous ENDBRs with NOPs directly in the binary. The discussion of this feature can be seen in [1]. This diff brings the enablement of the flag -mibt-seal, which in combination with LTO enforces a different policy for ENDBR placement when the code-model is set to "kernel". In this scenario, the compiler will only emit ENDBRs in address-taken functions, skipping functions that are not address-taken even if they do not have local linkage. A comparison between LTO-compiled kernel binaries without and with the -mibt-seal feature enabled shows that when -mibt-seal was used, the number of ENDBRs in the vmlinux.o binary patched by objtool decreased from 44383 to 33192, and that the number of superfluous ENDBR instructions nopped-out decreased from 11730 to 540. The 540 missed superfluous ENDBRs need to be investigated further, but hypotheses are: assembly code not being taken care of by the compiler, the kernel's exported-symbol mechanisms creating bogus address-taken situations, or even these being removed due to other binary optimizations like the kernel's static_calls. For now, I assume that the large drop in the number of ENDBR instructions already justifies the feature being merged.
[1] - https://lkml.org/lkml/2021/11/22/591 Reviewed By: xiangzhangllvm Differential Revision: https://reviews.llvm.org/D116070 --- clang/include/clang/Basic/CodeGenOptions.def | 2 + clang/include/clang/Driver/Options.td | 2 + clang/lib/CodeGen/CodeGenModule.cpp | 3 ++ clang/lib/Driver/ToolChains/Clang.cpp | 3 ++ clang/lib/Frontend/CompilerInvocation.cpp | 3 ++ .../Target/X86/X86IndirectBranchTracking.cpp | 48 ++++++++++++++----- llvm/test/CodeGen/X86/ibtseal-kernel.ll | 19 ++++++++ llvm/test/CodeGen/X86/ibtseal-large.ll | 19 ++++++++ llvm/test/CodeGen/X86/ibtseal-small.ll | 19 ++++++++ 9 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/X86/ibtseal-kernel.ll create mode 100644 llvm/test/CodeGen/X86/ibtseal-large.ll create mode 100644 llvm/test/CodeGen/X86/ibtseal-small.ll diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 3526b8a4a9044..0da875525c0c4 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -107,6 +107,8 @@ CODEGENOPT(CFProtectionReturn , 1, 0) ///< if -fcf-protection is ///< set to full or return. CODEGENOPT(CFProtectionBranch , 1, 0) ///< if -fcf-protection is ///< set to full or branch. +CODEGENOPT(IBTSeal, 1, 0) ///< set to optimize CFProtectionBranch. + CODEGENOPT(XRayInstrumentFunctions , 1, 0) ///< Set when -fxray-instrument is ///< enabled. CODEGENOPT(StackSizeSection , 1, 0) ///< Set when -fstack-size-section is enabled. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b66363b1d3e92..49ceebcb51cf5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1927,6 +1927,8 @@ def fcf_protection_EQ : Joined<["-"], "fcf-protection=">, Flags<[CoreOption, CC1 def fcf_protection : Flag<["-"], "fcf-protection">, Group, Flags<[CoreOption, CC1Option]>, Alias, AliasArgs<["full"]>, HelpText<"Enable cf-protection in 'full' mode">; +def mibt_seal : Flag<["-"], "mibt-seal">, Group, Flags<[CoreOption, CC1Option]>, + HelpText<"Optimize fcf-protection=branch/full (requires LTO).">; defm xray_instrument : BoolFOption<"xray-instrument", LangOpts<"XRayInstrument">, DefaultFalse, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e91da73d2f03c..d534cf182f5a7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -712,6 +712,9 @@ void CodeGenModule::Release() { 1); } + if (CodeGenOpts.IBTSeal) + getModule().addModuleFlag(llvm::Module::Override, "ibt-seal", 1); + // Add module metadata for return address signing (ignoring // non-leaf/all) and stack tagging. These are actually turned on by function // attributes, but we use module metadata to emit build attributes. This is diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index dd2570132ddf7..52d576345c027 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6166,6 +6166,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString(Twine("-fcf-protection=") + A->getValue())); } + if (IsUsingLTO) + Args.AddLastArg(CmdArgs, options::OPT_mibt_seal); + // Forward -f options with positive and negative forms; we translate these by // hand. Do not propagate PGO options to the GPU-side compilations as the // profile info is for the host-side compilation only. 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 7727d70adfb1e..eaca1fbb9def8 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1814,6 +1814,9 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } + if (Opts.PrepareForLTO && Args.hasArg(OPT_mibt_seal)) + Opts.IBTSeal = 1; + for (auto *A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) { CodeGenOptions::BitcodeFileToLink F; diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp index 6642f46e64b2f..7e751a4c8811e 100644 --- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp +++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -95,14 +95,45 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) { return Attrs.hasFnAttr(Attribute::ReturnsTwice); } +// Checks if function should have an ENDBR in its prologue +static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) { + Function &F = MF.getFunction(); + + if (F.doesNoCfCheck()) + return false; + + const X86TargetMachine *TM = + static_cast(&MF.getTarget()); + Metadata *IBTSeal = M->getModuleFlag("ibt-seal"); + + switch (TM->getCodeModel()) { + // Large code model functions always reachable through indirect calls. + case CodeModel::Large: + return true; + // Only address taken functions in LTO'ed kernel are reachable indirectly. + // IBTSeal implies LTO, thus only check if function is address taken. + case CodeModel::Kernel: + // Check if ibt-seal was enabled (implies LTO is being used). + if (IBTSeal) { + return F.hasAddressTaken(); + } + // if !IBTSeal, fall into default case. + LLVM_FALLTHROUGH; + // Address taken or externally linked functions may be reachable. 
+ default: + return (F.hasAddressTaken() || !F.hasLocalLinkage()); + } +} + bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { const X86Subtarget &SubTarget = MF.getSubtarget(); + const Module *M = MF.getMMI().getModule(); // Check that the cf-protection-branch is enabled. - Metadata *isCFProtectionSupported = - MF.getMMI().getModule()->getModuleFlag("cf-protection-branch"); - // NB: We need to enable IBT in jitted code if JIT compiler is CET - // enabled. + Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); + + // NB: We need to enable IBT in jitted code if JIT compiler is CET + // enabled. const X86TargetMachine *TM = static_cast(&MF.getTarget()); #ifdef __CET__ @@ -119,13 +150,8 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) { TII = SubTarget.getInstrInfo(); EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32; - // Large code model, non-internal function or function whose address - // was taken, can be accessed through indirect calls. Mark the first - // BB with ENDBR instruction unless nocf_check attribute is used. - if ((TM->getCodeModel() == CodeModel::Large || - MF.getFunction().hasAddressTaken() || - !MF.getFunction().hasLocalLinkage()) && - !MF.getFunction().doesNoCfCheck()) { + // If function is reachable indirectly, mark the first BB with ENDBR. 
+ if (needsPrologueENDBR(MF, M)) { auto MBB = MF.begin(); Changed |= addENDBR(*MBB, MBB->begin()); } diff --git a/llvm/test/CodeGen/X86/ibtseal-kernel.ll b/llvm/test/CodeGen/X86/ibtseal-kernel.ll new file mode 100644 index 0000000000000..eb98515f01f81 --- /dev/null +++ b/llvm/test/CodeGen/X86/ibtseal-kernel.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=kernel | FileCheck %s --check-prefix=CHECK-KERNEL-IBTSEAL + +; CHECK-KERNEL-IBTSEAL: foo: +; CHECK-KERNEL-IBTSEAL: endbr +; CHECK-KERNEL-IBTSEAL: bar: +; CHECK-KERNEL-IBTSEAL-NOT: endbr + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() { + ret void +} + +define dso_local i8* @bar() { + ret i8* bitcast (void ()* @foo to i8*) +} + +!llvm.module.flags = !{!1} +!1 = !{i32 4, !"ibt-seal", i32 1} diff --git a/llvm/test/CodeGen/X86/ibtseal-large.ll b/llvm/test/CodeGen/X86/ibtseal-large.ll new file mode 100644 index 0000000000000..e48ac8eb19a7b --- /dev/null +++ b/llvm/test/CodeGen/X86/ibtseal-large.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=large | FileCheck %s --check-prefix=CHECK-LARGE-IBTSEAL + +; CHECK-LARGE-IBTSEAL: foo: +; CHECK-LARGE-IBTSEAL: endbr +; CHECK-LARGE-IBTSEAL: bar: +; CHECK-LARGE-IBTSEAL: endbr + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() { + ret void +} + +define dso_local i8* @bar() { + ret i8* bitcast (void ()* @foo to i8*) +} + +!llvm.module.flags = !{!1} +!1 = !{i32 4, !"ibt-seal", i32 1} diff --git a/llvm/test/CodeGen/X86/ibtseal-small.ll b/llvm/test/CodeGen/X86/ibtseal-small.ll new file mode 100644 index 0000000000000..3d810d89a26c6 --- /dev/null +++ b/llvm/test/CodeGen/X86/ibtseal-small.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=small | FileCheck %s --check-prefix=CHECK-SMALL-IBTSEAL + +; CHECK-SMALL-IBTSEAL: foo: +; 
CHECK-SMALL-IBTSEAL: endbr +; CHECK-SMALL-IBTSEAL: bar: +; CHECK-SMALL-IBTSEAL: endbr + +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() { + ret void +} + +define dso_local i8* @bar() { + ret i8* bitcast (void ()* @foo to i8*) +} + +!llvm.module.flags = !{!1} +!1 = !{i32 4, !"ibt-seal", i32 1} From 7ee1c162cc53d37f717f9a138276ad64fa6863bc Mon Sep 17 00:00:00 2001 From: Wu Xinlong <821408745@qq.com> Date: Thu, 20 Jan 2022 16:35:55 +0800 Subject: [PATCH 125/946] [RISCV][RFC] add inst support of zbkb This commit add instructions supports of `zbkb` which defined in scalar cryptography extension version v1.0.0 (has been ratified already). Most of the zbkb directives reuse parts of the zbp and zbb directives, so this patch just modified some of the inst aliases and predicates. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117640 --- llvm/lib/Support/RISCVISAInfo.cpp | 2 + llvm/lib/Target/RISCV/RISCV.td | 20 +++++++++ llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 48 +++++++++++++--------- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 2 +- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 2 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 + llvm/test/CodeGen/RISCV/attributes.ll | 4 ++ llvm/test/CodeGen/RISCV/rv32zbp.ll | 14 +++---- llvm/test/CodeGen/RISCV/rv64zbp.ll | 4 +- llvm/test/MC/RISCV/attribute-arch.s | 3 ++ llvm/test/MC/RISCV/rv32zbkb-only-valid.s | 16 ++++++++ llvm/test/MC/RISCV/rv32zbkb-valid.s | 45 ++++++++++++++++++++ llvm/test/MC/RISCV/rv32zbp-aliases-valid.s | 10 +---- llvm/test/MC/RISCV/rv32zbp-only-valid.s | 6 +++ llvm/test/MC/RISCV/rv32zbp-valid.s | 2 +- llvm/test/MC/RISCV/rv64-zbkb-valid.s | 20 +++++++++ llvm/test/MC/RISCV/rv64zbkb-only-valid.s | 9 ++++ llvm/test/MC/RISCV/rv64zbp-aliases-valid.s | 2 +- 18 files changed, 170 insertions(+), 41 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv32zbkb-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv32zbkb-valid.s create mode 100644 
llvm/test/MC/RISCV/rv64-zbkb-valid.s create mode 100644 llvm/test/MC/RISCV/rv64zbkb-only-valid.s diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index fc52fc6803439..0d9b6e4fa4bb5 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -55,6 +55,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zbb", RISCVExtensionVersion{1, 0}}, {"zbc", RISCVExtensionVersion{1, 0}}, {"zbs", RISCVExtensionVersion{1, 0}}, + + {"zbkb", RISCVExtensionVersion{1, 0}}, }; static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index b5d2bd01d3552..378720bc6b26d 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -143,6 +143,26 @@ def HasStdExtZbbOrZbp "'Zbb' (Base 'B' Instructions) or " "'Zbp' (Permutation 'B' Instructions)">; +def FeatureStdExtZbkb + : SubtargetFeature<"zbkb", "HasStdExtZbkb", "true", + "'Zbkb' (Bitmanip instructions for Cryptography)">; +def HasStdExtZbkb : Predicate<"Subtarget->hasStdExtZbkb()">, + AssemblerPredicate<(all_of FeatureStdExtZbkb), + "'Zbkb' (Bitmanip instructions for Cryptography)">; + +def HasStdExtZbpOrZbkb + : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">, + AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkb), + "'Zbp' (Permutation 'B' Instructions) or " + "'Zbkb' (Bitmanip instructions for Cryptography)">; + +def HasStdExtZbbOrZbpOrZbkb + : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">, + AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp, FeatureStdExtZbkb), + "'Zbb' (Base 'B' Instructions) or " + "'Zbp' (Permutation 'B' Instructions) or " + "'Zbkb' (Bitmanip instructions for Cryptography)">; + def FeatureNoRVCHints : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false", "Disable RVC Hint Instructions.">; diff --git 
a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index f23fd2adb665b..f8030b35da3a2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -313,14 +313,14 @@ class RVBTernaryImm5 funct2, bits<3> funct3, RISCVOpcode opcode, // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZbbOrZbp] in { +let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { def ANDN : ALU_rr<0b0100000, 0b111, "andn">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; def ORN : ALU_rr<0b0100000, 0b110, "orn">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; def XNOR : ALU_rr<0b0100000, 0b100, "xnor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; -} // Predicates = [HasStdExtZbbOrZbp] +} // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZba] in { def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">, @@ -331,12 +331,12 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>; } // Predicates = [HasStdExtZba] -let Predicates = [HasStdExtZbbOrZbp] in { +let Predicates = [HasStdExtZbbOrZbpOrZbkb] in { def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>; def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>; -} // Predicates = [HasStdExtZbbOrZbp] +} // Predicates = [HasStdExtZbbOrZbpOrZbkb] let Predicates = [HasStdExtZbs] in { def BCLR : ALU_rr<0b0100100, 0b001, "bclr">, @@ -360,7 +360,7 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>; def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>; } // Predicates = [HasStdExtZbp] -let Predicates = [HasStdExtZbbOrZbp] in +let Predicates = [HasStdExtZbbOrZbpOrZbkb] in def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[WriteRotateImm, ReadRotateImm]>; @@ -471,11 +471,13 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>; def BCOMPRESS : 
ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>; } // Predicates = [HasStdExtZbe] -let Predicates = [HasStdExtZbp] in { +let Predicates = [HasStdExtZbpOrZbkb] in { def PACK : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>; -def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>; def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>; -} // Predicates = [HasStdExtZbp] +} // Predicates = [HasStdExtZbpOrZbkb] + +let Predicates = [HasStdExtZbp] in +def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>; let Predicates = [HasStdExtZbm, IsRV64] in { def BMATOR : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>; @@ -504,7 +506,7 @@ def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>; } // Predicates = [HasStdExtZbb, IsRV64] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>; def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, @@ -520,7 +522,7 @@ let Predicates = [HasStdExtZbp, IsRV64] in { def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>; } // Predicates = [HasStdExtZbp, IsRV64] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[WriteRotateImm32, ReadRotateImm32]>; @@ -559,10 +561,11 @@ def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>; def BCOMPRESSW : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>; } // Predicates = [HasStdExtZbe, IsRV64] -let Predicates = [HasStdExtZbp, IsRV64] in { +let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in def PACKW : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>; + +let Predicates = [HasStdExtZbp, IsRV64] in def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>; -} // Predicates = [HasStdExtZbp, IsRV64] let Predicates = [HasStdExtZbf, IsRV64] in def BFPW : 
ALUW_rr<0b0100100, 0b111, "bfpw">, @@ -593,21 +596,21 @@ def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd), // causes diagnostics to suggest that Zbp rather than Zbb is required for rev8 // or gorci. Since Zbb is closer to being finalized than Zbp this will be // misleading to users. -let Predicates = [HasStdExtZbbOrZbp, IsRV32] in { +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> { let imm12 = { 0b01101, 0b0011000 }; } -} // Predicates = [HasStdExtZbbOrZbp, IsRV32] +} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] -let Predicates = [HasStdExtZbbOrZbp, IsRV64] in { +let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> { let imm12 = { 0b01101, 0b0111000 }; } -} // Predicates = [HasStdExtZbbOrZbp, IsRV64] +} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] let Predicates = [HasStdExtZbbOrZbp] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -617,6 +620,15 @@ def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), } } // Predicates = [HasStdExtZbbOrZbp] +let Predicates = [HasStdExtZbpOrZbkb] in +def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">; + +let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in { +def ZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">; +def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">; +} // Predicates = [HasStdExtZbkb, IsRV32] + + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// @@ -631,11 +643,11 @@ def : InstAlias<"rev2.n $rd, $rs", (GREVI 
GPR:$rd, GPR:$rs, 0b00010)>; def : InstAlias<"rev.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00011)>; def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>; def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>; -def : InstAlias<"rev.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00111)>; def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>; def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>; def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>; def : InstAlias<"rev.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01111)>; +def : InstAlias<"rev.b $rd, $rs", (BREV8 GPR:$rd, GPR:$rs)>; def : InstAlias<"zip.n $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b0001)>; def : InstAlias<"unzip.n $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>; @@ -675,8 +687,6 @@ def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>; def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>; def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>; def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>; -def : InstAlias<"zip $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1111)>; -def : InstAlias<"unzip $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>; def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>; def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>; diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index b907ada3a1d5a..6cc24fa17c84a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = false; - let UnsupportedFeatures = [HasVInstructions, HasVInstructionsI64]; + let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions, HasVInstructionsI64]; } //===----------------------------------------------------------------------===// diff --git 
a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 5672637a40cc2..2da8c14088890 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = 0; - let UnsupportedFeatures = [HasVInstructions]; + let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions]; } // The SiFive7 microarchitecture has two pipelines: A and B. diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 4b5958ad38d9c..141e7114b5883 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -83,6 +83,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtZve64d = false; bool HasStdExtZfhmin = false; bool HasStdExtZfh = false; + bool HasStdExtZbkb = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -156,6 +157,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; } bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; } + bool hasStdExtZbkb() const { return HasStdExtZbkb; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 59790c9967111..dd4a340edeac4 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -19,6 +19,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck 
--check-prefix=RV32COMBINED %s +; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s @@ -38,6 +39,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s +; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s ; RV32M: .attribute 5, "rv32i2p0_m2p0" @@ -59,6 +61,7 @@ ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" ; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -79,6 +82,7 @@ ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" ; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll index 026a27b691196..1e2b0322b3688 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -1491,7 
+1491,7 @@ define i32 @grev7_i32(i32 %a) nounwind { ; ; RV32ZBP-LABEL: grev7_i32: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: rev.b a0, a0 +; RV32ZBP-NEXT: grevi a0, a0, 7 ; RV32ZBP-NEXT: ret %and1 = shl i32 %a, 1 %shl1 = and i32 %and1, -1431655766 @@ -1560,8 +1560,8 @@ define i64 @grev7_i64(i64 %a) nounwind { ; ; RV32ZBP-LABEL: grev7_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: rev.b a0, a0 -; RV32ZBP-NEXT: rev.b a1, a1 +; RV32ZBP-NEXT: grevi a0, a0, 7 +; RV32ZBP-NEXT: grevi a1, a1, 7 ; RV32ZBP-NEXT: ret %and1 = shl i64 %a, 1 %shl1 = and i64 %and1, -6148914691236517206 @@ -2175,7 +2175,7 @@ define zeroext i8 @bitreverse_i8(i8 zeroext %a) nounwind { ; ; RV32ZBP-LABEL: bitreverse_i8: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: rev.b a0, a0 +; RV32ZBP-NEXT: grevi a0, a0, 7 ; RV32ZBP-NEXT: ret %1 = tail call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %1 @@ -2450,7 +2450,7 @@ define i32 @bitreverse_bswap_i32(i32 %a) { ; ; RV32ZBP-LABEL: bitreverse_bswap_i32: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: rev.b a0, a0 +; RV32ZBP-NEXT: grevi a0, a0, 7 ; RV32ZBP-NEXT: ret %1 = call i32 @llvm.bitreverse.i32(i32 %a) %2 = call i32 @llvm.bswap.i32(i32 %1) @@ -2539,8 +2539,8 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; ; RV32ZBP-LABEL: bitreverse_bswap_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: rev.b a0, a0 -; RV32ZBP-NEXT: rev.b a1, a1 +; RV32ZBP-NEXT: grevi a0, a0, 7 +; RV32ZBP-NEXT: grevi a1, a1, 7 ; RV32ZBP-NEXT: ret %1 = call i64 @llvm.bitreverse.i64(i64 %a) %2 = call i64 @llvm.bswap.i64(i64 %1) diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll index 674ffcff180de..5b74675cdb3ff 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -1438,7 +1438,7 @@ define i64 @grev7_i64(i64 %a) nounwind { ; ; RV64ZBP-LABEL: grev7_i64: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: rev.b a0, a0 +; RV64ZBP-NEXT: grevi a0, a0, 7 ; RV64ZBP-NEXT: ret %and1 = shl i64 %a, 1 %shl1 = and i64 %and1, -6148914691236517206 @@ -2481,7 +2481,7 @@ define i64 
@bitreverse_bswap_i64(i64 %a) { ; ; RV64ZBP-LABEL: bitreverse_bswap_i64: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: rev.b a0, a0 +; RV64ZBP-NEXT: grevi a0, a0, 7 ; RV64ZBP-NEXT: ret %1 = call i64 @llvm.bitreverse.i64(i64 %a) %2 = call i64 @llvm.bswap.i64(i64 %1) diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 3d7be42caeb56..703b6a6aa8105 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -127,3 +127,6 @@ .attribute arch, "rv32ifd_zve64d0p10" # CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10" + +.attribute arch, "rv32i_zbkb1p0" +# CHECK: attribute 5, "rv32i2p0_zbkb1p0" diff --git a/llvm/test/MC/RISCV/rv32zbkb-only-valid.s b/llvm/test/MC/RISCV/rv32zbkb-only-valid.s new file mode 100644 index 0000000000000..a04ff04ba3f10 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zbkb-only-valid.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkb -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkb < %s \ +# RUN: | llvm-objdump --mattr=+zbkb -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: rev8 t0, t1 +# CHECK-ASM: encoding: [0x93,0x52,0x83,0x69] +rev8 t0, t1 + +# CHECK-ASM-AND-OBJ: zip t0, t1 +# CHECK-ASM: encoding: [0x93,0x12,0xf3,0x08] +zip t0, t1 +# CHECK-S-OBJ-NOALIAS: unzip t0, t1 +# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x08] +unzip t0, t1 diff --git a/llvm/test/MC/RISCV/rv32zbkb-valid.s b/llvm/test/MC/RISCV/rv32zbkb-valid.s new file mode 100644 index 0000000000000..36e0a27922776 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zbkb-valid.s @@ -0,0 +1,45 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkb -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkb -show-encoding \ +# RUN: | FileCheck 
-check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkb < %s \ +# RUN: | llvm-objdump --mattr=+zbkb -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \ +# RUN: | llvm-objdump --mattr=+zbkb -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: ror t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x52,0x73,0x60] +ror t0, t1, t2 +# CHECK-ASM-AND-OBJ: rol t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x12,0x73,0x60] +rol t0, t1, t2 +# CHECK-ASM-AND-OBJ: rori t0, t1, 31 +# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x61] +rori t0, t1, 31 +# CHECK-ASM-AND-OBJ: rori t0, t1, 0 +# CHECK-ASM: encoding: [0x93,0x52,0x03,0x60] +rori t0, t1, 0 + +# CHECK-ASM-AND-OBJ: andn t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x72,0x73,0x40] +andn t0, t1, t2 +# CHECK-ASM-AND-OBJ: orn t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x40] +orn t0, t1, t2 +# CHECK-ASM-AND-OBJ: xnor t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x42,0x73,0x40] +xnor t0, t1, t2 + +# CHECK-ASM: pack t0, t1, zero +# CHECK-OBJ: zext.h t0, t1 +# CHECK-ASM: encoding: [0xb3,0x42,0x03,0x08] +pack t0, t1, x0 +# CHECK-ASM-AND-OBJ: packh t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x72,0x73,0x08] +packh t0, t1, t2 + +# CHECK-ASM-AND-OBJ: brev8 t0, t1 +# CHECK-ASM: encoding: [0x93,0x52,0x73,0x68] +brev8 t0, t1 diff --git a/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s b/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s index dd14f49840a06..6472394cee260 100644 --- a/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s @@ -39,7 +39,7 @@ rev4.b x5, x6 # CHECK-S-OBJ: rev2.b t0, t1 rev2.b x5, x6 -# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 7 +# CHECK-S-OBJ-NOALIAS: brev8 t0, t1 # CHECK-S-OBJ: rev.b t0, t1 rev.b x5, x6 @@ -151,14 +151,6 @@ zip2 x5, x6 # CHECK-S-OBJ: unzip2 t0, t1 unzip2 x5, x6 -# CHECK-S-OBJ-NOALIAS: shfli t0, t1, 15 -# CHECK-S-OBJ: zip t0, t1 -zip x5, 
x6 - -# CHECK-S-OBJ-NOALIAS: unshfli t0, t1, 15 -# CHECK-S-OBJ: unzip t0, t1 -unzip x5, x6 - # CHECK-S-OBJ-NOALIAS: gorci t0, t1, 1 # CHECK-S-OBJ: orc.p t0, t1 orc.p x5, x6 diff --git a/llvm/test/MC/RISCV/rv32zbp-only-valid.s b/llvm/test/MC/RISCV/rv32zbp-only-valid.s index a7aba5ee258f6..42f462e98a21a 100644 --- a/llvm/test/MC/RISCV/rv32zbp-only-valid.s +++ b/llvm/test/MC/RISCV/rv32zbp-only-valid.s @@ -13,3 +13,9 @@ pack t0, t1, x0 # CHECK-OBJ: rev8 t0, t1 # CHECK-ASM: encoding: [0x93,0x52,0x83,0x69] grevi t0, t1, 24 +# CHECK-ASM-AND-OBJ: zip t0, t1 +# CHECK-ASM: encoding: [0x93,0x12,0xf3,0x08] +zip x5, x6 +# CHECK-ASM-AND-OBJ: unzip t0, t1 +# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x08] +unzip x5, x6 diff --git a/llvm/test/MC/RISCV/rv32zbp-valid.s b/llvm/test/MC/RISCV/rv32zbp-valid.s index 32474d0ead90c..5cb23fc38b242 100644 --- a/llvm/test/MC/RISCV/rv32zbp-valid.s +++ b/llvm/test/MC/RISCV/rv32zbp-valid.s @@ -54,5 +54,5 @@ xperm.n t0, t1, t2 # CHECK-ASM: encoding: [0xb3,0x42,0x73,0x28] xperm.b t0, t1, t2 # CHECK-ASM-AND-OBJ: xperm.h t0, t1, t2 -# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x28 +# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x28] xperm.h t0, t1, t2 diff --git a/llvm/test/MC/RISCV/rv64-zbkb-valid.s b/llvm/test/MC/RISCV/rv64-zbkb-valid.s new file mode 100644 index 0000000000000..01e1f6a760c89 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64-zbkb-valid.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \ +# RUN: | llvm-objdump --mattr=+zbkb -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: rorw t0, t1, t2 +# CHECK-ASM: encoding: [0xbb,0x52,0x73,0x60] +rorw t0, t1, t2 +# CHECK-ASM-AND-OBJ: rolw t0, t1, t2 +# CHECK-ASM: encoding: [0xbb,0x12,0x73,0x60] +rolw t0, t1, t2 +# CHECK-ASM-AND-OBJ: roriw t0, t1, 31 +# CHECK-ASM: encoding: [0x9b,0x52,0xf3,0x61] +roriw t0, t1, 31 +# CHECK-ASM-AND-OBJ: roriw t0, t1, 0 +# CHECK-ASM: encoding: [0x9b,0x52,0x03,0x60] +roriw t0, t1, 0 + +# CHECK-ASM-AND-OBJ: packw t0, 
t1, t2 +# CHECK-ASM: encoding: [0xbb,0x42,0x73,0x08] +packw t0, t1, t2 diff --git a/llvm/test/MC/RISCV/rv64zbkb-only-valid.s b/llvm/test/MC/RISCV/rv64zbkb-only-valid.s new file mode 100644 index 0000000000000..2d3e7381b3560 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zbkb-only-valid.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkb -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \ +# RUN: | llvm-objdump --mattr=+zbkb -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: rev8 t0, t1 +# CHECK-ASM: encoding: [0x93,0x52,0x83,0x6b] +rev8 t0, t1 diff --git a/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s b/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s index 6c51d96ab6b13..c1474afec5271 100644 --- a/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s @@ -39,7 +39,7 @@ rev4.b x5, x6 # CHECK-S-OBJ: rev2.b t0, t1 rev2.b x5, x6 -# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 7 +# CHECK-S-OBJ-NOALIAS: brev8 t0, t1 # CHECK-S-OBJ: rev.b t0, t1 rev.b x5, x6 From 7b3d30728816403d1fd73cc5082e9fb761262bce Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 20 Jan 2022 20:43:48 -0800 Subject: [PATCH 126/946] [RISCV] Add isel patterns for grevi, shfli, and unshfli to brev8/zip/unzip instructions. Zbkb makes some encodings of the general grevi, shfli, and unshfli instructions legal, so we added separate instructions for those encodings to improve the diagnostics for the assembler and disassembler. To be consistent we should always use these separate instructions whenever those specific encodings of grevi/shfli/unshfli occur. So this patch adds specific isel patterns to override the generic isel patterns for these cases. The same was done previously for rev8 and zext.h for Zbb.
--- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 8 ++++++++ llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll | 18 ++++++++++++++++++ llvm/test/CodeGen/RISCV/rv32zbp.ll | 14 +++++++------- llvm/test/CodeGen/RISCV/rv64zbp.ll | 4 ++-- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index f8030b35da3a2..aae646f02f13f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -687,6 +687,7 @@ def : InstAlias<"zip4 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1100)>; def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>; def : InstAlias<"zip2 $rd, $rs", (SHFLI GPR:$rd, GPR:$rs, 0b1110)>; def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>; +// zip and unzip are considered instructions rather than an alias. def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>; def : InstAlias<"orc8 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b11000)>; @@ -865,6 +866,9 @@ def : PatGprImm; def : PatGprImm; def : PatGprImm; def : PatGprImm; + +// We treat brev8 as a separate instruction, so match it directly. +def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>; } // Predicates = [HasStdExtZbp] let Predicates = [HasStdExtZbp, IsRV64] in @@ -876,6 +880,10 @@ def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)> // We treat rev8 as a separate instruction, so match it directly. def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>; + +// We treat zip and unzip as separate instructions, so match it directly. 
+def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>; +def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>; } // Predicates = [HasStdExtZbp, IsRV32] let Predicates = [HasStdExtZbp, IsRV64] in { diff --git a/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll index 6285804190ac2..4f1dd3d588844 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll @@ -92,6 +92,15 @@ define i32 @shfli32(i32 %a) nounwind { ret i32 %tmp } +define i32 @zipi32(i32 %a) nounwind { +; RV32ZBP-LABEL: zipi32: +; RV32ZBP: # %bb.0: +; RV32ZBP-NEXT: zip a0, a0 +; RV32ZBP-NEXT: ret + %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 15) + ret i32 %tmp +} + declare i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b) define i32 @unshfl32(i32 %a, i32 %b) nounwind { @@ -122,6 +131,15 @@ define i32 @unshfli32(i32 %a) nounwind { ret i32 %tmp } +define i32 @unzipi32(i32 %a) nounwind { +; RV32ZBP-LABEL: unzipi32: +; RV32ZBP: # %bb.0: +; RV32ZBP-NEXT: unzip a0, a0 +; RV32ZBP-NEXT: ret + %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 15) + ret i32 %tmp +} + declare i32 @llvm.riscv.xperm.n.i32(i32 %a, i32 %b) define i32 @xpermn32(i32 %a, i32 %b) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll index 1e2b0322b3688..026a27b691196 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -1491,7 +1491,7 @@ define i32 @grev7_i32(i32 %a) nounwind { ; ; RV32ZBP-LABEL: grev7_i32: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: grevi a0, a0, 7 +; RV32ZBP-NEXT: rev.b a0, a0 ; RV32ZBP-NEXT: ret %and1 = shl i32 %a, 1 %shl1 = and i32 %and1, -1431655766 @@ -1560,8 +1560,8 @@ define i64 @grev7_i64(i64 %a) nounwind { ; ; RV32ZBP-LABEL: grev7_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: grevi a0, a0, 7 -; RV32ZBP-NEXT: grevi a1, a1, 7 +; RV32ZBP-NEXT: rev.b a0, a0 +; RV32ZBP-NEXT: rev.b a1, a1 ; RV32ZBP-NEXT: ret %and1 = shl i64 %a, 1 %shl1 = 
and i64 %and1, -6148914691236517206 @@ -2175,7 +2175,7 @@ define zeroext i8 @bitreverse_i8(i8 zeroext %a) nounwind { ; ; RV32ZBP-LABEL: bitreverse_i8: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: grevi a0, a0, 7 +; RV32ZBP-NEXT: rev.b a0, a0 ; RV32ZBP-NEXT: ret %1 = tail call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %1 @@ -2450,7 +2450,7 @@ define i32 @bitreverse_bswap_i32(i32 %a) { ; ; RV32ZBP-LABEL: bitreverse_bswap_i32: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: grevi a0, a0, 7 +; RV32ZBP-NEXT: rev.b a0, a0 ; RV32ZBP-NEXT: ret %1 = call i32 @llvm.bitreverse.i32(i32 %a) %2 = call i32 @llvm.bswap.i32(i32 %1) @@ -2539,8 +2539,8 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; ; RV32ZBP-LABEL: bitreverse_bswap_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: grevi a0, a0, 7 -; RV32ZBP-NEXT: grevi a1, a1, 7 +; RV32ZBP-NEXT: rev.b a0, a0 +; RV32ZBP-NEXT: rev.b a1, a1 ; RV32ZBP-NEXT: ret %1 = call i64 @llvm.bitreverse.i64(i64 %a) %2 = call i64 @llvm.bswap.i64(i64 %1) diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll index 5b74675cdb3ff..674ffcff180de 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -1438,7 +1438,7 @@ define i64 @grev7_i64(i64 %a) nounwind { ; ; RV64ZBP-LABEL: grev7_i64: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: grevi a0, a0, 7 +; RV64ZBP-NEXT: rev.b a0, a0 ; RV64ZBP-NEXT: ret %and1 = shl i64 %a, 1 %shl1 = and i64 %and1, -6148914691236517206 @@ -2481,7 +2481,7 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; ; RV64ZBP-LABEL: bitreverse_bswap_i64: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: grevi a0, a0, 7 +; RV64ZBP-NEXT: rev.b a0, a0 ; RV64ZBP-NEXT: ret %1 = call i64 @llvm.bitreverse.i64(i64 %a) %2 = call i64 @llvm.bswap.i64(i64 %1) From 26167cae45154d326aba88c8e8ca72e2bc42ea30 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 21 Jan 2022 05:15:46 +0000 Subject: [PATCH 127/946] Print the `// -----` separator between modules when using -split-input-file with mlir-opt This allows piping sequences of `mlir-opt
-split-input-file | mlir-opt -split-input-file`. Depends On D117750 Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D117756 --- mlir/lib/Support/MlirOptMain.cpp | 10 ++++++---- mlir/test/Dialect/OpenACC/ops.mlir | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp index 5c3f7a7732403..af4681f50ccbd 100644 --- a/mlir/lib/Support/MlirOptMain.cpp +++ b/mlir/lib/Support/MlirOptMain.cpp @@ -158,10 +158,12 @@ LogicalResult mlir::MlirOptMain(raw_ostream &outputStream, return splitAndProcessBuffer( std::move(buffer), [&](std::unique_ptr chunkBuffer, raw_ostream &os) { - return processBuffer(os, std::move(chunkBuffer), verifyDiagnostics, - verifyPasses, allowUnregisteredDialects, - preloadDialectsInContext, passManagerSetupFn, - registry, threadPool); + LogicalResult result = processBuffer( + os, std::move(chunkBuffer), verifyDiagnostics, verifyPasses, + allowUnregisteredDialects, preloadDialectsInContext, + passManagerSetupFn, registry, threadPool); + os << "// -----\n"; + return result; }, outputStream); diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 9ced56acfe74b..05e8026b554ca 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt -split-input-file %s | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt -split-input-file %s | mlir-opt -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt -split-input-file %s | mlir-opt -split-input-file | FileCheck %s // Verify the generic form can be parsed. 
-// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -split-input-file | FileCheck %s func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> { %c0 = arith.constant 0 : index From 308d8b8c6618f570166bcc7dbb87f97c04bba1b2 Mon Sep 17 00:00:00 2001 From: Rahul Kayaith Date: Fri, 21 Jan 2022 05:21:00 +0000 Subject: [PATCH 128/946] [mlir][python] 8b/16b DenseIntElements access This extends dense attribute element access to support 8b and 16b ints. Also extends the corresponding parts of the C api. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D117731 --- mlir/include/mlir-c/BuiltinAttributes.h | 8 +++++ mlir/lib/Bindings/Python/IRAttributes.cpp | 12 +++++++ mlir/lib/CAPI/IR/BuiltinAttributes.cpp | 16 +++++++++ mlir/test/CAPI/ir.c | 11 ++++++ mlir/test/python/ir/attributes.py | 44 +++++++++++++++++++++++ 5 files changed, 91 insertions(+) diff --git a/mlir/include/mlir-c/BuiltinAttributes.h b/mlir/include/mlir-c/BuiltinAttributes.h index 5839cd3d2408a..973b7e99469c0 100644 --- a/mlir/include/mlir-c/BuiltinAttributes.h +++ b/mlir/include/mlir-c/BuiltinAttributes.h @@ -355,6 +355,10 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt8Get( MlirType shapedType, intptr_t numElements, const uint8_t *elements); MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrInt8Get( MlirType shapedType, intptr_t numElements, const int8_t *elements); +MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt16Get( + MlirType shapedType, intptr_t numElements, const uint16_t *elements); +MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrInt16Get( + MlirType shapedType, intptr_t numElements, const int16_t *elements); MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt32Get( MlirType shapedType, intptr_t numElements, const uint32_t *elements); MLIR_CAPI_EXPORTED 
MlirAttribute mlirDenseElementsAttrInt32Get( @@ -416,6 +420,10 @@ MLIR_CAPI_EXPORTED int8_t mlirDenseElementsAttrGetInt8Value(MlirAttribute attr, intptr_t pos); MLIR_CAPI_EXPORTED uint8_t mlirDenseElementsAttrGetUInt8Value(MlirAttribute attr, intptr_t pos); +MLIR_CAPI_EXPORTED int16_t +mlirDenseElementsAttrGetInt16Value(MlirAttribute attr, intptr_t pos); +MLIR_CAPI_EXPORTED uint16_t +mlirDenseElementsAttrGetUInt16Value(MlirAttribute attr, intptr_t pos); MLIR_CAPI_EXPORTED int32_t mlirDenseElementsAttrGetInt32Value(MlirAttribute attr, intptr_t pos); MLIR_CAPI_EXPORTED uint32_t diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index fd44ffe6ba5fe..5d87641c379d8 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -673,6 +673,12 @@ class PyDenseIntElementsAttribute if (width == 1) { return mlirDenseElementsAttrGetBoolValue(*this, pos); } + if (width == 8) { + return mlirDenseElementsAttrGetUInt8Value(*this, pos); + } + if (width == 16) { + return mlirDenseElementsAttrGetUInt16Value(*this, pos); + } if (width == 32) { return mlirDenseElementsAttrGetUInt32Value(*this, pos); } @@ -683,6 +689,12 @@ class PyDenseIntElementsAttribute if (width == 1) { return mlirDenseElementsAttrGetBoolValue(*this, pos); } + if (width == 8) { + return mlirDenseElementsAttrGetInt8Value(*this, pos); + } + if (width == 16) { + return mlirDenseElementsAttrGetInt16Value(*this, pos); + } if (width == 32) { return mlirDenseElementsAttrGetInt32Value(*this, pos); } diff --git a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp index c20548bd47597..7b718da88ceef 100644 --- a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp @@ -426,6 +426,16 @@ MlirAttribute mlirDenseElementsAttrInt8Get(MlirType shapedType, const int8_t *elements) { return getDenseAttribute(shapedType, numElements, elements); } +MlirAttribute 
mlirDenseElementsAttrUInt16Get(MlirType shapedType, + intptr_t numElements, + const uint16_t *elements) { + return getDenseAttribute(shapedType, numElements, elements); +} +MlirAttribute mlirDenseElementsAttrInt16Get(MlirType shapedType, + intptr_t numElements, + const int16_t *elements) { + return getDenseAttribute(shapedType, numElements, elements); +} MlirAttribute mlirDenseElementsAttrUInt32Get(MlirType shapedType, intptr_t numElements, const uint32_t *elements) { @@ -530,6 +540,12 @@ int8_t mlirDenseElementsAttrGetInt8Value(MlirAttribute attr, intptr_t pos) { uint8_t mlirDenseElementsAttrGetUInt8Value(MlirAttribute attr, intptr_t pos) { return unwrap(attr).cast().getValues()[pos]; } +int16_t mlirDenseElementsAttrGetInt16Value(MlirAttribute attr, intptr_t pos) { + return unwrap(attr).cast().getValues()[pos]; +} +uint16_t mlirDenseElementsAttrGetUInt16Value(MlirAttribute attr, intptr_t pos) { + return unwrap(attr).cast().getValues()[pos]; +} int32_t mlirDenseElementsAttrGetInt32Value(MlirAttribute attr, intptr_t pos) { return unwrap(attr).cast().getValues()[pos]; } diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c index d01ccaeb0e93a..257d5e9b8683d 100644 --- a/mlir/test/CAPI/ir.c +++ b/mlir/test/CAPI/ir.c @@ -904,6 +904,8 @@ int printBuiltinAttributes(MlirContext ctx) { int bools[] = {0, 1}; uint8_t uints8[] = {0u, 1u}; int8_t ints8[] = {0, 1}; + uint16_t uints16[] = {0u, 1u}; + int16_t ints16[] = {0, 1}; uint32_t uints32[] = {0u, 1u}; int32_t ints32[] = {0, 1}; uint64_t uints64[] = {0u, 1u}; @@ -921,6 +923,13 @@ int printBuiltinAttributes(MlirContext ctx) { MlirAttribute int8Elements = mlirDenseElementsAttrInt8Get( mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeGet(ctx, 8), encoding), 2, ints8); + MlirAttribute uint16Elements = mlirDenseElementsAttrUInt16Get( + mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeUnsignedGet(ctx, 16), + encoding), + 2, uints16); + MlirAttribute int16Elements = mlirDenseElementsAttrInt16Get( + mlirRankedTensorTypeGet(2, 
shape, mlirIntegerTypeGet(ctx, 16), encoding), + 2, ints16); MlirAttribute uint32Elements = mlirDenseElementsAttrUInt32Get( mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeUnsignedGet(ctx, 32), encoding), @@ -956,6 +965,8 @@ int printBuiltinAttributes(MlirContext ctx) { if (mlirDenseElementsAttrGetBoolValue(boolElements, 1) != 1 || mlirDenseElementsAttrGetUInt8Value(uint8Elements, 1) != 1 || mlirDenseElementsAttrGetInt8Value(int8Elements, 1) != 1 || + mlirDenseElementsAttrGetUInt16Value(uint16Elements, 1) != 1 || + mlirDenseElementsAttrGetInt16Value(int16Elements, 1) != 1 || mlirDenseElementsAttrGetUInt32Value(uint32Elements, 1) != 1 || mlirDenseElementsAttrGetInt32Value(int32Elements, 1) != 1 || mlirDenseElementsAttrGetUInt64Value(uint64Elements, 1) != 1 || diff --git a/mlir/test/python/ir/attributes.py b/mlir/test/python/ir/attributes.py index 5f8dd0ad1183f..48f2d4b3df067 100644 --- a/mlir/test/python/ir/attributes.py +++ b/mlir/test/python/ir/attributes.py @@ -292,6 +292,50 @@ def testDenseIntAttr(): print(ShapedType(a.type).element_type) +# CHECK-LABEL: TEST: testDenseIntAttrGetItem +@run +def testDenseIntAttrGetItem(): + def print_item(attr_asm): + attr = DenseIntElementsAttr(Attribute.parse(attr_asm)) + dtype = ShapedType(attr.type).element_type + try: + item = attr[0] + print(f"{dtype}:", item) + except TypeError as e: + print(f"{dtype}:", e) + + with Context(): + # CHECK: i1: 1 + print_item("dense : tensor") + # CHECK: i8: 123 + print_item("dense<123> : tensor") + # CHECK: i16: 123 + print_item("dense<123> : tensor") + # CHECK: i32: 123 + print_item("dense<123> : tensor") + # CHECK: i64: 123 + print_item("dense<123> : tensor") + # CHECK: ui8: 123 + print_item("dense<123> : tensor") + # CHECK: ui16: 123 + print_item("dense<123> : tensor") + # CHECK: ui32: 123 + print_item("dense<123> : tensor") + # CHECK: ui64: 123 + print_item("dense<123> : tensor") + # CHECK: si8: -123 + print_item("dense<-123> : tensor") + # CHECK: si16: -123 + print_item("dense<-123> : 
tensor") + # CHECK: si32: -123 + print_item("dense<-123> : tensor") + # CHECK: si64: -123 + print_item("dense<-123> : tensor") + + # CHECK: i7: Unsupported integer type + print_item("dense<123> : tensor") + + # CHECK-LABEL: TEST: testDenseFPAttr @run def testDenseFPAttr(): From 9006bf424847bf91f0a624ffc27ad165c7b804c4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 21 Jan 2022 05:45:48 +0000 Subject: [PATCH 129/946] Remove obsolete `getAsmResultNames` from OpAsmDialectInterface This is superseded by the same method on OpAsmOpInterface, which is available on the Dialect through the Fallback mechanism. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D117750 --- mlir/include/mlir/IR/OpImplementation.h | 4 ---- mlir/lib/IR/AsmPrinter.cpp | 14 ++++---------- mlir/test/IR/parser.mlir | 7 ++----- mlir/test/lib/Dialect/Test/TestDialect.cpp | 6 ------ 4 files changed, 6 insertions(+), 25 deletions(-) diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index 56e7c72093219..77e858e484be7 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -1350,10 +1350,6 @@ class OpAsmDialectInterface return AliasResult::NoAlias; } - /// Get a special name to use when printing the given operation. See - /// OpAsmInterface.td#getAsmResultNames for usage details and documentation. - virtual void getAsmResultNames(Operation *op, - OpAsmSetValueNameFn setNameFn) const {} }; } // namespace mlir diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 6875aca46fcd2..f69d147c51b38 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -797,8 +797,7 @@ class SSANameState { /// A sentinel value used for values with names set.
enum : unsigned { NameSentinel = ~0U }; - SSANameState(Operation *op, const OpPrintingFlags &printerFlags, - DialectInterfaceCollection &interfaces); + SSANameState(Operation *op, const OpPrintingFlags &printerFlags); /// Print the SSA identifier for the given value to 'stream'. If /// 'printResultNo' is true, it also presents the result number ('#' number) @@ -866,15 +865,12 @@ class SSANameState { /// These are the printing flags. They control, eg., whether to print in /// generic form. OpPrintingFlags printerFlags; - - DialectInterfaceCollection &interfaces; }; } // namespace SSANameState::SSANameState( - Operation *op, const OpPrintingFlags &printerFlags, - DialectInterfaceCollection &interfaces) - : printerFlags(printerFlags), interfaces(interfaces) { + Operation *op, const OpPrintingFlags &printerFlags) + : printerFlags(printerFlags) { llvm::SaveAndRestore valueIDSaver(nextValueID); llvm::SaveAndRestore argumentIDSaver(nextArgumentID); llvm::SaveAndRestore conflictIDSaver(nextConflictID); @@ -1071,8 +1067,6 @@ void SSANameState::numberValuesInOp(Operation &op) { if (!printerFlags.shouldPrintGenericOpForm()) { if (OpAsmOpInterface asmInterface = dyn_cast(&op)) asmInterface.getAsmResultNames(setResultNameFn); - else if (auto *asmInterface = interfaces.getInterfaceFor(op.getDialect())) - asmInterface->getAsmResultNames(&op, setResultNameFn); } // If the first result wasn't numbered, give it a default number. @@ -1172,7 +1166,7 @@ class AsmStateImpl { public: explicit AsmStateImpl(Operation *op, const OpPrintingFlags &printerFlags, AsmState::LocationMap *locationMap) - : interfaces(op->getContext()), nameState(op, printerFlags, interfaces), + : interfaces(op->getContext()), nameState(op, printerFlags), printerFlags(printerFlags), locationMap(locationMap) {} /// Initialize the alias state to enable the printing of aliases. 
diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 636f4eea0c5ff..0c7bdd1e08bae 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -1209,18 +1209,15 @@ func private @string_attr_name() attributes {"0 . 0", nested = {"0 . 0"}} func private @nested_reference() attributes {test.ref = @some_symbol::@some_nested_symbol } // CHECK-LABEL: func @custom_asm_names -func @custom_asm_names() -> (i32, i32, i32, i32, i32, i32, i32) { +func @custom_asm_names() -> (i32, i32, i32, i32, i32, i32) { // CHECK: %[[FIRST:first.*]], %[[MIDDLE:middle_results.*]]:2, %[[LAST:[0-9]+]] %0, %1:2, %2 = "test.asm_interface_op"() : () -> (i32, i32, i32, i32) // CHECK: %[[FIRST_2:first.*]], %[[LAST_2:[0-9]+]] %3, %4 = "test.asm_interface_op"() : () -> (i32, i32) - // CHECK: %[[RESULT:result.*]] - %5 = "test.asm_dialect_interface_op"() : () -> (i32) - // CHECK: return %[[FIRST]], %[[MIDDLE]]#0, %[[MIDDLE]]#1, %[[LAST]], %[[FIRST_2]], %[[LAST_2]] - return %0, %1#0, %1#1, %2, %3, %4, %5 : i32, i32, i32, i32, i32, i32, i32 + return %0, %1#0, %1#1, %2, %3, %4 : i32, i32, i32, i32, i32, i32 } diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index 4d316bd58a8ad..2b915a9fd6c2a 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -99,12 +99,6 @@ struct TestOpAsmInterface : public OpAsmDialectInterface { } return AliasResult::NoAlias; } - - void getAsmResultNames(Operation *op, - OpAsmSetValueNameFn setNameFn) const final { - if (auto asmOp = dyn_cast(op)) - setNameFn(asmOp, "result"); - } }; struct TestDialectFoldInterface : public DialectFoldInterface { From a7f8aea71485c00db2ffb6288ae58475869048b1 Mon Sep 17 00:00:00 2001 From: Sockke Date: Fri, 21 Jan 2022 14:23:52 +0800 Subject: [PATCH 130/946] [clang-tidy] Fix wrong FixIt in performance-move-const-arg There are incorrect Fixit and missing warnings: case : A trivially-copyable object wrapped by std::move is 
passed to the function with rvalue reference parameters. Removing std::move will cause compilation errors. ``` void showInt(int&&) {} void testInt() { int a = 10; // expect: warning + nofix showInt(std::move(a)); // showInt(a) <--- wrong fix } struct Tmp {}; void showTmp(Tmp&&) {} void testTmp() { Tmp t; // expect: warning + nofix showTmp(std::move(t)); // showTmp(t) <--- wrong fix } ``` Reviewed By: aaron.ballman, Quuxplusone Differential Revision: https://reviews.llvm.org/D107450 --- .../performance/MoveConstArgCheck.cpp | 119 ++++++++++++++++-- .../performance/MoveConstArgCheck.h | 2 + clang-tools-extra/docs/ReleaseNotes.rst | 3 + .../checkers/performance-move-const-arg.cpp | 94 ++++++++++++++ 4 files changed, 205 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp index e946a1f39fe98..0e91451211aed 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp @@ -47,17 +47,62 @@ void MoveConstArgCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(MoveCallMatcher, this); + auto ConstTypeParmMatcher = + qualType(references(isConstQualified())).bind("invocation-parm-type"); + auto RValueTypeParmMatcher = + qualType(rValueReferenceType()).bind("invocation-parm-type"); + // Matches respective ParmVarDecl for a CallExpr or CXXConstructExpr. + auto ArgumentWithParamMatcher = forEachArgumentWithParam( + MoveCallMatcher, parmVarDecl(anyOf(hasType(ConstTypeParmMatcher), + hasType(RValueTypeParmMatcher))) + .bind("invocation-parm")); + // Matches respective types of arguments for a CallExpr or CXXConstructExpr + // and it works on calls through function pointers as well. 
+ auto ArgumentWithParamTypeMatcher = forEachArgumentWithParamType( + MoveCallMatcher, anyOf(ConstTypeParmMatcher, RValueTypeParmMatcher)); + Finder->addMatcher( - invocation(forEachArgumentWithParam( - MoveCallMatcher, - parmVarDecl(hasType(references(isConstQualified()))))) + invocation(anyOf(ArgumentWithParamMatcher, ArgumentWithParamTypeMatcher)) .bind("receiving-expr"), this); } +bool IsRValueReferenceParam(const Expr *Invocation, + const QualType &InvocationParmType, + const Expr *Arg) { + if (Invocation && InvocationParmType->isRValueReferenceType() && + Arg->isLValue()) { + if (!Invocation->getType()->isRecordType()) + return true; + else { + if (const auto *ConstructCallExpr = + dyn_cast(Invocation)) { + if (const auto *ConstructorDecl = ConstructCallExpr->getConstructor()) { + if (!ConstructorDecl->isCopyOrMoveConstructor() && + !ConstructorDecl->isDefaultConstructor()) + return true; + } + } + } + } + return false; +} + void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) { const auto *CallMove = Result.Nodes.getNodeAs("call-move"); const auto *ReceivingExpr = Result.Nodes.getNodeAs("receiving-expr"); + const auto *InvocationParm = + Result.Nodes.getNodeAs("invocation-parm"); + const auto *InvocationParmType = + Result.Nodes.getNodeAs("invocation-parm-type"); + + // Skipping matchers which have been matched. + if (!ReceivingExpr && AlreadyCheckedMoves.contains(CallMove)) + return; + + if (ReceivingExpr) + AlreadyCheckedMoves.insert(CallMove); + const Expr *Arg = CallMove->getArg(0); SourceManager &SM = Result.Context->getSourceManager(); @@ -90,20 +135,68 @@ void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) { return; bool IsVariable = isa(Arg); + // std::move shouldn't be removed when an lvalue wrapped by std::move is + // passed to the function with an rvalue reference parameter. + bool IsRVRefParam = + IsRValueReferenceParam(ReceivingExpr, *InvocationParmType, Arg); const auto *Var = IsVariable ? 
dyn_cast(Arg)->getDecl() : nullptr; - auto Diag = diag(FileMoveRange.getBegin(), - "std::move of the %select{|const }0" - "%select{expression|variable %4}1 " - "%select{|of the trivially-copyable type %5 }2" - "has no effect; remove std::move()" - "%select{| or make the variable non-const}3") - << IsConstArg << IsVariable << IsTriviallyCopyable - << (IsConstArg && IsVariable && !IsTriviallyCopyable) << Var - << Arg->getType(); - replaceCallWithArg(CallMove, Diag, SM, getLangOpts()); + { + auto Diag = diag(FileMoveRange.getBegin(), + "std::move of the %select{|const }0" + "%select{expression|variable %5}1 " + "%select{|of the trivially-copyable type %6 }2" + "has no effect%select{; remove std::move()|}3" + "%select{| or make the variable non-const}4") + << IsConstArg << IsVariable << IsTriviallyCopyable + << IsRVRefParam + << (IsConstArg && IsVariable && !IsTriviallyCopyable) << Var + << Arg->getType(); + if (!IsRVRefParam) + replaceCallWithArg(CallMove, Diag, SM, getLangOpts()); + } + if (IsRVRefParam) { + // Generate notes for an invocation with an rvalue reference parameter. + const auto *ReceivingCallExpr = dyn_cast(ReceivingExpr); + const auto *ReceivingConstructExpr = + dyn_cast(ReceivingExpr); + // Skipping the invocation which is a template instantiation. + if ((!ReceivingCallExpr || !ReceivingCallExpr->getDirectCallee() || + ReceivingCallExpr->getDirectCallee()->isTemplateInstantiation()) && + (!ReceivingConstructExpr || + !ReceivingConstructExpr->getConstructor() || + ReceivingConstructExpr->getConstructor()->isTemplateInstantiation())) + return; + + const NamedDecl *FunctionName = nullptr; + FunctionName = + ReceivingCallExpr + ? 
ReceivingCallExpr->getDirectCallee()->getUnderlyingDecl() + : ReceivingConstructExpr->getConstructor()->getUnderlyingDecl(); + + QualType NoRefType = (*InvocationParmType)->getPointeeType(); + PrintingPolicy PolicyWithSuppressedTag(getLangOpts()); + PolicyWithSuppressedTag.SuppressTagKeyword = true; + PolicyWithSuppressedTag.SuppressUnwrittenScope = true; + std::string ExpectParmTypeName = + NoRefType.getAsString(PolicyWithSuppressedTag); + if (!NoRefType->isPointerType()) { + NoRefType.addConst(); + ExpectParmTypeName = + NoRefType.getAsString(PolicyWithSuppressedTag) + " &"; + } + + diag(InvocationParm->getLocation(), + "consider changing the %ordinal0 parameter of %1 from %2 to '%3'", + DiagnosticIDs::Note) + << (InvocationParm->getFunctionScopeIndex() + 1) << FunctionName + << *InvocationParmType << ExpectParmTypeName; + } } else if (ReceivingExpr) { + if ((*InvocationParmType)->isRValueReferenceType()) + return; + auto Diag = diag(FileMoveRange.getBegin(), "passing result of std::move() as a const reference " "argument; no move will actually happen"); diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h index 28fe8d523d943..4a93e4c306e30 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h +++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MOVECONSTANTARGUMENTCHECK_H #include "../ClangTidyCheck.h" +#include "llvm/ADT/DenseSet.h" namespace clang { namespace tidy { @@ -36,6 +37,7 @@ class MoveConstArgCheck : public ClangTidyCheck { private: const bool CheckTriviallyCopyableMove; + llvm::DenseSet AlreadyCheckedMoves; }; } // namespace performance diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 683e914b7e863..e3c1c4b9411bb 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -175,6 
+175,9 @@ Changes in existing checks option to control whether to warn on narrowing integer to floating-point conversions. +- Improved :doc:`performance-move-const-arg` check. + + Removed a wrong FixIt for trivially copyable objects wrapped by ``std::move()`` and passed to an rvalue reference parameter. Removal of ``std::move()`` would break the code. Removed checks ^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp index 06ed6e0b56b1c..c1e5761c538e9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp @@ -246,3 +246,97 @@ void lambda2() { }; f(MoveSemantics()); } + +void showInt(int &&v); +void showInt(int v1, int &&v2); +void showPointer(const char *&&s); +void showPointer2(const char *const &&s); +void showTriviallyCopyable(TriviallyCopyable &&obj); +void showTriviallyCopyablePointer(const TriviallyCopyable *&&obj); +void testFunctions() { + int a = 10; + showInt(std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-10]]:20: note: consider changing the 1st parameter of 'showInt' from 'int &&' to 'const int &' + showInt(int()); + showInt(a, std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-13]]:28: note: consider changing the 2nd parameter of 'showInt' from 'int &&' to 'const int &' + const char* s = ""; + showPointer(std::move(s)); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: 
:[[@LINE-16]]:32: note: consider changing the 1st parameter of 'showPointer' from 'const char *&&' to 'const char *' + showPointer2(std::move(s)); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-18]]:39: note: consider changing the 1st parameter of 'showPointer2' from 'const char *const &&' to 'const char *const' + TriviallyCopyable *obj = new TriviallyCopyable(); + showTriviallyCopyable(std::move(*obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-21]]:48: note: consider changing the 1st parameter of 'showTriviallyCopyable' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &' + showTriviallyCopyablePointer(std::move(obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: std::move of the variable 'obj' of the trivially-copyable type 'TriviallyCopyable *' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-23]]:62: note: consider changing the 1st parameter of 'showTriviallyCopyablePointer' from 'const TriviallyCopyable *&&' to 'const TriviallyCopyable *' +} +template +void forwardToShowInt(T && t) { + showInt(static_cast(t)); +} +void testTemplate() { + int a = 10; + forwardToShowInt(std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] +} + +struct Tmp { + Tmp(); + Tmp(int &&a); + Tmp(int v1, int &&a); + Tmp(const char *&&s); + Tmp(TriviallyCopyable&& obj); + Tmp(const TriviallyCopyable *&&obj); + void showTmp(TriviallyCopyable&& t); + static void showTmpStatic(TriviallyCopyable&& t); +}; +void testMethods() { + Tmp t; + int a = 10; + Tmp t1(std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the 
variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-13]]:13: note: consider changing the 1st parameter of 'Tmp' from 'int &&' to 'const int &' + Tmp t2(a, std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-15]]:21: note: consider changing the 2nd parameter of 'Tmp' from 'int &&' to 'const int &' + const char* s = ""; + Tmp t3(std::move(s)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-18]]:21: note: consider changing the 1st parameter of 'Tmp' from 'const char *&&' to 'const char *' + TriviallyCopyable *obj = new TriviallyCopyable(); + Tmp t4(std::move(*obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-21]]:27: note: consider changing the 1st parameter of 'Tmp' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &' + Tmp t5(std::move(obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 'obj' of the trivially-copyable type 'TriviallyCopyable *' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-23]]:34: note: consider changing the 1st parameter of 'Tmp' from 'const TriviallyCopyable *&&' to 'const TriviallyCopyable *' + t.showTmp(std::move(*obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-25]]:36: note: consider changing the 1st parameter of 'showTmp' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &' + 
Tmp::showTmpStatic(std::move(*obj)); + // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg] + // CHECK-MESSAGES: :[[@LINE-27]]:49: note: consider changing the 1st parameter of 'showTmpStatic' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &' +} + +void showA(A &&v) {} +void testA() { + A a; + showA(std::move(a)); +} + +void testFuncPointer() { + int a = 10; + void (*choice)(int, int &&); + choice = showInt; + choice(std::move(a), std::move(a)); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect; remove std::move() [performance-move-const-arg] + // CHECK-FIXES: choice(a, std::move(a)); + // CHECK-MESSAGES: :[[@LINE-3]]:24: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg] +} From 82bb8a588ddea104faadb54366e3d42b5857fc06 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Thu, 20 Jan 2022 17:44:53 +0800 Subject: [PATCH 131/946] [CSKY] Add codegen support of GlobalTLSAddress lowering There are static and dynamic TLS address lowering in DAG stage according to different TLS model. It needs PseudoTLSLA32 pseudo to get address of TLS-related entry which resides in constant pool. 
--- llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp | 30 ++++ llvm/lib/Target/CSKY/CSKYAsmPrinter.h | 1 + llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 115 ++++++++++++++ llvm/lib/Target/CSKY/CSKYISelLowering.h | 4 + llvm/test/CodeGen/CSKY/tls-models.ll | 179 ++++++++++++++++++++++ 5 files changed, 329 insertions(+) create mode 100644 llvm/test/CodeGen/CSKY/tls-models.ll diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp index cfe97b971c53d..c8269eeacfdb2 100644 --- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp +++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp @@ -58,6 +58,33 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { // instructions) auto-generated. #include "CSKYGenMCPseudoLowering.inc" +void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) { + const CSKYInstrInfo *TII = Subtarget->getInstrInfo(); + + DebugLoc DL = MI->getDebugLoc(); + + MCSymbol *PCLabel = OutContext.getOrCreateSymbol( + Twine(MAI->getPrivateGlobalPrefix()) + "PC" + Twine(getFunctionNumber()) + + "_" + Twine(MI->getOperand(3).getImm())); + + OutStreamer->emitLabel(PCLabel); + + auto Instr = BuildMI(*MF, DL, TII->get(CSKY::LRW32)) + .add(MI->getOperand(0)) + .add(MI->getOperand(2)); + MCInst LRWInst; + MCInstLowering.Lower(Instr, LRWInst); + EmitToStreamer(*OutStreamer, LRWInst); + + Instr = BuildMI(*MF, DL, TII->get(CSKY::GRS32)) + .add(MI->getOperand(1)) + .addSym(PCLabel); + MCInst GRSInst; + MCInstLowering.Lower(Instr, GRSInst); + EmitToStreamer(*OutStreamer, GRSInst); + return; +} + void CSKYAsmPrinter::emitCustomConstantPool(const MachineInstr *MI) { // This instruction represents a floating constant pool in the function. 
@@ -102,6 +129,9 @@ void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) { InConstantPool = false; } + if (MI->getOpcode() == CSKY::PseudoTLSLA32) + return expandTLSLA(MI); + if (MI->getOpcode() == CSKY::CONSTPOOL_ENTRY) return emitCustomConstantPool(MI); diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h index 64c94f08eae4e..04a253d349c80 100644 --- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h +++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h @@ -26,6 +26,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter { /// MachineFunction. MachineConstantPool *MCP; + void expandTLSLA(const MachineInstr *MI); void emitCustomConstantPool(const MachineInstr *MI); public: diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp index b253e10574a74..c4d5d687216d1 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp @@ -119,6 +119,8 @@ SDValue CSKYTargetLowering::LowerOperation(SDValue Op, return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::BlockAddress: @@ -1005,3 +1007,116 @@ Register CSKYTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return CSKY::R1; } + +SDValue CSKYTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + GlobalAddressSDNode *N = cast(Op); + int64_t Offset = N->getOffset(); + MVT XLenVT = MVT::i32; + + TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal()); + SDValue Addr; + switch (Model) { + case TLSModel::LocalExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); + break; + case TLSModel::InitialExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); + break; + case TLSModel::LocalDynamic: + case 
TLSModel::GeneralDynamic: + Addr = getDynamicTLSAddr(N, DAG); + break; + } + + // In order to maximise the opportunity for common subexpression elimination, + // emit a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. + if (Offset != 0) + return DAG.getNode(ISD::ADD, DL, Ty, Addr, + DAG.getConstant(Offset, DL, XLenVT)); + return Addr; +} + +SDValue CSKYTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG, + bool UseGOT) const { + MachineFunction &MF = DAG.getMachineFunction(); + CSKYMachineFunctionInfo *CFI = MF.getInfo(); + + unsigned CSKYPCLabelIndex = CFI->createPICLabelUId(); + + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + + CSKYCP::CSKYCPModifier Flag = UseGOT ? CSKYCP::TLSIE : CSKYCP::TLSLE; + bool AddCurrentAddr = UseGOT ? true : false; + unsigned char PCAjust = UseGOT ? 4 : 0; + + CSKYConstantPoolValue *CPV = + CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, PCAjust, + Flag, AddCurrentAddr, CSKYPCLabelIndex); + SDValue CAddr = DAG.getTargetConstantPool(CPV, Ty); + + SDValue Load; + if (UseGOT) { + SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32); + auto *LRWGRS = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty}, + {CAddr, PICLabel}); + auto LRWADDGRS = + DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1)); + Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), LRWADDGRS, + MachinePointerInfo(N->getGlobal())); + } else { + Load = SDValue(DAG.getMachineNode(CSKY::LRW32, DL, Ty, CAddr), 0); + } + + // Add the thread pointer. 
+ SDValue TPReg = DAG.getRegister(CSKY::R31, MVT::i32); + return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); +} + +SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + CSKYMachineFunctionInfo *CFI = MF.getInfo(); + + unsigned CSKYPCLabelIndex = CFI->createPICLabelUId(); + + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + + CSKYConstantPoolValue *CPV = + CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, 4, + CSKYCP::TLSGD, true, CSKYPCLabelIndex); + SDValue Addr = DAG.getTargetConstantPool(CPV, Ty); + SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32); + + auto *LRWGRS = + DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty}, {Addr, PICLabel}); + + auto Load = + DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1)); + + // Prepare argument list to generate call. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Load; + Entry.Ty = CallTy; + Args.push_back(Entry); + + // Setup call to __tls_get_addr. 
+ TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, CallTy, + DAG.getExternalSymbol("__tls_get_addr", Ty), + std::move(Args)); + SDValue V = LowerCallTo(CLI).first; + + return V; +} diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h index 3b3218b015e8f..e1744d5ce2203 100644 --- a/llvm/lib/Target/CSKY/CSKYISelLowering.h +++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h @@ -154,6 +154,10 @@ class CSKYTargetLowering : public TargetLowering { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, + bool UseGOT) const; + SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const; + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const; }; diff --git a/llvm/test/CodeGen/CSKY/tls-models.ll b/llvm/test/CodeGen/CSKY/tls-models.ll new file mode 100644 index 0000000000000..35dca36f515b2 --- /dev/null +++ b/llvm/test/CodeGen/CSKY/tls-models.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=csky -csky-no-aliases -relocation-model=pic -mattr=+2e3 < %s \ +; RUN: | FileCheck -check-prefix=CSKY-PIC %s +; RUN: llc -mtriple=csky -csky-no-aliases -mattr=+2e3 < %s | FileCheck -check-prefix=CSKY-NOPIC %s + +; Check that TLS symbols are lowered correctly based on the specified +; model. Make sure they're external to avoid them all being optimised to Local +; Exec for the executable. 
+ +@unspecified = external thread_local global i32 +@ld = external thread_local(localdynamic) global i32 +@ie = external thread_local(initialexec) global i32 +@le = external thread_local(localexec) global i32 + + +; No model specified + +define i32* @f1() nounwind { +; CSKY-PIC-LABEL: f1: +; CSKY-PIC: # %bb.0: # %entry +; CSKY-PIC-NEXT: subi16 sp, sp, 8 +; CSKY-PIC-NEXT: st32.w rgb, (sp, 4) # 4-byte Folded Spill +; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill +; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI0_0] +; CSKY-PIC-NEXT: .LPC0_1: +; CSKY-PIC-NEXT: lrw32 a0, [.LCPI0_1] +; CSKY-PIC-NEXT: grs32 a1, .LPC0_1 +; CSKY-PIC-NEXT: addu16 a0, a1 +; CSKY-PIC-NEXT: lrw32 a1, [.LCPI0_2] +; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0) +; CSKY-PIC-NEXT: jsr16 a1 +; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload +; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload +; CSKY-PIC-NEXT: addi16 sp, sp, 8 +; CSKY-PIC-NEXT: rts16 +; CSKY-PIC-NEXT: .p2align 1 +; CSKY-PIC-NEXT: # %bb.1: +; CSKY-PIC-NEXT: .p2align 2 +; CSKY-PIC-NEXT: .LCPI0_0: +; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_ +; CSKY-PIC-NEXT: .LCPI0_1: +; CSKY-PIC-NEXT: .Ltmp0: +; CSKY-PIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@TLSGD32 +; CSKY-PIC-NEXT: .LCPI0_2: +; CSKY-PIC-NEXT: .long __tls_get_addr@PLT +; +; CSKY-NOPIC-LABEL: f1: +; CSKY-NOPIC: # %bb.0: # %entry +; CSKY-NOPIC-NEXT: .LPC0_1: +; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI0_0] +; CSKY-NOPIC-NEXT: grs32 a1, .LPC0_1 +; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0) +; CSKY-NOPIC-NEXT: addu32 a0, a0, tls +; CSKY-NOPIC-NEXT: rts16 +; CSKY-NOPIC-NEXT: .p2align 1 +; CSKY-NOPIC-NEXT: # %bb.1: +; CSKY-NOPIC-NEXT: .p2align 2 +; CSKY-NOPIC-NEXT: .LCPI0_0: +; CSKY-NOPIC-NEXT: .Ltmp0: +; CSKY-NOPIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@GOTTPOFF +entry: + ret i32* @unspecified +} + + +; localdynamic specified + +define i32* @f2() nounwind { +; CSKY-PIC-LABEL: f2: +; CSKY-PIC: # %bb.0: # %entry +; CSKY-PIC-NEXT: subi16 sp, sp, 8 +; CSKY-PIC-NEXT: st32.w rgb, (sp, 
4) # 4-byte Folded Spill +; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill +; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI1_0] +; CSKY-PIC-NEXT: .LPC1_1: +; CSKY-PIC-NEXT: lrw32 a0, [.LCPI1_1] +; CSKY-PIC-NEXT: grs32 a1, .LPC1_1 +; CSKY-PIC-NEXT: addu16 a0, a1 +; CSKY-PIC-NEXT: lrw32 a1, [.LCPI1_2] +; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0) +; CSKY-PIC-NEXT: jsr16 a1 +; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload +; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload +; CSKY-PIC-NEXT: addi16 sp, sp, 8 +; CSKY-PIC-NEXT: rts16 +; CSKY-PIC-NEXT: .p2align 1 +; CSKY-PIC-NEXT: # %bb.1: +; CSKY-PIC-NEXT: .p2align 2 +; CSKY-PIC-NEXT: .LCPI1_0: +; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_ +; CSKY-PIC-NEXT: .LCPI1_1: +; CSKY-PIC-NEXT: .Ltmp1: +; CSKY-PIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@TLSGD32 +; CSKY-PIC-NEXT: .LCPI1_2: +; CSKY-PIC-NEXT: .long __tls_get_addr@PLT +; +; CSKY-NOPIC-LABEL: f2: +; CSKY-NOPIC: # %bb.0: # %entry +; CSKY-NOPIC-NEXT: .LPC1_1: +; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI1_0] +; CSKY-NOPIC-NEXT: grs32 a1, .LPC1_1 +; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0) +; CSKY-NOPIC-NEXT: addu32 a0, a0, tls +; CSKY-NOPIC-NEXT: rts16 +; CSKY-NOPIC-NEXT: .p2align 1 +; CSKY-NOPIC-NEXT: # %bb.1: +; CSKY-NOPIC-NEXT: .p2align 2 +; CSKY-NOPIC-NEXT: .LCPI1_0: +; CSKY-NOPIC-NEXT: .Ltmp1: +; CSKY-NOPIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@GOTTPOFF +entry: + ret i32* @ld +} + + +; initialexec specified + +define i32* @f3() nounwind { +; CSKY-PIC-LABEL: f3: +; CSKY-PIC: # %bb.0: # %entry +; CSKY-PIC-NEXT: .LPC2_1: +; CSKY-PIC-NEXT: lrw32 a0, [.LCPI2_0] +; CSKY-PIC-NEXT: grs32 a1, .LPC2_1 +; CSKY-PIC-NEXT: ldr32.w a0, (a0, a1 << 0) +; CSKY-PIC-NEXT: addu32 a0, a0, tls +; CSKY-PIC-NEXT: rts16 +; CSKY-PIC-NEXT: .p2align 1 +; CSKY-PIC-NEXT: # %bb.1: +; CSKY-PIC-NEXT: .p2align 2 +; CSKY-PIC-NEXT: .LCPI2_0: +; CSKY-PIC-NEXT: .Ltmp2: +; CSKY-PIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF +; +; CSKY-NOPIC-LABEL: f3: +; CSKY-NOPIC: # %bb.0: # %entry +; CSKY-NOPIC-NEXT: .LPC2_1: 
+; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI2_0] +; CSKY-NOPIC-NEXT: grs32 a1, .LPC2_1 +; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0) +; CSKY-NOPIC-NEXT: addu32 a0, a0, tls +; CSKY-NOPIC-NEXT: rts16 +; CSKY-NOPIC-NEXT: .p2align 1 +; CSKY-NOPIC-NEXT: # %bb.1: +; CSKY-NOPIC-NEXT: .p2align 2 +; CSKY-NOPIC-NEXT: .LCPI2_0: +; CSKY-NOPIC-NEXT: .Ltmp2: +; CSKY-NOPIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF +entry: + ret i32* @ie +} + + +; localexec specified + +define i32* @f4() nounwind { +; CSKY-PIC-LABEL: f4: +; CSKY-PIC: # %bb.0: # %entry +; CSKY-PIC-NEXT: lrw32 a0, [.LCPI3_0] +; CSKY-PIC-NEXT: addu32 a0, a0, tls +; CSKY-PIC-NEXT: rts16 +; CSKY-PIC-NEXT: .p2align 1 +; CSKY-PIC-NEXT: # %bb.1: +; CSKY-PIC-NEXT: .p2align 2 +; CSKY-PIC-NEXT: .LCPI3_0: +; CSKY-PIC-NEXT: .long le@TPOFF +; +; CSKY-NOPIC-LABEL: f4: +; CSKY-NOPIC: # %bb.0: # %entry +; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI3_0] +; CSKY-NOPIC-NEXT: addu32 a0, a0, tls +; CSKY-NOPIC-NEXT: rts16 +; CSKY-NOPIC-NEXT: .p2align 1 +; CSKY-NOPIC-NEXT: # %bb.1: +; CSKY-NOPIC-NEXT: .p2align 2 +; CSKY-NOPIC-NEXT: .LCPI3_0: +; CSKY-NOPIC-NEXT: .long le@TPOFF +entry: + ret i32* @le +} From 8def89b5dc82efba039cca4ff9d072e169da7329 Mon Sep 17 00:00:00 2001 From: wangpc Date: Fri, 21 Jan 2022 13:54:09 +0800 Subject: [PATCH 132/946] [RISCV] Set CostPerUse to 1 iff RVC is enabled After D86836, we can define multiple cost values for different cost models. So here we set CostPerUse to 1 iff RVC is enabled to avoid potential impact on RA. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117741 --- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 5 + llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 2 + llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 7 +- llvm/test/CodeGen/RISCV/add-before-shl.ll | 20 +- llvm/test/CodeGen/RISCV/addcarry.ll | 30 +- llvm/test/CodeGen/RISCV/atomic-rmw.ll | 2400 ++++++++--------- llvm/test/CodeGen/RISCV/atomic-signext.ll | 480 ++-- .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll | 98 +- llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 796 +++--- ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 76 +- .../calling-conv-lp64-lp64f-lp64d-common.ll | 38 +- .../test/CodeGen/RISCV/double-arith-strict.ll | 92 +- llvm/test/CodeGen/RISCV/double-arith.ll | 208 +- llvm/test/CodeGen/RISCV/double-convert.ll | 186 +- llvm/test/CodeGen/RISCV/double-fcmp-strict.ll | 64 +- llvm/test/CodeGen/RISCV/double-fcmp.ll | 32 +- llvm/test/CodeGen/RISCV/float-arith-strict.ll | 32 +- llvm/test/CodeGen/RISCV/float-arith.ll | 124 +- llvm/test/CodeGen/RISCV/float-convert.ll | 88 +- llvm/test/CodeGen/RISCV/fp128.ll | 32 +- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 414 +-- llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll | 1175 ++++---- llvm/test/CodeGen/RISCV/half-arith.ll | 708 +++-- llvm/test/CodeGen/RISCV/half-convert.ll | 112 +- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 152 +- llvm/test/CodeGen/RISCV/mul.ll | 388 ++- llvm/test/CodeGen/RISCV/remat.ll | 44 +- llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll | 24 +- llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll | 30 +- llvm/test/CodeGen/RISCV/rv32zbb.ll | 96 +- llvm/test/CodeGen/RISCV/rv32zbp.ll | 546 ++-- llvm/test/CodeGen/RISCV/rv32zbt.ll | 28 +- .../test/CodeGen/RISCV/rv64i-complex-float.ll | 8 +- llvm/test/CodeGen/RISCV/rv64zbp.ll | 56 +- .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 86 +- .../rvv/fixed-vector-strided-load-store.ll | 88 +- .../RISCV/rvv/fixed-vectors-bitreverse.ll | 174 +- .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 80 +- 
.../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 14 +- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 66 +- .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 28 +- .../RISCV/rvv/fixed-vectors-fp-vrgather.ll | 48 +- .../RISCV/rvv/fixed-vectors-unaligned.ll | 32 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 18 +- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 8 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 692 ++--- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 50 +- llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 52 +- .../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 6 +- llvm/test/CodeGen/RISCV/sadd_sat.ll | 8 +- llvm/test/CodeGen/RISCV/sadd_sat_plus.ll | 12 +- .../CodeGen/RISCV/select-optimize-multiple.ll | 98 +- llvm/test/CodeGen/RISCV/sextw-removal.ll | 12 +- llvm/test/CodeGen/RISCV/shadowcallstack.ll | 16 +- llvm/test/CodeGen/RISCV/shifts.ll | 484 ++-- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 32 +- llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 948 ++++--- llvm/test/CodeGen/RISCV/ssub_sat.ll | 14 +- llvm/test/CodeGen/RISCV/ssub_sat_plus.ll | 16 +- llvm/test/CodeGen/RISCV/stack-store-check.ll | 84 +- llvm/test/CodeGen/RISCV/tail-calls.ll | 8 +- .../RISCV/umulo-128-legalisation-lowering.ll | 146 +- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 12 +- llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 950 ++++--- llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll | 16 +- llvm/test/CodeGen/RISCV/xaluo.ll | 840 +++--- 66 files changed, 6801 insertions(+), 6928 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 9094dff1dda18..35363bf37c0db 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -347,3 +347,8 @@ void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset, Ops.push_back(dwarf::DW_OP_minus); } } + +unsigned +RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const { + return MF.getSubtarget<RISCVSubtarget>().hasStdExtC() ?
1 : 0; +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 2b2bbdfbdf32f..9e0ef7902210b 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -66,6 +66,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { void getOffsetOpcodes(const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const override; + + unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override; }; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 79370791efa24..8c1c03b51c249 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -73,12 +73,11 @@ def sub_vrm1_7 : ComposedSubRegIndex; // are not part of GPRC, the most restrictive register class used by the // compressed instruction set. This will influence the greedy register // allocator to reduce the use of registers that can't be encoded in 16 bit -// instructions. This affects register allocation even when compressed -// instruction isn't targeted, we see no major negative codegen impact. +// instructions.
let RegAltNameIndices = [ABIRegAltName] in { def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; - let CostPerUse = [1] in { + let CostPerUse = [0, 1] in { def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; @@ -95,7 +94,7 @@ let RegAltNameIndices = [ABIRegAltName] in { def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; - let CostPerUse = [1] in { + let CostPerUse = [0, 1] in { def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index 212c74b79a343..a977cad504d3e 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -163,23 +163,23 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: lw a4, 12(a1) ; RV32I-NEXT: lw a1, 8(a1) ; RV32I-NEXT: srli a5, a2, 29 -; RV32I-NEXT: slli a4, a3, 3 -; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a6, a3, 3 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: srli a3, a3, 29 -; RV32I-NEXT: slli a5, a1, 3 -; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: slli a6, a1, 3 +; RV32I-NEXT: or a3, a6, a3 ; RV32I-NEXT: srli a1, a1, 29 -; RV32I-NEXT: slli a5, a6, 3 -; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: slli a4, a4, 3 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: slli a2, a2, 3 -; RV32I-NEXT: lui a5, 128 -; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: lui a4, 128 +; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: sw a3, 8(a0) -; RV32I-NEXT: sw a4, 4(a0) +; RV32I-NEXT: sw a5, 4(a0) ; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: jalr 
zero, 0(ra) ; diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll index 71711f9eb5054..18d1364f08d98 100644 --- a/llvm/test/CodeGen/RISCV/addcarry.ll +++ b/llvm/test/CodeGen/RISCV/addcarry.ll @@ -10,19 +10,19 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind { ; RISCV32-LABEL: addcarry: ; RISCV32: # %bb.0: ; RISCV32-NEXT: mul a4, a0, a3 -; RISCV32-NEXT: mulhu a7, a0, a2 -; RISCV32-NEXT: add a4, a7, a4 -; RISCV32-NEXT: mul a5, a1, a2 -; RISCV32-NEXT: add a6, a4, a5 -; RISCV32-NEXT: sltu t0, a6, a4 -; RISCV32-NEXT: sltu a4, a4, a7 -; RISCV32-NEXT: mulhu a5, a0, a3 -; RISCV32-NEXT: add a4, a5, a4 -; RISCV32-NEXT: mulhu a5, a1, a2 -; RISCV32-NEXT: add a4, a4, a5 -; RISCV32-NEXT: add a4, a4, t0 -; RISCV32-NEXT: mul a5, a1, a3 -; RISCV32-NEXT: add a5, a4, a5 +; RISCV32-NEXT: mulhu a5, a0, a2 +; RISCV32-NEXT: add a6, a5, a4 +; RISCV32-NEXT: mul a4, a1, a2 +; RISCV32-NEXT: add a4, a6, a4 +; RISCV32-NEXT: sltu a7, a4, a6 +; RISCV32-NEXT: sltu a5, a6, a5 +; RISCV32-NEXT: mulhu a6, a0, a3 +; RISCV32-NEXT: add a5, a6, a5 +; RISCV32-NEXT: mulhu a6, a1, a2 +; RISCV32-NEXT: add a5, a5, a6 +; RISCV32-NEXT: add a5, a5, a7 +; RISCV32-NEXT: mul a6, a1, a3 +; RISCV32-NEXT: add a5, a5, a6 ; RISCV32-NEXT: bgez a1, .LBB0_2 ; RISCV32-NEXT: # %bb.1: ; RISCV32-NEXT: sub a5, a5, a2 @@ -32,9 +32,9 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind { ; RISCV32-NEXT: sub a5, a5, a0 ; RISCV32-NEXT: .LBB0_4: ; RISCV32-NEXT: slli a1, a5, 30 -; RISCV32-NEXT: srli a3, a6, 2 +; RISCV32-NEXT: srli a3, a4, 2 ; RISCV32-NEXT: or a1, a1, a3 -; RISCV32-NEXT: slli a3, a6, 30 +; RISCV32-NEXT: slli a3, a4, 30 ; RISCV32-NEXT: mul a0, a0, a2 ; RISCV32-NEXT: srli a0, a0, 2 ; RISCV32-NEXT: or a0, a3, a0 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 2b3453456d6a9..62fcee85d5f99 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2008,9 +2008,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 
*%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB35_2 ; RV32I-NEXT: .LBB35_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 @@ -2027,10 +2027,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB35_1 +; RV32I-NEXT: blt s2, a0, .LBB35_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB35_1 ; RV32I-NEXT: .LBB35_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2043,30 +2043,30 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB35_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB35_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: 
and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB35_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB35_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2080,9 +2080,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 @@ -2099,10 +2099,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB35_1 +; RV64I-NEXT: blt s2, a0, .LBB35_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2115,30 +2115,30 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB35_3 +; 
RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB35_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB35_3: # in Loop: Header=BB35_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB35_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB35_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2156,9 +2156,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB36_2 ; RV32I-NEXT: .LBB36_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 @@ -2175,10 +2175,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB36_1 +; RV32I-NEXT: blt s2, a0, .LBB36_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB36_1 ; RV32I-NEXT: .LBB36_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2191,30 +2191,30 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 
24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB36_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB36_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB36_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB36_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2228,9 +2228,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB36_2 ; RV64I-NEXT: .LBB36_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 @@ -2247,10 +2247,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB36_1 +; RV64I-NEXT: blt s2, a0, .LBB36_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB36_1 ; RV64I-NEXT: .LBB36_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2263,30 
+2263,30 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB36_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB36_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB36_3: # in Loop: Header=BB36_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB36_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB36_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2304,9 +2304,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB37_2 ; RV32I-NEXT: .LBB37_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 @@ -2323,10 +2323,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli 
a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB37_1 +; RV32I-NEXT: blt s2, a0, .LBB37_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB37_1 ; RV32I-NEXT: .LBB37_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2339,30 +2339,30 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB37_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB37_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB37_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB37_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2376,9 +2376,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu 
a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB37_2 ; RV64I-NEXT: .LBB37_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 @@ -2395,10 +2395,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB37_1 +; RV64I-NEXT: blt s2, a0, .LBB37_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB37_1 ; RV64I-NEXT: .LBB37_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2411,30 +2411,30 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB37_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB37_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB37_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB37_3: # in Loop: Header=BB37_1 Depth=1 -; 
RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB37_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB37_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2452,9 +2452,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB38_2 ; RV32I-NEXT: .LBB38_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 @@ -2471,10 +2471,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB38_1 +; RV32I-NEXT: blt s2, a0, .LBB38_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB38_1 ; RV32I-NEXT: .LBB38_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2487,30 +2487,30 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB38_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, 
a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB38_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB38_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB38_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2524,9 +2524,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB38_2 ; RV64I-NEXT: .LBB38_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 @@ -2543,10 +2543,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB38_1 +; RV64I-NEXT: blt s2, a0, .LBB38_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB38_1 ; RV64I-NEXT: .LBB38_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2559,30 +2559,30 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; 
RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB38_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB38_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB38_3: # in Loop: Header=BB38_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB38_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB38_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2600,9 +2600,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB39_2 ; RV32I-NEXT: .LBB39_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 @@ -2619,10 +2619,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB39_1 +; RV32I-NEXT: blt s2, a0, .LBB39_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB39_1 ; RV32I-NEXT: .LBB39_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2635,30 +2635,30 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; 
RV32IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB39_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB39_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB39_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB39_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2672,9 +2672,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB39_2 ; RV64I-NEXT: .LBB39_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 @@ -2691,10 +2691,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; 
RV64I-NEXT: blt s1, a0, .LBB39_1 +; RV64I-NEXT: blt s2, a0, .LBB39_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB39_1 ; RV64I-NEXT: .LBB39_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2707,30 +2707,30 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB39_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB39_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB39_3: # in Loop: Header=BB39_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB39_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB39_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2748,9 +2748,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; 
RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB40_2 ; RV32I-NEXT: .LBB40_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 @@ -2767,10 +2767,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB40_1 +; RV32I-NEXT: bge s2, a0, .LBB40_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB40_1 ; RV32I-NEXT: .LBB40_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2783,30 +2783,30 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB40_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB40_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: 
bnez a2, .LBB40_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB40_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2820,9 +2820,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB40_2 ; RV64I-NEXT: .LBB40_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 @@ -2839,10 +2839,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB40_1 +; RV64I-NEXT: bge s2, a0, .LBB40_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB40_1 ; RV64I-NEXT: .LBB40_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2855,30 +2855,30 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB40_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; 
RV64IA-NEXT: bge a1, a7, .LBB40_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB40_3: # in Loop: Header=BB40_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB40_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB40_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -2896,9 +2896,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB41_2 ; RV32I-NEXT: .LBB41_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 @@ -2915,10 +2915,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB41_1 +; RV32I-NEXT: bge s2, a0, .LBB41_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB41_1 ; RV32I-NEXT: .LBB41_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -2931,30 +2931,30 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB41_1: # 
=>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB41_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB41_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB41_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB41_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -2968,9 +2968,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB41_2 ; RV64I-NEXT: .LBB41_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 @@ -2987,10 +2987,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB41_1 +; RV64I-NEXT: bge s2, a0, .LBB41_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB41_1 ; RV64I-NEXT: .LBB41_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3003,30 +3003,30 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_acquire: ; RV64IA: # %bb.0: -; 
RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB41_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB41_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB41_3: # in Loop: Header=BB41_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB41_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB41_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3044,9 +3044,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB42_2 ; RV32I-NEXT: .LBB42_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 @@ -3063,10 +3063,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB42_1 +; RV32I-NEXT: bge s2, a0, .LBB42_1 ; 
RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB42_1 ; RV32I-NEXT: .LBB42_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3079,30 +3079,30 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB42_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB42_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB42_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB42_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3116,9 +3116,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: 
srai s2, a0, 56 ; RV64I-NEXT: j .LBB42_2 ; RV64I-NEXT: .LBB42_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 @@ -3135,10 +3135,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB42_1 +; RV64I-NEXT: bge s2, a0, .LBB42_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB42_1 ; RV64I-NEXT: .LBB42_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3151,30 +3151,30 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB42_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB42_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB42_3: # in Loop: Header=BB42_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB42_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB42_1 ; 
RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3192,9 +3192,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3211,10 +3211,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB43_1 +; RV32I-NEXT: bge s2, a0, .LBB43_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB43_1 ; RV32I-NEXT: .LBB43_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3227,30 +3227,30 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB43_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB43_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; 
RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB43_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB43_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3264,9 +3264,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB43_2 ; RV64I-NEXT: .LBB43_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 @@ -3283,10 +3283,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB43_1 +; RV64I-NEXT: bge s2, a0, .LBB43_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB43_1 ; RV64I-NEXT: .LBB43_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3299,30 +3299,30 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and 
a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB43_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB43_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB43_3: # in Loop: Header=BB43_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB43_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB43_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3340,9 +3340,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3359,10 +3359,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB44_1 +; RV32I-NEXT: bge s2, a0, .LBB44_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB44_1 ; RV32I-NEXT: .LBB44_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3375,30 +3375,30 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 
; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB44_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB44_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB44_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB44_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -3412,9 +3412,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB44_2 ; RV64I-NEXT: .LBB44_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 @@ -3431,10 +3431,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB44_1 +; RV64I-NEXT: bge s2, a0, .LBB44_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_2 
Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB44_1 ; RV64I-NEXT: .LBB44_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3447,30 +3447,30 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB44_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB44_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB44_3: # in Loop: Header=BB44_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB44_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB44_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -3488,8 +3488,8 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; 
RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3505,10 +3505,10 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB45_1 +; RV32I-NEXT: bltu s2, a0, .LBB45_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3521,23 +3521,23 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB45_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB45_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB45_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3552,8 +3552,8 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB45_2 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 @@ -3569,10 +3569,10 @@ 
define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB45_1 +; RV64I-NEXT: bltu s2, a0, .LBB45_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB45_1 ; RV64I-NEXT: .LBB45_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3585,23 +3585,23 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB45_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB45_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB45_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -3620,8 +3620,8 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3637,10 +3637,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; 
RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB46_1 +; RV32I-NEXT: bltu s2, a0, .LBB46_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3653,23 +3653,23 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB46_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB46_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB46_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3684,8 +3684,8 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB46_2 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 @@ -3701,10 +3701,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 
255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB46_1 +; RV64I-NEXT: bltu s2, a0, .LBB46_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB46_1 ; RV64I-NEXT: .LBB46_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3717,23 +3717,23 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB46_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB46_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB46_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -3752,8 +3752,8 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB47_2 ; RV32I-NEXT: .LBB47_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 @@ -3769,10 +3769,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB47_1 +; 
RV32I-NEXT: bltu s2, a0, .LBB47_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB47_1 ; RV32I-NEXT: .LBB47_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3785,23 +3785,23 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB47_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB47_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB47_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3816,8 +3816,8 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB47_2 ; RV64I-NEXT: .LBB47_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 @@ -3833,10 +3833,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB47_1 +; RV64I-NEXT: bltu s2, a0, .LBB47_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start 
; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB47_1 ; RV64I-NEXT: .LBB47_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3849,23 +3849,23 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB47_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB47_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB47_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -3884,8 +3884,8 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB48_2 ; RV32I-NEXT: .LBB48_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 @@ -3901,10 +3901,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB48_1 +; RV32I-NEXT: bltu s2, a0, .LBB48_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV32I-NEXT: mv a2, s2 
+; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB48_1 ; RV32I-NEXT: .LBB48_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -3917,23 +3917,23 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB48_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB48_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB48_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -3948,8 +3948,8 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB48_2 ; RV64I-NEXT: .LBB48_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 @@ -3965,10 +3965,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB48_1 +; RV64I-NEXT: bltu s2, a0, .LBB48_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB48_1 ; RV64I-NEXT: 
.LBB48_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -3981,23 +3981,23 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB48_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB48_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB48_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4016,8 +4016,8 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB49_2 ; RV32I-NEXT: .LBB49_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 @@ -4033,10 +4033,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB49_1 +; RV32I-NEXT: bltu s2, a0, .LBB49_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB49_1 ; RV32I-NEXT: .LBB49_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4049,23 
+4049,23 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB49_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB49_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB49_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4080,8 +4080,8 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB49_2 ; RV64I-NEXT: .LBB49_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 @@ -4097,10 +4097,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB49_1 +; RV64I-NEXT: bltu s2, a0, .LBB49_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB49_1 ; RV64I-NEXT: .LBB49_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4113,23 +4113,23 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 
%b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB49_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB49_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB49_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4148,8 +4148,8 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB50_2 ; RV32I-NEXT: .LBB50_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 @@ -4165,10 +4165,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB50_1 +; RV32I-NEXT: bgeu s2, a0, .LBB50_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB50_1 ; RV32I-NEXT: .LBB50_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4181,23 +4181,23 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: 
atomicrmw_umin_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB50_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB50_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB50_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4212,8 +4212,8 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB50_2 ; RV64I-NEXT: .LBB50_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 @@ -4229,10 +4229,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB50_1 +; RV64I-NEXT: bgeu s2, a0, .LBB50_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB50_1 ; RV64I-NEXT: .LBB50_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4245,23 +4245,23 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, 
a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB50_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB50_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB50_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4280,8 +4280,8 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB51_2 ; RV32I-NEXT: .LBB51_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 @@ -4297,10 +4297,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB51_1 +; RV32I-NEXT: bgeu s2, a0, .LBB51_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB51_1 ; RV32I-NEXT: .LBB51_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4313,23 +4313,23 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i8_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; 
RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB51_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB51_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB51_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4344,8 +4344,8 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB51_2 ; RV64I-NEXT: .LBB51_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 @@ -4361,10 +4361,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB51_1 +; RV64I-NEXT: bgeu s2, a0, .LBB51_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB51_1 ; RV64I-NEXT: .LBB51_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4377,23 +4377,23 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi 
a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB51_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB51_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB51_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4412,8 +4412,8 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB52_2 ; RV32I-NEXT: .LBB52_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 @@ -4429,10 +4429,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB52_1 +; RV32I-NEXT: bgeu s2, a0, .LBB52_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB52_1 ; RV32I-NEXT: .LBB52_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4445,23 +4445,23 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i8_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB52_1: # 
=>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB52_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB52_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB52_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4476,8 +4476,8 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB52_2 ; RV64I-NEXT: .LBB52_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 @@ -4493,10 +4493,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB52_1 +; RV64I-NEXT: bgeu s2, a0, .LBB52_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB52_1 ; RV64I-NEXT: .LBB52_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4509,23 +4509,23 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; 
RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB52_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB52_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB52_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4544,8 +4544,8 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB53_2 ; RV32I-NEXT: .LBB53_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 @@ -4561,10 +4561,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB53_1 +; RV32I-NEXT: bgeu s2, a0, .LBB53_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB53_1 ; RV32I-NEXT: .LBB53_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4577,23 +4577,23 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aq a4, (a2) +; 
RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB53_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB53_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB53_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4608,8 +4608,8 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB53_2 ; RV64I-NEXT: .LBB53_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 @@ -4625,10 +4625,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB53_1 +; RV64I-NEXT: bgeu s2, a0, .LBB53_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB53_1 ; RV64I-NEXT: .LBB53_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4641,23 +4641,23 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aq a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; 
RV64IA-NEXT: bgeu a1, a2, .LBB53_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB53_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB53_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -4676,8 +4676,8 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB54_2 ; RV32I-NEXT: .LBB54_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 @@ -4693,10 +4693,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB54_1 +; RV32I-NEXT: bgeu s2, a0, .LBB54_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB54_1 ; RV32I-NEXT: .LBB54_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -4709,23 +4709,23 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w.aqrl a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB54_3 +; RV32IA-NEXT: bgeu a1, a6, 
.LBB54_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB54_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -4740,8 +4740,8 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB54_2 ; RV64I-NEXT: .LBB54_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 @@ -4757,10 +4757,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB54_1 +; RV64I-NEXT: bgeu s2, a0, .LBB54_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB54_1 ; RV64I-NEXT: .LBB54_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -4773,23 +4773,23 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB54_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB54_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 
Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB54_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -6868,9 +6868,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB90_2 ; RV32I-NEXT: .LBB90_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 @@ -6887,10 +6887,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB90_1 +; RV32I-NEXT: blt s2, a0, .LBB90_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB90_1 ; RV32I-NEXT: .LBB90_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -6903,31 +6903,31 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, 
a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB90_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB90_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB90_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB90_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -6941,9 +6941,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB90_2 ; RV64I-NEXT: .LBB90_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 @@ -6960,10 +6960,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB90_1 +; RV64I-NEXT: blt s2, a0, .LBB90_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB90_1 ; RV64I-NEXT: .LBB90_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -6976,31 +6976,31 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw 
a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB90_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB90_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB90_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB90_3: # in Loop: Header=BB90_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB90_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB90_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7018,9 +7018,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB91_2 ; RV32I-NEXT: .LBB91_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 @@ -7037,10 +7037,10 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB91_1 +; RV32I-NEXT: blt s2, a0, .LBB91_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 
; RV32I-NEXT: j .LBB91_1 ; RV32I-NEXT: .LBB91_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7053,31 +7053,31 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB91_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB91_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB91_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB91_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7091,9 +7091,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB91_2 ; RV64I-NEXT: .LBB91_1: # %atomicrmw.start ; RV64I-NEXT: # 
in Loop: Header=BB91_2 Depth=1 @@ -7110,10 +7110,10 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB91_1 +; RV64I-NEXT: blt s2, a0, .LBB91_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB91_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB91_1 ; RV64I-NEXT: .LBB91_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7126,31 +7126,31 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB91_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB91_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB91_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB91_3: # in Loop: Header=BB91_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB91_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB91_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; 
RV64IA-NEXT: ret @@ -7168,9 +7168,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB92_2 ; RV32I-NEXT: .LBB92_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 @@ -7187,10 +7187,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB92_1 +; RV32I-NEXT: blt s2, a0, .LBB92_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB92_1 ; RV32I-NEXT: .LBB92_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7203,31 +7203,31 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB92_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB92_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: xor a2, 
a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB92_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB92_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7241,9 +7241,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB92_2 ; RV64I-NEXT: .LBB92_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 @@ -7260,10 +7260,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB92_1 +; RV64I-NEXT: blt s2, a0, .LBB92_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB92_1 ; RV64I-NEXT: .LBB92_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7276,31 +7276,31 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) 
-; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB92_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB92_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB92_3: # in Loop: Header=BB92_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB92_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB92_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7318,9 +7318,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB93_2 ; RV32I-NEXT: .LBB93_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 @@ -7337,10 +7337,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB93_1 +; RV32I-NEXT: blt s2, a0, .LBB93_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB93_1 ; RV32I-NEXT: .LBB93_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7353,31 +7353,31 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 
; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB93_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB93_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB93_3: # in Loop: Header=BB93_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB93_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB93_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7391,9 +7391,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB93_2 ; RV64I-NEXT: .LBB93_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 @@ -7410,10 +7410,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB93_1 +; RV64I-NEXT: blt s2, a0, .LBB93_1 ; RV64I-NEXT: # %bb.3: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB93_1 ; RV64I-NEXT: .LBB93_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7426,31 +7426,31 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB93_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB93_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB93_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB93_3: # in Loop: Header=BB93_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB93_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB93_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7468,9 +7468,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai 
s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB94_2 ; RV32I-NEXT: .LBB94_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 @@ -7487,10 +7487,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB94_1 +; RV32I-NEXT: blt s2, a0, .LBB94_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB94_1 ; RV32I-NEXT: .LBB94_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7503,31 +7503,31 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB94_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB94_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB94_1 +; 
RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB94_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7541,9 +7541,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB94_2 ; RV64I-NEXT: .LBB94_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 @@ -7560,10 +7560,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB94_1 +; RV64I-NEXT: blt s2, a0, .LBB94_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB94_1 ; RV64I-NEXT: .LBB94_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7576,31 +7576,31 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB94_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB94_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; 
RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB94_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB94_3: # in Loop: Header=BB94_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB94_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB94_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7618,9 +7618,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB95_2 ; RV32I-NEXT: .LBB95_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 @@ -7637,10 +7637,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB95_1 +; RV32I-NEXT: bge s2, a0, .LBB95_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB95_1 ; RV32I-NEXT: .LBB95_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7653,31 +7653,31 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, 
a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB95_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB95_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB95_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB95_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7691,9 +7691,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB95_2 ; RV64I-NEXT: .LBB95_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 @@ -7710,10 +7710,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB95_1 +; RV64I-NEXT: bge s2, a0, .LBB95_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB95_1 ; RV64I-NEXT: .LBB95_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7726,31 +7726,31 @@ define i16 @atomicrmw_min_i16_monotonic(i16 
*%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB95_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB95_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB95_3: # in Loop: Header=BB95_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB95_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB95_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7768,9 +7768,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB96_2 ; RV32I-NEXT: .LBB96_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 @@ -7787,10 +7787,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; 
RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB96_1 +; RV32I-NEXT: bge s2, a0, .LBB96_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB96_1 ; RV32I-NEXT: .LBB96_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7803,31 +7803,31 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB96_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB96_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB96_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB96_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7841,9 +7841,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: 
mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB96_2 ; RV64I-NEXT: .LBB96_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 @@ -7860,10 +7860,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB96_1 +; RV64I-NEXT: bge s2, a0, .LBB96_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB96_1 ; RV64I-NEXT: .LBB96_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -7876,31 +7876,31 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB96_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB96_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, 
a6 ; RV64IA-NEXT: .LBB96_3: # in Loop: Header=BB96_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB96_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB96_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -7918,9 +7918,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB97_2 ; RV32I-NEXT: .LBB97_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 @@ -7937,10 +7937,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB97_1 +; RV32I-NEXT: bge s2, a0, .LBB97_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB97_1 ; RV32I-NEXT: .LBB97_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -7953,31 +7953,31 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB97_3 +; 
RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB97_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB97_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB97_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -7991,9 +7991,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB97_2 ; RV64I-NEXT: .LBB97_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 @@ -8010,10 +8010,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB97_1 +; RV64I-NEXT: bge s2, a0, .LBB97_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB97_1 ; RV64I-NEXT: .LBB97_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8026,31 +8026,31 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; 
RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB97_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB97_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB97_3: # in Loop: Header=BB97_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB97_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB97_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8068,9 +8068,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB98_2 ; RV32I-NEXT: .LBB98_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 @@ -8087,10 +8087,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB98_1 +; RV32I-NEXT: bge s2, a0, .LBB98_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB98_1 ; RV32I-NEXT: 
.LBB98_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -8103,31 +8103,31 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB98_3 +; RV32IA-NEXT: lr.w.aq a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB98_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB98_1 +; RV32IA-NEXT: sc.w.rl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB98_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -8141,9 +8141,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB98_2 ; RV64I-NEXT: .LBB98_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 
@@ -8160,10 +8160,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB98_1 +; RV64I-NEXT: bge s2, a0, .LBB98_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB98_1 ; RV64I-NEXT: .LBB98_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8176,31 +8176,31 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB98_3 +; RV64IA-NEXT: lr.w.aq a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB98_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB98_3: # in Loop: Header=BB98_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB98_1 +; RV64IA-NEXT: sc.w.rl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB98_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8218,9 +8218,9 
@@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB99_2 ; RV32I-NEXT: .LBB99_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 @@ -8237,10 +8237,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB99_1 +; RV32I-NEXT: bge s2, a0, .LBB99_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB99_1 ; RV32I-NEXT: .LBB99_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -8253,31 +8253,31 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB99_3 +; RV32IA-NEXT: lr.w.aqrl a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB99_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and 
a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB99_1 +; RV32IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB99_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: ret @@ -8291,9 +8291,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB99_2 ; RV64I-NEXT: .LBB99_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 @@ -8310,10 +8310,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB99_1 +; RV64I-NEXT: bge s2, a0, .LBB99_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB99_1 ; RV64I-NEXT: .LBB99_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -8326,31 +8326,31 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a5, (a6) -; RV64IA-NEXT: and 
a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB99_3 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB99_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB99_3: # in Loop: Header=BB99_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB99_1 +; RV64IA-NEXT: sc.w.aqrl a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB99_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret @@ -8367,18 +8367,18 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB100_2 ; RV32I-NEXT: .LBB100_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -8386,12 +8386,12 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: bnez a0, .LBB100_4 ; RV32I-NEXT: .LBB100_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; 
RV32I-NEXT: bltu s1, a0, .LBB100_1 +; RV32I-NEXT: bltu s3, a0, .LBB100_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB100_1 ; RV32I-NEXT: .LBB100_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8405,7 +8405,7 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8413,16 +8413,16 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB100_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB100_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8436,18 +8436,18 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB100_2 ; RV64I-NEXT: .LBB100_1: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -8455,12 +8455,12 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: bnez a0, .LBB100_4 ; RV64I-NEXT: .LBB100_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB100_1 +; RV64I-NEXT: bltu s3, a0, .LBB100_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB100_1 ; RV64I-NEXT: .LBB100_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8474,7 +8474,7 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8482,16 +8482,16 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB100_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB100_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB100_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB100_3: # in Loop: Header=BB100_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB100_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ 
-8509,12 +8509,12 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB101_2 ; RV32I-NEXT: .LBB101_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 @@ -8522,18 +8522,18 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB101_4 ; RV32I-NEXT: .LBB101_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB101_1 +; RV32I-NEXT: bltu s3, a0, .LBB101_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB101_1 ; RV32I-NEXT: .LBB101_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8547,7 +8547,7 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8555,16 +8555,16 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; 
RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB101_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB101_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8578,12 +8578,12 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB101_2 ; RV64I-NEXT: .LBB101_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 @@ -8591,18 +8591,18 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB101_4 ; RV64I-NEXT: .LBB101_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB101_1 +; RV64I-NEXT: bltu s3, a0, .LBB101_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB101_1 ; RV64I-NEXT: .LBB101_4: # %atomicrmw.end ; RV64I-NEXT: mv 
a0, a1 @@ -8616,7 +8616,7 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8624,16 +8624,16 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB101_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB101_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB101_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB101_3: # in Loop: Header=BB101_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB101_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8651,31 +8651,31 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB102_2 ; RV32I-NEXT: .LBB102_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; 
RV32I-NEXT: bnez a0, .LBB102_4 ; RV32I-NEXT: .LBB102_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB102_1 +; RV32I-NEXT: bltu s3, a0, .LBB102_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB102_1 ; RV32I-NEXT: .LBB102_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8689,7 +8689,7 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8697,16 +8697,16 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB102_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB102_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8720,31 +8720,31 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; 
RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB102_2 ; RV64I-NEXT: .LBB102_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB102_4 ; RV64I-NEXT: .LBB102_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB102_1 +; RV64I-NEXT: bltu s3, a0, .LBB102_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB102_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB102_1 ; RV64I-NEXT: .LBB102_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8758,7 +8758,7 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8766,16 +8766,16 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB102_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB102_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB102_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB102_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB102_3: # in Loop: Header=BB102_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, 
(a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB102_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8793,12 +8793,12 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB103_2 ; RV32I-NEXT: .LBB103_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 @@ -8806,18 +8806,18 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB103_4 ; RV32I-NEXT: .LBB103_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB103_1 +; RV32I-NEXT: bltu s3, a0, .LBB103_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB103_1 ; RV32I-NEXT: .LBB103_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8831,7 +8831,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8839,16 +8839,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, 
a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB103_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB103_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -8862,12 +8862,12 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB103_2 ; RV64I-NEXT: .LBB103_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_2 Depth=1 @@ -8875,18 +8875,18 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB103_4 ; RV64I-NEXT: .LBB103_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB103_1 +; RV64I-NEXT: bltu s3, a0, .LBB103_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB103_2 
Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB103_1 ; RV64I-NEXT: .LBB103_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -8900,7 +8900,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -8908,16 +8908,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB103_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB103_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB103_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB103_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB103_3: # in Loop: Header=BB103_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB103_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -8935,12 +8935,12 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB104_2 ; RV32I-NEXT: .LBB104_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 @@ -8948,18 +8948,18 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; 
RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB104_4 ; RV32I-NEXT: .LBB104_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB104_1 +; RV32I-NEXT: bltu s3, a0, .LBB104_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB104_1 ; RV32I-NEXT: .LBB104_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -8973,7 +8973,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -8981,16 +8981,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB104_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB104_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB104_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9004,12 +9004,12 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; 
RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB104_2 ; RV64I-NEXT: .LBB104_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 @@ -9017,18 +9017,18 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB104_4 ; RV64I-NEXT: .LBB104_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB104_1 +; RV64I-NEXT: bltu s3, a0, .LBB104_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB104_1 ; RV64I-NEXT: .LBB104_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9042,7 +9042,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9050,16 +9050,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: 
bgeu a2, a1, .LBB104_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB104_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB104_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB104_3: # in Loop: Header=BB104_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB104_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9077,18 +9077,18 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB105_2 ; RV32I-NEXT: .LBB105_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -9096,12 +9096,12 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: bnez a0, .LBB105_4 ; RV32I-NEXT: .LBB105_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB105_1 +; RV32I-NEXT: bgeu s3, a0, .LBB105_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB105_1 ; RV32I-NEXT: .LBB105_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9115,7 +9115,7 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: 
atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9123,16 +9123,16 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB105_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB105_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB105_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9146,18 +9146,18 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB105_2 ; RV64I-NEXT: .LBB105_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -9165,12 +9165,12 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: bnez a0, .LBB105_4 ; RV64I-NEXT: .LBB105_2: # 
%atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB105_1 +; RV64I-NEXT: bgeu s3, a0, .LBB105_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB105_1 ; RV64I-NEXT: .LBB105_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9184,7 +9184,7 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9192,16 +9192,16 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB105_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB105_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB105_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB105_3: # in Loop: Header=BB105_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB105_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9219,12 +9219,12 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, 
-1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB106_2 ; RV32I-NEXT: .LBB106_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 @@ -9232,18 +9232,18 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB106_4 ; RV32I-NEXT: .LBB106_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB106_1 +; RV32I-NEXT: bgeu s3, a0, .LBB106_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB106_1 ; RV32I-NEXT: .LBB106_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9257,7 +9257,7 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_acquire: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9265,16 +9265,16 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB106_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV32IA-NEXT: 
sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB106_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9288,12 +9288,12 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB106_2 ; RV64I-NEXT: .LBB106_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 @@ -9301,18 +9301,18 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB106_4 ; RV64I-NEXT: .LBB106_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB106_1 +; RV64I-NEXT: bgeu s3, a0, .LBB106_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB106_1 ; RV64I-NEXT: .LBB106_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9326,7 +9326,7 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9334,16 +9334,16 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; 
RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB106_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB106_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB106_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB106_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB106_3: # in Loop: Header=BB106_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB106_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9361,31 +9361,31 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB107_2 ; RV32I-NEXT: .LBB107_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB107_4 ; RV32I-NEXT: .LBB107_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB107_1 +; RV32I-NEXT: bgeu s3, a0, .LBB107_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; 
RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB107_1 ; RV32I-NEXT: .LBB107_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9399,7 +9399,7 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_release: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9407,16 +9407,16 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB107_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB107_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9430,31 +9430,31 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB107_2 ; RV64I-NEXT: .LBB107_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; 
RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB107_4 ; RV64I-NEXT: .LBB107_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB107_1 +; RV64I-NEXT: bgeu s3, a0, .LBB107_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB107_1 ; RV64I-NEXT: .LBB107_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9468,7 +9468,7 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9476,16 +9476,16 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB107_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB107_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB107_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB107_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB107_3: # in Loop: Header=BB107_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB107_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9503,12 +9503,12 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, 
a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB108_2 ; RV32I-NEXT: .LBB108_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 @@ -9516,18 +9516,18 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB108_4 ; RV32I-NEXT: .LBB108_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB108_1 +; RV32I-NEXT: bgeu s3, a0, .LBB108_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB108_1 ; RV32I-NEXT: .LBB108_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9541,7 +9541,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9549,16 +9549,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aq a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aq a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB108_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 ; 
RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 -; RV32IA-NEXT: sc.w.rl a5, a5, (a6) +; RV32IA-NEXT: sc.w.rl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB108_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9572,12 +9572,12 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB108_2 ; RV64I-NEXT: .LBB108_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 @@ -9585,18 +9585,18 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB108_4 ; RV64I-NEXT: .LBB108_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB108_1 +; RV64I-NEXT: bgeu s3, a0, .LBB108_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB108_1 ; RV64I-NEXT: .LBB108_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9610,7 +9610,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw 
a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9618,16 +9618,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aq a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aq a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB108_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB108_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB108_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB108_3: # in Loop: Header=BB108_1 Depth=1 -; RV64IA-NEXT: sc.w.rl a5, a5, (a6) +; RV64IA-NEXT: sc.w.rl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB108_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -9645,12 +9645,12 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB109_2 ; RV32I-NEXT: .LBB109_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 @@ -9658,18 +9658,18 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt ; RV32I-NEXT: lh a1, 10(sp) ; RV32I-NEXT: bnez a0, .LBB109_4 ; RV32I-NEXT: .LBB109_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, 
s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB109_1 +; RV32I-NEXT: bgeu s3, a0, .LBB109_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB109_1 ; RV32I-NEXT: .LBB109_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a1 @@ -9683,7 +9683,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -9691,16 +9691,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w.aqrl a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w.aqrl a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB109_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV32IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV32IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB109_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -9714,12 +9714,12 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j 
.LBB109_2 ; RV64I-NEXT: .LBB109_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 @@ -9727,18 +9727,18 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt ; RV64I-NEXT: lh a1, 6(sp) ; RV64I-NEXT: bnez a0, .LBB109_4 ; RV64I-NEXT: .LBB109_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB109_1 +; RV64I-NEXT: bgeu s3, a0, .LBB109_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB109_1 ; RV64I-NEXT: .LBB109_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a1 @@ -9752,7 +9752,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -9760,16 +9760,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w.aqrl a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w.aqrl a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB109_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB109_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB109_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB109_3: # in Loop: Header=BB109_1 Depth=1 -; RV64IA-NEXT: sc.w.aqrl a5, a5, (a6) +; RV64IA-NEXT: sc.w.aqrl a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB109_1 ; 
RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -11099,8 +11099,8 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB145_2 ; RV64I-NEXT: .LBB145_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 @@ -11115,10 +11115,10 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB145_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB145_1 +; RV64I-NEXT: blt s2, a3, .LBB145_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB145_1 ; RV64I-NEXT: .LBB145_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11188,8 +11188,8 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB146_2 ; RV64I-NEXT: .LBB146_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 @@ -11204,10 +11204,10 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB146_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB146_1 +; RV64I-NEXT: blt s2, a3, .LBB146_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB146_1 ; RV64I-NEXT: .LBB146_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11277,8 +11277,8 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 
%b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB147_2 ; RV64I-NEXT: .LBB147_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 @@ -11293,10 +11293,10 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB147_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB147_1 +; RV64I-NEXT: blt s2, a3, .LBB147_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB147_1 ; RV64I-NEXT: .LBB147_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11366,8 +11366,8 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB148_2 ; RV64I-NEXT: .LBB148_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 @@ -11382,10 +11382,10 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB148_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB148_1 +; RV64I-NEXT: blt s2, a3, .LBB148_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB148_1 ; RV64I-NEXT: .LBB148_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11455,8 +11455,8 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv 
s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB149_2 ; RV64I-NEXT: .LBB149_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 @@ -11471,10 +11471,10 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB149_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB149_1 +; RV64I-NEXT: blt s2, a3, .LBB149_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB149_1 ; RV64I-NEXT: .LBB149_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11544,8 +11544,8 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB150_2 ; RV64I-NEXT: .LBB150_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 @@ -11560,10 +11560,10 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB150_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB150_1 +; RV64I-NEXT: bge s2, a3, .LBB150_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB150_1 ; RV64I-NEXT: .LBB150_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11633,8 +11633,8 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB151_2 ; RV64I-NEXT: 
.LBB151_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 @@ -11649,10 +11649,10 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB151_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB151_1 +; RV64I-NEXT: bge s2, a3, .LBB151_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB151_1 ; RV64I-NEXT: .LBB151_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11722,8 +11722,8 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB152_2 ; RV64I-NEXT: .LBB152_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 @@ -11738,10 +11738,10 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB152_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB152_1 +; RV64I-NEXT: bge s2, a3, .LBB152_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB152_1 ; RV64I-NEXT: .LBB152_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11811,8 +11811,8 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB153_2 ; RV64I-NEXT: .LBB153_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 @@ -11827,10 +11827,10 @@ define i32 
@atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB153_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB153_1 +; RV64I-NEXT: bge s2, a3, .LBB153_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB153_1 ; RV64I-NEXT: .LBB153_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11900,8 +11900,8 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB154_2 ; RV64I-NEXT: .LBB154_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 @@ -11916,10 +11916,10 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB154_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB154_1 +; RV64I-NEXT: bge s2, a3, .LBB154_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB154_1 ; RV64I-NEXT: .LBB154_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -11989,8 +11989,8 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB155_2 ; RV64I-NEXT: .LBB155_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 @@ -12005,10 +12005,10 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB155_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner 
Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB155_1 +; RV64I-NEXT: bltu s2, a3, .LBB155_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB155_1 ; RV64I-NEXT: .LBB155_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12078,8 +12078,8 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB156_2 ; RV64I-NEXT: .LBB156_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 @@ -12094,10 +12094,10 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB156_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB156_1 +; RV64I-NEXT: bltu s2, a3, .LBB156_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB156_1 ; RV64I-NEXT: .LBB156_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12167,8 +12167,8 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB157_2 ; RV64I-NEXT: .LBB157_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 @@ -12183,10 +12183,10 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB157_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB157_1 +; RV64I-NEXT: bltu s2, a3, .LBB157_1 ; 
RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB157_1 ; RV64I-NEXT: .LBB157_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12256,8 +12256,8 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB158_2 ; RV64I-NEXT: .LBB158_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 @@ -12272,10 +12272,10 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB158_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB158_1 +; RV64I-NEXT: bltu s2, a3, .LBB158_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB158_1 ; RV64I-NEXT: .LBB158_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12345,8 +12345,8 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB159_2 ; RV64I-NEXT: .LBB159_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 @@ -12361,10 +12361,10 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB159_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB159_1 +; RV64I-NEXT: bltu s2, a3, .LBB159_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; 
RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB159_1 ; RV64I-NEXT: .LBB159_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12434,8 +12434,8 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB160_2 ; RV64I-NEXT: .LBB160_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 @@ -12450,10 +12450,10 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB160_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB160_1 +; RV64I-NEXT: bgeu s2, a3, .LBB160_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB160_1 ; RV64I-NEXT: .LBB160_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12523,8 +12523,8 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB161_2 ; RV64I-NEXT: .LBB161_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 @@ -12539,10 +12539,10 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB161_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB161_1 +; RV64I-NEXT: bgeu s2, a3, .LBB161_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB161_1 ; RV64I-NEXT: .LBB161_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ 
-12612,8 +12612,8 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB162_2 ; RV64I-NEXT: .LBB162_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 @@ -12628,10 +12628,10 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB162_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB162_1 +; RV64I-NEXT: bgeu s2, a3, .LBB162_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB162_1 ; RV64I-NEXT: .LBB162_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12701,8 +12701,8 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB163_2 ; RV64I-NEXT: .LBB163_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 @@ -12717,10 +12717,10 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB163_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB163_1 +; RV64I-NEXT: bgeu s2, a3, .LBB163_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB163_1 ; RV64I-NEXT: .LBB163_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -12790,8 +12790,8 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte 
Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB164_2 ; RV64I-NEXT: .LBB164_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 @@ -12806,10 +12806,10 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB164_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB164_1 +; RV64I-NEXT: bgeu s2, a3, .LBB164_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB164_1 ; RV64I-NEXT: .LBB164_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index a8785f95d0367..7b4d37985f988 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -584,9 +584,9 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB10_2 ; RV32I-NEXT: .LBB10_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1 @@ -603,10 +603,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB10_1 +; RV32I-NEXT: blt s2, a0, .LBB10_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB10_1 ; RV32I-NEXT: .LBB10_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -620,30 
+620,30 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB10_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB10_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB10_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB10_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 24 @@ -659,9 +659,9 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB10_2 ; RV64I-NEXT: .LBB10_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 @@ -678,10 +678,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) 
nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB10_1 +; RV64I-NEXT: blt s2, a0, .LBB10_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB10_1 ; RV64I-NEXT: .LBB10_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -695,30 +695,30 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB10_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB10_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB10_3: # in Loop: Header=BB10_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB10_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB10_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 56 @@ -738,9 +738,9 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 
16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s1, a0, 24 +; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB11_2 ; RV32I-NEXT: .LBB11_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -757,10 +757,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB11_1 +; RV32I-NEXT: bge s2, a0, .LBB11_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB11_1 ; RV32I-NEXT: .LBB11_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -774,30 +774,30 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: li a4, 255 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 24 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB11_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB11_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; 
RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB11_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB11_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 24 @@ -813,9 +813,9 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s1, a0, 56 +; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB11_2 ; RV64I-NEXT: .LBB11_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -832,10 +832,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB11_1 +; RV64I-NEXT: bge s2, a0, .LBB11_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB11_1 ; RV64I-NEXT: .LBB11_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -849,30 +849,30 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: li a4, 255 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 56 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, 
a4, .LBB11_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB11_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB11_3: # in Loop: Header=BB11_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB11_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB11_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 56 @@ -892,8 +892,8 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB12_2 ; RV32I-NEXT: .LBB12_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 @@ -909,10 +909,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a0, .LBB12_1 +; RV32I-NEXT: bltu s2, a0, .LBB12_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB12_1 ; RV32I-NEXT: .LBB12_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -926,23 +926,23 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; 
RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a2, a1, .LBB12_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB12_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB12_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -959,8 +959,8 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB12_2 ; RV64I-NEXT: .LBB12_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 @@ -976,10 +976,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a0, .LBB12_1 +; RV64I-NEXT: bltu s2, a0, .LBB12_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB12_1 ; RV64I-NEXT: .LBB12_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -993,23 +993,23 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: 
.LBB12_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a2, a1, .LBB12_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB12_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB12_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -1030,8 +1030,8 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: andi s1, a1, 255 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB13_2 ; RV32I-NEXT: .LBB13_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 @@ -1047,10 +1047,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: andi a0, a3, 255 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a0, .LBB13_1 +; RV32I-NEXT: bgeu s2, a0, .LBB13_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB13_1 ; RV32I-NEXT: .LBB13_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 24 @@ -1064,23 +1064,23 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: li a3, 255 ; RV32IA-NEXT: sll a3, a3, a0 ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB13_1: # =>This Inner Loop Header: 
Depth=1 -; RV32IA-NEXT: lr.w a4, (a6) -; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: lr.w a4, (a2) +; RV32IA-NEXT: and a6, a4, a3 ; RV32IA-NEXT: mv a5, a4 -; RV32IA-NEXT: bgeu a1, a2, .LBB13_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB13_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; RV32IA-NEXT: xor a5, a4, a1 ; RV32IA-NEXT: and a5, a5, a3 ; RV32IA-NEXT: xor a5, a4, a5 ; RV32IA-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB13_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a4, a0 @@ -1097,8 +1097,8 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: andi s1, a1, 255 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB13_2 ; RV64I-NEXT: .LBB13_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1 @@ -1114,10 +1114,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: andi a0, a3, 255 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a0, .LBB13_1 +; RV64I-NEXT: bgeu s2, a0, .LBB13_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB13_1 ; RV64I-NEXT: .LBB13_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 56 @@ -1131,23 +1131,23 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: li a3, 255 ; RV64IA-NEXT: sllw a3, a3, a0 ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a4, (a6) -; 
RV64IA-NEXT: and a2, a4, a3 +; RV64IA-NEXT: lr.w a4, (a2) +; RV64IA-NEXT: and a6, a4, a3 ; RV64IA-NEXT: mv a5, a4 -; RV64IA-NEXT: bgeu a1, a2, .LBB13_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB13_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ; RV64IA-NEXT: xor a5, a4, a1 ; RV64IA-NEXT: and a5, a5, a3 ; RV64IA-NEXT: xor a5, a4, a5 ; RV64IA-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB13_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a4, a0 @@ -1636,9 +1636,9 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB21_2 ; RV32I-NEXT: .LBB21_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 @@ -1655,10 +1655,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a0, .LBB21_1 +; RV32I-NEXT: blt s2, a0, .LBB21_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB21_1 ; RV32I-NEXT: .LBB21_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 16 @@ -1672,31 +1672,31 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_max_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, 
a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a4, a1, .LBB21_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a7, a1, .LBB21_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB21_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB21_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 16 @@ -1712,9 +1712,9 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB21_2 ; RV64I-NEXT: .LBB21_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1 @@ -1731,10 +1731,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a0, .LBB21_1 +; RV64I-NEXT: blt s2, a0, .LBB21_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB21_1 ; RV64I-NEXT: .LBB21_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 48 @@ -1748,31 +1748,31 @@ define signext 
i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_max_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a4, a1, .LBB21_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a7, a1, .LBB21_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB21_3: # in Loop: Header=BB21_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB21_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB21_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 48 @@ -1792,9 +1792,9 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s1, a0, 16 +; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB22_2 ; RV32I-NEXT: .LBB22_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 @@ -1811,10 +1811,10 @@ define signext i16 
@atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a0, a3, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a0, .LBB22_1 +; RV32I-NEXT: bge s2, a0, .LBB22_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: j .LBB22_1 ; RV32I-NEXT: .LBB22_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a3, 16 @@ -1828,31 +1828,31 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_min_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: andi a3, a0, 24 ; RV32IA-NEXT: lui a4, 16 ; RV32IA-NEXT: addi a4, a4, -1 -; RV32IA-NEXT: sll a7, a4, a0 +; RV32IA-NEXT: sll a4, a4, a0 ; RV32IA-NEXT: slli a1, a1, 16 ; RV32IA-NEXT: srai a1, a1, 16 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: li a5, 16 ; RV32IA-NEXT: sub a3, a5, a3 ; RV32IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a5, (a6) -; RV32IA-NEXT: and a4, a5, a7 -; RV32IA-NEXT: mv a2, a5 -; RV32IA-NEXT: sll a4, a4, a3 -; RV32IA-NEXT: sra a4, a4, a3 -; RV32IA-NEXT: bge a1, a4, .LBB22_3 +; RV32IA-NEXT: lr.w a5, (a2) +; RV32IA-NEXT: and a7, a5, a4 +; RV32IA-NEXT: mv a6, a5 +; RV32IA-NEXT: sll a7, a7, a3 +; RV32IA-NEXT: sra a7, a7, a3 +; RV32IA-NEXT: bge a1, a7, .LBB22_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: xor a2, a5, a1 -; RV32IA-NEXT: and a2, a2, a7 -; RV32IA-NEXT: xor a2, a5, a2 +; RV32IA-NEXT: xor a6, a5, a1 +; RV32IA-NEXT: and a6, a6, a4 +; RV32IA-NEXT: xor a6, a5, a6 ; RV32IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 -; RV32IA-NEXT: sc.w a2, a2, (a6) -; RV32IA-NEXT: bnez a2, .LBB22_1 +; RV32IA-NEXT: sc.w a6, a6, (a2) +; RV32IA-NEXT: bnez a6, .LBB22_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a5, a0 ; RV32IA-NEXT: slli a0, a0, 16 @@ -1868,9 +1868,9 @@ define signext 
i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s1, a0, 48 +; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB22_2 ; RV64I-NEXT: .LBB22_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 @@ -1887,10 +1887,10 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a0, a3, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a0, .LBB22_1 +; RV64I-NEXT: bge s2, a0, .LBB22_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB22_1 ; RV64I-NEXT: .LBB22_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a3, 48 @@ -1904,31 +1904,31 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_min_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: andi a3, a0, 24 ; RV64IA-NEXT: lui a4, 16 ; RV64IA-NEXT: addiw a4, a4, -1 -; RV64IA-NEXT: sllw a7, a4, a0 +; RV64IA-NEXT: sllw a4, a4, a0 ; RV64IA-NEXT: slli a1, a1, 48 ; RV64IA-NEXT: srai a1, a1, 48 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: li a5, 48 ; RV64IA-NEXT: sub a3, a5, a3 ; RV64IA-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a5, (a6) -; RV64IA-NEXT: and a4, a5, a7 -; RV64IA-NEXT: mv a2, a5 -; RV64IA-NEXT: sll a4, a4, a3 -; RV64IA-NEXT: sra a4, a4, a3 -; RV64IA-NEXT: bge a1, a4, .LBB22_3 +; RV64IA-NEXT: lr.w a5, (a2) +; RV64IA-NEXT: and a7, a5, a4 +; RV64IA-NEXT: mv a6, a5 +; RV64IA-NEXT: sll a7, a7, a3 +; RV64IA-NEXT: sra a7, a7, a3 +; RV64IA-NEXT: bge a1, a7, .LBB22_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 -; RV64IA-NEXT: xor a2, a5, a1 -; 
RV64IA-NEXT: and a2, a2, a7 -; RV64IA-NEXT: xor a2, a5, a2 +; RV64IA-NEXT: xor a6, a5, a1 +; RV64IA-NEXT: and a6, a6, a4 +; RV64IA-NEXT: xor a6, a5, a6 ; RV64IA-NEXT: .LBB22_3: # in Loop: Header=BB22_1 Depth=1 -; RV64IA-NEXT: sc.w a2, a2, (a6) -; RV64IA-NEXT: bnez a2, .LBB22_1 +; RV64IA-NEXT: sc.w a6, a6, (a2) +; RV64IA-NEXT: bnez a6, .LBB22_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: slli a0, a0, 48 @@ -1947,18 +1947,18 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB23_2 ; RV32I-NEXT: .LBB23_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -1966,12 +1966,12 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: bnez a0, .LBB23_4 ; RV32I-NEXT: .LBB23_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s1, a0, .LBB23_1 +; RV32I-NEXT: bltu s3, a0, .LBB23_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB23_1 ; RV32I-NEXT: .LBB23_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a1, 16 @@ -1986,7 +1986,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; 
RV32IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -1994,16 +1994,16 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a2, a1, .LBB23_3 +; RV32IA-NEXT: bgeu a6, a1, .LBB23_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; RV32IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB23_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -2019,18 +2019,18 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB23_2 ; RV64I-NEXT: .LBB23_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -2038,12 +2038,12 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: bnez a0, .LBB23_4 ; 
RV64I-NEXT: .LBB23_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s1, a0, .LBB23_1 +; RV64I-NEXT: bltu s3, a0, .LBB23_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB23_1 ; RV64I-NEXT: .LBB23_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a1, 48 @@ -2058,7 +2058,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ -2066,16 +2066,16 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a2, a1, .LBB23_3 +; RV64IA-NEXT: bgeu a6, a1, .LBB23_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB23_3: # in Loop: Header=BB23_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB23_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -2095,18 +2095,18 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lhu a1, 0(a0) ; 
RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and s1, s2, s0 +; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: and s3, s0, s2 ; RV32I-NEXT: j .LBB24_2 ; RV32I-NEXT: .LBB24_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV32I-NEXT: sh a1, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2@plt @@ -2114,12 +2114,12 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: bnez a0, .LBB24_4 ; RV32I-NEXT: .LBB24_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: and a0, a1, s2 ; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s1, a0, .LBB24_1 +; RV32I-NEXT: bgeu s3, a0, .LBB24_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB24_1 ; RV32I-NEXT: .LBB24_4: # %atomicrmw.end ; RV32I-NEXT: slli a0, a1, 16 @@ -2134,7 +2134,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a6, a0, -4 +; RV32IA-NEXT: andi a2, a0, -4 ; RV32IA-NEXT: slli a0, a0, 3 ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: addi a3, a3, -1 @@ -2142,16 +2142,16 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: sll a1, a1, a0 ; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: lr.w a3, (a6) -; RV32IA-NEXT: and a2, a3, a4 +; RV32IA-NEXT: lr.w a3, (a2) +; RV32IA-NEXT: and a6, a3, a4 ; RV32IA-NEXT: mv a5, a3 -; RV32IA-NEXT: bgeu a1, a2, .LBB24_3 +; RV32IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ; RV32IA-NEXT: xor a5, a3, a1 ; RV32IA-NEXT: and a5, a5, a4 ; RV32IA-NEXT: xor a5, a3, a5 ; 
RV32IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV32IA-NEXT: sc.w a5, a5, (a6) +; RV32IA-NEXT: sc.w a5, a5, (a2) ; RV32IA-NEXT: bnez a5, .LBB24_1 ; RV32IA-NEXT: # %bb.4: ; RV32IA-NEXT: srl a0, a3, a0 @@ -2167,18 +2167,18 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lhu a1, 0(a0) ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and s1, s2, s0 +; RV64I-NEXT: addiw s2, a0, -1 +; RV64I-NEXT: and s3, s0, s2 ; RV64I-NEXT: j .LBB24_2 ; RV64I-NEXT: .LBB24_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV64I-NEXT: sh a1, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2@plt @@ -2186,12 +2186,12 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: bnez a0, .LBB24_4 ; RV64I-NEXT: .LBB24_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: and a0, a1, s2 ; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s1, a0, .LBB24_1 +; RV64I-NEXT: bgeu s3, a0, .LBB24_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB24_1 ; RV64I-NEXT: .LBB24_4: # %atomicrmw.end ; RV64I-NEXT: slli a0, a1, 48 @@ -2206,7 +2206,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a6, a0, -4 +; RV64IA-NEXT: andi a2, a0, -4 ; RV64IA-NEXT: slliw a0, a0, 3 ; RV64IA-NEXT: lui a3, 16 ; RV64IA-NEXT: addiw a3, a3, -1 @@ 
-2214,16 +2214,16 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: sllw a1, a1, a0 ; RV64IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 -; RV64IA-NEXT: lr.w a3, (a6) -; RV64IA-NEXT: and a2, a3, a4 +; RV64IA-NEXT: lr.w a3, (a2) +; RV64IA-NEXT: and a6, a3, a4 ; RV64IA-NEXT: mv a5, a3 -; RV64IA-NEXT: bgeu a1, a2, .LBB24_3 +; RV64IA-NEXT: bgeu a1, a6, .LBB24_3 ; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ; RV64IA-NEXT: xor a5, a3, a1 ; RV64IA-NEXT: and a5, a5, a4 ; RV64IA-NEXT: xor a5, a3, a5 ; RV64IA-NEXT: .LBB24_3: # in Loop: Header=BB24_1 Depth=1 -; RV64IA-NEXT: sc.w a5, a5, (a6) +; RV64IA-NEXT: sc.w a5, a5, (a2) ; RV64IA-NEXT: bnez a5, .LBB24_1 ; RV64IA-NEXT: # %bb.4: ; RV64IA-NEXT: srlw a0, a3, a0 @@ -2546,8 +2546,8 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB32_2 ; RV64I-NEXT: .LBB32_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 @@ -2562,10 +2562,10 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB32_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB32_1 +; RV64I-NEXT: blt s2, a3, .LBB32_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB32_1 ; RV64I-NEXT: .LBB32_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2635,8 +2635,8 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; 
RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB33_2 ; RV64I-NEXT: .LBB33_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 @@ -2651,10 +2651,10 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB33_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB33_1 +; RV64I-NEXT: bge s2, a3, .LBB33_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB33_1 ; RV64I-NEXT: .LBB33_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2724,8 +2724,8 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB34_2 ; RV64I-NEXT: .LBB34_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 @@ -2740,10 +2740,10 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB34_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB34_1 +; RV64I-NEXT: bltu s2, a3, .LBB34_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB34_1 ; RV64I-NEXT: .LBB34_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -2813,8 +2813,8 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: sext.w s1, a1 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 @@ -2829,10 +2829,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: .LBB35_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB35_1 +; RV64I-NEXT: bgeu s2, a3, .LBB35_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: mv a2, s1 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll index 007027e5c0e58..4be19eefa948f 100644 --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll @@ -437,21 +437,21 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -459,32 +459,32 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 
257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB7_2 +; RV32I-NEXT: bnez s2, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB7_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -711,21 +711,21 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; 
RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -733,32 +733,32 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB11_2 +; RV32I-NEXT: bnez s2, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB11_3 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB11_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -877,17 +877,17 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: addi s2, a2, 1365 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: 
and a1, a0, s0 +; RV32I-NEXT: addi s3, a1, 819 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -899,12 +899,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3@plt ; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, s0, 1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: sub a0, s0, a0 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll index 1387a646ce994..5e0c01ee2678c 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -56,26 +56,26 @@ define void @callee() nounwind { ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) ; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw t4, 24(a5) -; RV32I-NEXT: lw t5, 28(a5) -; RV32I-NEXT: lw t6, 32(a5) -; RV32I-NEXT: lw s2, 36(a5) -; RV32I-NEXT: lw s3, 40(a5) -; RV32I-NEXT: lw s4, 44(a5) -; RV32I-NEXT: lw s5, 48(a5) -; RV32I-NEXT: lw s6, 52(a5) -; RV32I-NEXT: lw s7, 56(a5) -; RV32I-NEXT: lw s8, 60(a5) -; RV32I-NEXT: lw s9, 64(a5) -; RV32I-NEXT: lw s10, 68(a5) -; RV32I-NEXT: lw s11, 72(a5) -; RV32I-NEXT: lw ra, 76(a5) -; RV32I-NEXT: lw s1, 80(a5) -; RV32I-NEXT: lw t3, 84(a5) -; RV32I-NEXT: lw t2, 88(a5) -; RV32I-NEXT: lw t1, 92(a5) -; RV32I-NEXT: lw t0, 96(a5) -; RV32I-NEXT: lw s0, 100(a5) +; RV32I-NEXT: lw t0, 24(a5) +; RV32I-NEXT: lw t1, 28(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw 
t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: lw s5, 72(a5) +; RV32I-NEXT: lw s6, 76(a5) +; RV32I-NEXT: lw s7, 80(a5) +; RV32I-NEXT: lw s8, 84(a5) +; RV32I-NEXT: lw s9, 88(a5) +; RV32I-NEXT: lw s10, 92(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) ; RV32I-NEXT: lw a6, 104(a5) ; RV32I-NEXT: lw a4, 108(a5) ; RV32I-NEXT: lw a0, 124(a5) @@ -88,26 +88,26 @@ define void @callee() nounwind { ; RV32I-NEXT: sw a3, 112(a5) ; RV32I-NEXT: sw a4, 108(a5) ; RV32I-NEXT: sw a6, 104(a5) -; RV32I-NEXT: sw s0, 100(a5) -; RV32I-NEXT: sw t0, 96(a5) -; RV32I-NEXT: sw t1, 92(a5) -; RV32I-NEXT: sw t2, 88(a5) -; RV32I-NEXT: sw t3, 84(a5) -; RV32I-NEXT: sw s1, 80(a5) -; RV32I-NEXT: sw ra, 76(a5) -; RV32I-NEXT: sw s11, 72(a5) -; RV32I-NEXT: sw s10, 68(a5) -; RV32I-NEXT: sw s9, 64(a5) -; RV32I-NEXT: sw s8, 60(a5) -; RV32I-NEXT: sw s7, 56(a5) -; RV32I-NEXT: sw s6, 52(a5) -; RV32I-NEXT: sw s5, 48(a5) -; RV32I-NEXT: sw s4, 44(a5) -; RV32I-NEXT: sw s3, 40(a5) -; RV32I-NEXT: sw s2, 36(a5) -; RV32I-NEXT: sw t6, 32(a5) -; RV32I-NEXT: sw t5, 28(a5) -; RV32I-NEXT: sw t4, 24(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) +; RV32I-NEXT: sw s10, 92(a5) +; RV32I-NEXT: sw s9, 88(a5) +; RV32I-NEXT: sw s8, 84(a5) +; RV32I-NEXT: sw s7, 80(a5) +; RV32I-NEXT: sw s6, 76(a5) +; RV32I-NEXT: sw s5, 72(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) +; RV32I-NEXT: sw t1, 28(a5) +; RV32I-NEXT: sw t0, 24(a5) ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 20(a5) ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload @@ -169,24 +169,24 @@ define void @callee() nounwind { ; 
RV32I-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: lw a0, 24(a5) ; RV32I-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw t5, 28(a5) -; RV32I-WITH-FP-NEXT: lw t6, 32(a5) -; RV32I-WITH-FP-NEXT: lw s2, 36(a5) -; RV32I-WITH-FP-NEXT: lw s3, 40(a5) -; RV32I-WITH-FP-NEXT: lw s4, 44(a5) -; RV32I-WITH-FP-NEXT: lw s5, 48(a5) -; RV32I-WITH-FP-NEXT: lw s6, 52(a5) -; RV32I-WITH-FP-NEXT: lw s7, 56(a5) -; RV32I-WITH-FP-NEXT: lw s8, 60(a5) -; RV32I-WITH-FP-NEXT: lw s9, 64(a5) -; RV32I-WITH-FP-NEXT: lw s10, 68(a5) -; RV32I-WITH-FP-NEXT: lw s11, 72(a5) -; RV32I-WITH-FP-NEXT: lw ra, 76(a5) -; RV32I-WITH-FP-NEXT: lw t4, 80(a5) -; RV32I-WITH-FP-NEXT: lw t3, 84(a5) -; RV32I-WITH-FP-NEXT: lw t2, 88(a5) -; RV32I-WITH-FP-NEXT: lw s1, 92(a5) -; RV32I-WITH-FP-NEXT: lw t1, 96(a5) +; RV32I-WITH-FP-NEXT: lw t1, 28(a5) +; RV32I-WITH-FP-NEXT: lw t2, 32(a5) +; RV32I-WITH-FP-NEXT: lw t3, 36(a5) +; RV32I-WITH-FP-NEXT: lw t4, 40(a5) +; RV32I-WITH-FP-NEXT: lw t5, 44(a5) +; RV32I-WITH-FP-NEXT: lw t6, 48(a5) +; RV32I-WITH-FP-NEXT: lw s1, 52(a5) +; RV32I-WITH-FP-NEXT: lw s2, 56(a5) +; RV32I-WITH-FP-NEXT: lw s3, 60(a5) +; RV32I-WITH-FP-NEXT: lw s4, 64(a5) +; RV32I-WITH-FP-NEXT: lw s5, 68(a5) +; RV32I-WITH-FP-NEXT: lw s6, 72(a5) +; RV32I-WITH-FP-NEXT: lw s7, 76(a5) +; RV32I-WITH-FP-NEXT: lw s8, 80(a5) +; RV32I-WITH-FP-NEXT: lw s9, 84(a5) +; RV32I-WITH-FP-NEXT: lw s10, 88(a5) +; RV32I-WITH-FP-NEXT: lw s11, 92(a5) +; RV32I-WITH-FP-NEXT: lw ra, 96(a5) ; RV32I-WITH-FP-NEXT: lw t0, 100(a5) ; RV32I-WITH-FP-NEXT: lw a6, 104(a5) ; RV32I-WITH-FP-NEXT: lw a4, 108(a5) @@ -201,24 +201,24 @@ define void @callee() nounwind { ; RV32I-WITH-FP-NEXT: sw a4, 108(a5) ; RV32I-WITH-FP-NEXT: sw a6, 104(a5) ; RV32I-WITH-FP-NEXT: sw t0, 100(a5) -; RV32I-WITH-FP-NEXT: sw t1, 96(a5) -; RV32I-WITH-FP-NEXT: sw s1, 92(a5) -; RV32I-WITH-FP-NEXT: sw t2, 88(a5) -; RV32I-WITH-FP-NEXT: sw t3, 84(a5) -; RV32I-WITH-FP-NEXT: sw t4, 80(a5) -; RV32I-WITH-FP-NEXT: sw ra, 76(a5) -; 
RV32I-WITH-FP-NEXT: sw s11, 72(a5) -; RV32I-WITH-FP-NEXT: sw s10, 68(a5) -; RV32I-WITH-FP-NEXT: sw s9, 64(a5) -; RV32I-WITH-FP-NEXT: sw s8, 60(a5) -; RV32I-WITH-FP-NEXT: sw s7, 56(a5) -; RV32I-WITH-FP-NEXT: sw s6, 52(a5) -; RV32I-WITH-FP-NEXT: sw s5, 48(a5) -; RV32I-WITH-FP-NEXT: sw s4, 44(a5) -; RV32I-WITH-FP-NEXT: sw s3, 40(a5) -; RV32I-WITH-FP-NEXT: sw s2, 36(a5) -; RV32I-WITH-FP-NEXT: sw t6, 32(a5) -; RV32I-WITH-FP-NEXT: sw t5, 28(a5) +; RV32I-WITH-FP-NEXT: sw ra, 96(a5) +; RV32I-WITH-FP-NEXT: sw s11, 92(a5) +; RV32I-WITH-FP-NEXT: sw s10, 88(a5) +; RV32I-WITH-FP-NEXT: sw s9, 84(a5) +; RV32I-WITH-FP-NEXT: sw s8, 80(a5) +; RV32I-WITH-FP-NEXT: sw s7, 76(a5) +; RV32I-WITH-FP-NEXT: sw s6, 72(a5) +; RV32I-WITH-FP-NEXT: sw s5, 68(a5) +; RV32I-WITH-FP-NEXT: sw s4, 64(a5) +; RV32I-WITH-FP-NEXT: sw s3, 60(a5) +; RV32I-WITH-FP-NEXT: sw s2, 56(a5) +; RV32I-WITH-FP-NEXT: sw s1, 52(a5) +; RV32I-WITH-FP-NEXT: sw t6, 48(a5) +; RV32I-WITH-FP-NEXT: sw t5, 44(a5) +; RV32I-WITH-FP-NEXT: sw t4, 40(a5) +; RV32I-WITH-FP-NEXT: sw t3, 36(a5) +; RV32I-WITH-FP-NEXT: sw t2, 32(a5) +; RV32I-WITH-FP-NEXT: sw t1, 28(a5) ; RV32I-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: sw a0, 24(a5) ; RV32I-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload @@ -279,26 +279,26 @@ define void @callee() nounwind { ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw t4, 24(a5) -; RV64I-NEXT: lw t5, 28(a5) -; RV64I-NEXT: lw t6, 32(a5) -; RV64I-NEXT: lw s2, 36(a5) -; RV64I-NEXT: lw s3, 40(a5) -; RV64I-NEXT: lw s4, 44(a5) -; RV64I-NEXT: lw s5, 48(a5) -; RV64I-NEXT: lw s6, 52(a5) -; RV64I-NEXT: lw s7, 56(a5) -; RV64I-NEXT: lw s8, 60(a5) -; RV64I-NEXT: lw s9, 64(a5) -; RV64I-NEXT: lw s10, 68(a5) -; RV64I-NEXT: lw s11, 72(a5) -; RV64I-NEXT: lw ra, 76(a5) -; RV64I-NEXT: lw s1, 80(a5) -; RV64I-NEXT: lw t3, 84(a5) -; RV64I-NEXT: lw t2, 88(a5) -; RV64I-NEXT: lw t1, 92(a5) -; RV64I-NEXT: lw t0, 
96(a5) -; RV64I-NEXT: lw s0, 100(a5) +; RV64I-NEXT: lw t0, 24(a5) +; RV64I-NEXT: lw t1, 28(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: lw s5, 72(a5) +; RV64I-NEXT: lw s6, 76(a5) +; RV64I-NEXT: lw s7, 80(a5) +; RV64I-NEXT: lw s8, 84(a5) +; RV64I-NEXT: lw s9, 88(a5) +; RV64I-NEXT: lw s10, 92(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) ; RV64I-NEXT: lw a6, 104(a5) ; RV64I-NEXT: lw a4, 108(a5) ; RV64I-NEXT: lw a0, 124(a5) @@ -311,26 +311,26 @@ define void @callee() nounwind { ; RV64I-NEXT: sw a3, 112(a5) ; RV64I-NEXT: sw a4, 108(a5) ; RV64I-NEXT: sw a6, 104(a5) -; RV64I-NEXT: sw s0, 100(a5) -; RV64I-NEXT: sw t0, 96(a5) -; RV64I-NEXT: sw t1, 92(a5) -; RV64I-NEXT: sw t2, 88(a5) -; RV64I-NEXT: sw t3, 84(a5) -; RV64I-NEXT: sw s1, 80(a5) -; RV64I-NEXT: sw ra, 76(a5) -; RV64I-NEXT: sw s11, 72(a5) -; RV64I-NEXT: sw s10, 68(a5) -; RV64I-NEXT: sw s9, 64(a5) -; RV64I-NEXT: sw s8, 60(a5) -; RV64I-NEXT: sw s7, 56(a5) -; RV64I-NEXT: sw s6, 52(a5) -; RV64I-NEXT: sw s5, 48(a5) -; RV64I-NEXT: sw s4, 44(a5) -; RV64I-NEXT: sw s3, 40(a5) -; RV64I-NEXT: sw s2, 36(a5) -; RV64I-NEXT: sw t6, 32(a5) -; RV64I-NEXT: sw t5, 28(a5) -; RV64I-NEXT: sw t4, 24(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) +; RV64I-NEXT: sw s10, 92(a5) +; RV64I-NEXT: sw s9, 88(a5) +; RV64I-NEXT: sw s8, 84(a5) +; RV64I-NEXT: sw s7, 80(a5) +; RV64I-NEXT: sw s6, 76(a5) +; RV64I-NEXT: sw s5, 72(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) +; RV64I-NEXT: sw t1, 28(a5) +; 
RV64I-NEXT: sw t0, 24(a5) ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 20(a5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload @@ -392,24 +392,24 @@ define void @callee() nounwind { ; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: lw a0, 24(a5) ; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw t5, 28(a5) -; RV64I-WITH-FP-NEXT: lw t6, 32(a5) -; RV64I-WITH-FP-NEXT: lw s2, 36(a5) -; RV64I-WITH-FP-NEXT: lw s3, 40(a5) -; RV64I-WITH-FP-NEXT: lw s4, 44(a5) -; RV64I-WITH-FP-NEXT: lw s5, 48(a5) -; RV64I-WITH-FP-NEXT: lw s6, 52(a5) -; RV64I-WITH-FP-NEXT: lw s7, 56(a5) -; RV64I-WITH-FP-NEXT: lw s8, 60(a5) -; RV64I-WITH-FP-NEXT: lw s9, 64(a5) -; RV64I-WITH-FP-NEXT: lw s10, 68(a5) -; RV64I-WITH-FP-NEXT: lw s11, 72(a5) -; RV64I-WITH-FP-NEXT: lw ra, 76(a5) -; RV64I-WITH-FP-NEXT: lw t4, 80(a5) -; RV64I-WITH-FP-NEXT: lw t3, 84(a5) -; RV64I-WITH-FP-NEXT: lw t2, 88(a5) -; RV64I-WITH-FP-NEXT: lw s1, 92(a5) -; RV64I-WITH-FP-NEXT: lw t1, 96(a5) +; RV64I-WITH-FP-NEXT: lw t1, 28(a5) +; RV64I-WITH-FP-NEXT: lw t2, 32(a5) +; RV64I-WITH-FP-NEXT: lw t3, 36(a5) +; RV64I-WITH-FP-NEXT: lw t4, 40(a5) +; RV64I-WITH-FP-NEXT: lw t5, 44(a5) +; RV64I-WITH-FP-NEXT: lw t6, 48(a5) +; RV64I-WITH-FP-NEXT: lw s1, 52(a5) +; RV64I-WITH-FP-NEXT: lw s2, 56(a5) +; RV64I-WITH-FP-NEXT: lw s3, 60(a5) +; RV64I-WITH-FP-NEXT: lw s4, 64(a5) +; RV64I-WITH-FP-NEXT: lw s5, 68(a5) +; RV64I-WITH-FP-NEXT: lw s6, 72(a5) +; RV64I-WITH-FP-NEXT: lw s7, 76(a5) +; RV64I-WITH-FP-NEXT: lw s8, 80(a5) +; RV64I-WITH-FP-NEXT: lw s9, 84(a5) +; RV64I-WITH-FP-NEXT: lw s10, 88(a5) +; RV64I-WITH-FP-NEXT: lw s11, 92(a5) +; RV64I-WITH-FP-NEXT: lw ra, 96(a5) ; RV64I-WITH-FP-NEXT: lw t0, 100(a5) ; RV64I-WITH-FP-NEXT: lw a6, 104(a5) ; RV64I-WITH-FP-NEXT: lw a4, 108(a5) @@ -424,24 +424,24 @@ define void @callee() nounwind { ; RV64I-WITH-FP-NEXT: sw a4, 108(a5) ; RV64I-WITH-FP-NEXT: sw a6, 104(a5) ; RV64I-WITH-FP-NEXT: sw t0, 100(a5) -; 
RV64I-WITH-FP-NEXT: sw t1, 96(a5) -; RV64I-WITH-FP-NEXT: sw s1, 92(a5) -; RV64I-WITH-FP-NEXT: sw t2, 88(a5) -; RV64I-WITH-FP-NEXT: sw t3, 84(a5) -; RV64I-WITH-FP-NEXT: sw t4, 80(a5) -; RV64I-WITH-FP-NEXT: sw ra, 76(a5) -; RV64I-WITH-FP-NEXT: sw s11, 72(a5) -; RV64I-WITH-FP-NEXT: sw s10, 68(a5) -; RV64I-WITH-FP-NEXT: sw s9, 64(a5) -; RV64I-WITH-FP-NEXT: sw s8, 60(a5) -; RV64I-WITH-FP-NEXT: sw s7, 56(a5) -; RV64I-WITH-FP-NEXT: sw s6, 52(a5) -; RV64I-WITH-FP-NEXT: sw s5, 48(a5) -; RV64I-WITH-FP-NEXT: sw s4, 44(a5) -; RV64I-WITH-FP-NEXT: sw s3, 40(a5) -; RV64I-WITH-FP-NEXT: sw s2, 36(a5) -; RV64I-WITH-FP-NEXT: sw t6, 32(a5) -; RV64I-WITH-FP-NEXT: sw t5, 28(a5) +; RV64I-WITH-FP-NEXT: sw ra, 96(a5) +; RV64I-WITH-FP-NEXT: sw s11, 92(a5) +; RV64I-WITH-FP-NEXT: sw s10, 88(a5) +; RV64I-WITH-FP-NEXT: sw s9, 84(a5) +; RV64I-WITH-FP-NEXT: sw s8, 80(a5) +; RV64I-WITH-FP-NEXT: sw s7, 76(a5) +; RV64I-WITH-FP-NEXT: sw s6, 72(a5) +; RV64I-WITH-FP-NEXT: sw s5, 68(a5) +; RV64I-WITH-FP-NEXT: sw s4, 64(a5) +; RV64I-WITH-FP-NEXT: sw s3, 60(a5) +; RV64I-WITH-FP-NEXT: sw s2, 56(a5) +; RV64I-WITH-FP-NEXT: sw s1, 52(a5) +; RV64I-WITH-FP-NEXT: sw t6, 48(a5) +; RV64I-WITH-FP-NEXT: sw t5, 44(a5) +; RV64I-WITH-FP-NEXT: sw t4, 40(a5) +; RV64I-WITH-FP-NEXT: sw t3, 36(a5) +; RV64I-WITH-FP-NEXT: sw t2, 32(a5) +; RV64I-WITH-FP-NEXT: sw t1, 28(a5) ; RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: sw a0, 24(a5) ; RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload @@ -505,100 +505,100 @@ define void @caller() nounwind { ; RV32I-NEXT: sw a0, 80(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, %lo(var+12)(s0) ; RV32I-NEXT: sw a0, 76(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s1, s0, %lo(var) -; RV32I-NEXT: lw a0, 16(s1) +; RV32I-NEXT: addi s5, s0, %lo(var) +; RV32I-NEXT: lw a0, 16(s5) ; RV32I-NEXT: sw a0, 72(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 20(s1) +; RV32I-NEXT: lw a0, 20(s5) ; RV32I-NEXT: sw a0, 68(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 
24(s1) +; RV32I-NEXT: lw a0, 24(s5) ; RV32I-NEXT: sw a0, 64(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 28(s1) +; RV32I-NEXT: lw a0, 28(s5) ; RV32I-NEXT: sw a0, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 32(s1) +; RV32I-NEXT: lw a0, 32(s5) ; RV32I-NEXT: sw a0, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 36(s1) +; RV32I-NEXT: lw a0, 36(s5) ; RV32I-NEXT: sw a0, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 40(s1) +; RV32I-NEXT: lw a0, 40(s5) ; RV32I-NEXT: sw a0, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 44(s1) +; RV32I-NEXT: lw a0, 44(s5) ; RV32I-NEXT: sw a0, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 48(s1) +; RV32I-NEXT: lw a0, 48(s5) ; RV32I-NEXT: sw a0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 52(s1) +; RV32I-NEXT: lw a0, 52(s5) ; RV32I-NEXT: sw a0, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 56(s1) +; RV32I-NEXT: lw a0, 56(s5) ; RV32I-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 60(s1) +; RV32I-NEXT: lw a0, 60(s5) ; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 64(s1) +; RV32I-NEXT: lw a0, 64(s5) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 68(s1) +; RV32I-NEXT: lw a0, 68(s5) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 72(s1) +; RV32I-NEXT: lw a0, 72(s5) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 76(s1) +; RV32I-NEXT: lw a0, 76(s5) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 80(s1) +; RV32I-NEXT: lw a0, 80(s5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, 84(s1) +; RV32I-NEXT: lw a0, 84(s5) ; RV32I-NEXT: sw a0, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s4, 88(s1) -; RV32I-NEXT: lw s5, 92(s1) -; RV32I-NEXT: lw s6, 96(s1) -; RV32I-NEXT: lw s7, 100(s1) -; RV32I-NEXT: lw s8, 104(s1) -; RV32I-NEXT: lw s9, 108(s1) -; RV32I-NEXT: lw s10, 112(s1) -; RV32I-NEXT: lw s11, 116(s1) -; RV32I-NEXT: lw s2, 120(s1) -; RV32I-NEXT: lw s3, 
124(s1) +; RV32I-NEXT: lw s3, 88(s5) +; RV32I-NEXT: lw s4, 92(s5) +; RV32I-NEXT: lw s6, 96(s5) +; RV32I-NEXT: lw s7, 100(s5) +; RV32I-NEXT: lw s8, 104(s5) +; RV32I-NEXT: lw s9, 108(s5) +; RV32I-NEXT: lw s10, 112(s5) +; RV32I-NEXT: lw s11, 116(s5) +; RV32I-NEXT: lw s1, 120(s5) +; RV32I-NEXT: lw s2, 124(s5) ; RV32I-NEXT: call callee@plt -; RV32I-NEXT: sw s3, 124(s1) -; RV32I-NEXT: sw s2, 120(s1) -; RV32I-NEXT: sw s11, 116(s1) -; RV32I-NEXT: sw s10, 112(s1) -; RV32I-NEXT: sw s9, 108(s1) -; RV32I-NEXT: sw s8, 104(s1) -; RV32I-NEXT: sw s7, 100(s1) -; RV32I-NEXT: sw s6, 96(s1) -; RV32I-NEXT: sw s5, 92(s1) -; RV32I-NEXT: sw s4, 88(s1) +; RV32I-NEXT: sw s2, 124(s5) +; RV32I-NEXT: sw s1, 120(s5) +; RV32I-NEXT: sw s11, 116(s5) +; RV32I-NEXT: sw s10, 112(s5) +; RV32I-NEXT: sw s9, 108(s5) +; RV32I-NEXT: sw s8, 104(s5) +; RV32I-NEXT: sw s7, 100(s5) +; RV32I-NEXT: sw s6, 96(s5) +; RV32I-NEXT: sw s4, 92(s5) +; RV32I-NEXT: sw s3, 88(s5) ; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 84(s1) +; RV32I-NEXT: sw a0, 84(s5) ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 80(s1) +; RV32I-NEXT: sw a0, 80(s5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 76(s1) +; RV32I-NEXT: sw a0, 76(s5) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 72(s1) +; RV32I-NEXT: sw a0, 72(s5) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 68(s1) +; RV32I-NEXT: sw a0, 68(s5) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 64(s1) +; RV32I-NEXT: sw a0, 64(s5) ; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 60(s1) +; RV32I-NEXT: sw a0, 60(s5) ; RV32I-NEXT: lw a0, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 56(s1) +; RV32I-NEXT: sw a0, 56(s5) ; RV32I-NEXT: lw a0, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 52(s1) +; RV32I-NEXT: sw a0, 52(s5) ; RV32I-NEXT: lw a0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 48(s1) +; 
RV32I-NEXT: sw a0, 48(s5) ; RV32I-NEXT: lw a0, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 44(s1) +; RV32I-NEXT: sw a0, 44(s5) ; RV32I-NEXT: lw a0, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 40(s1) +; RV32I-NEXT: sw a0, 40(s5) ; RV32I-NEXT: lw a0, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 36(s1) +; RV32I-NEXT: sw a0, 36(s5) ; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 32(s1) +; RV32I-NEXT: sw a0, 32(s5) ; RV32I-NEXT: lw a0, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 28(s1) +; RV32I-NEXT: sw a0, 28(s5) ; RV32I-NEXT: lw a0, 64(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 24(s1) +; RV32I-NEXT: sw a0, 24(s5) ; RV32I-NEXT: lw a0, 68(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 20(s1) +; RV32I-NEXT: sw a0, 20(s5) ; RV32I-NEXT: lw a0, 72(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 16(s1) +; RV32I-NEXT: sw a0, 16(s5) ; RV32I-NEXT: lw a0, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, %lo(var+12)(s0) ; RV32I-NEXT: lw a0, 80(sp) # 4-byte Folded Reload @@ -640,119 +640,119 @@ define void @caller() nounwind { ; RV32I-WITH-FP-NEXT: sw s10, 96(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: sw s11, 92(sp) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: addi s0, sp, 144 -; RV32I-WITH-FP-NEXT: lui s6, %hi(var) -; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: lui s1, %hi(var) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(s1) ; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: addi s1, s6, %lo(var) -; RV32I-WITH-FP-NEXT: lw a0, 
16(s1) +; RV32I-WITH-FP-NEXT: addi s6, s1, %lo(var) +; RV32I-WITH-FP-NEXT: lw a0, 16(s6) ; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 20(s1) +; RV32I-WITH-FP-NEXT: lw a0, 20(s6) ; RV32I-WITH-FP-NEXT: sw a0, -76(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 24(s1) +; RV32I-WITH-FP-NEXT: lw a0, 24(s6) ; RV32I-WITH-FP-NEXT: sw a0, -80(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 28(s1) +; RV32I-WITH-FP-NEXT: lw a0, 28(s6) ; RV32I-WITH-FP-NEXT: sw a0, -84(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 32(s1) +; RV32I-WITH-FP-NEXT: lw a0, 32(s6) ; RV32I-WITH-FP-NEXT: sw a0, -88(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 36(s1) +; RV32I-WITH-FP-NEXT: lw a0, 36(s6) ; RV32I-WITH-FP-NEXT: sw a0, -92(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 40(s1) +; RV32I-WITH-FP-NEXT: lw a0, 40(s6) ; RV32I-WITH-FP-NEXT: sw a0, -96(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 44(s1) +; RV32I-WITH-FP-NEXT: lw a0, 44(s6) ; RV32I-WITH-FP-NEXT: sw a0, -100(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 48(s1) +; RV32I-WITH-FP-NEXT: lw a0, 48(s6) ; RV32I-WITH-FP-NEXT: sw a0, -104(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 52(s1) +; RV32I-WITH-FP-NEXT: lw a0, 52(s6) ; RV32I-WITH-FP-NEXT: sw a0, -108(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 56(s1) +; RV32I-WITH-FP-NEXT: lw a0, 56(s6) ; RV32I-WITH-FP-NEXT: sw a0, -112(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 60(s1) +; RV32I-WITH-FP-NEXT: lw a0, 60(s6) ; RV32I-WITH-FP-NEXT: sw a0, -116(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 64(s1) +; RV32I-WITH-FP-NEXT: lw a0, 64(s6) ; RV32I-WITH-FP-NEXT: sw a0, -120(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 68(s1) +; RV32I-WITH-FP-NEXT: lw a0, 68(s6) ; RV32I-WITH-FP-NEXT: sw a0, -124(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 72(s1) +; RV32I-WITH-FP-NEXT: lw a0, 72(s6) ; RV32I-WITH-FP-NEXT: sw a0, -128(s0) # 
4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 76(s1) +; RV32I-WITH-FP-NEXT: lw a0, 76(s6) ; RV32I-WITH-FP-NEXT: sw a0, -132(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 80(s1) +; RV32I-WITH-FP-NEXT: lw a0, 80(s6) ; RV32I-WITH-FP-NEXT: sw a0, -136(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 84(s1) +; RV32I-WITH-FP-NEXT: lw a0, 84(s6) ; RV32I-WITH-FP-NEXT: sw a0, -140(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, 88(s1) +; RV32I-WITH-FP-NEXT: lw a0, 88(s6) ; RV32I-WITH-FP-NEXT: sw a0, -144(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw s8, 92(s1) -; RV32I-WITH-FP-NEXT: lw s9, 96(s1) -; RV32I-WITH-FP-NEXT: lw s10, 100(s1) -; RV32I-WITH-FP-NEXT: lw s11, 104(s1) -; RV32I-WITH-FP-NEXT: lw s2, 108(s1) -; RV32I-WITH-FP-NEXT: lw s3, 112(s1) -; RV32I-WITH-FP-NEXT: lw s4, 116(s1) -; RV32I-WITH-FP-NEXT: lw s5, 120(s1) -; RV32I-WITH-FP-NEXT: lw s7, 124(s1) +; RV32I-WITH-FP-NEXT: lw s8, 92(s6) +; RV32I-WITH-FP-NEXT: lw s9, 96(s6) +; RV32I-WITH-FP-NEXT: lw s10, 100(s6) +; RV32I-WITH-FP-NEXT: lw s11, 104(s6) +; RV32I-WITH-FP-NEXT: lw s2, 108(s6) +; RV32I-WITH-FP-NEXT: lw s3, 112(s6) +; RV32I-WITH-FP-NEXT: lw s4, 116(s6) +; RV32I-WITH-FP-NEXT: lw s5, 120(s6) +; RV32I-WITH-FP-NEXT: lw s7, 124(s6) ; RV32I-WITH-FP-NEXT: call callee@plt -; RV32I-WITH-FP-NEXT: sw s7, 124(s1) -; RV32I-WITH-FP-NEXT: sw s5, 120(s1) -; RV32I-WITH-FP-NEXT: sw s4, 116(s1) -; RV32I-WITH-FP-NEXT: sw s3, 112(s1) -; RV32I-WITH-FP-NEXT: sw s2, 108(s1) -; RV32I-WITH-FP-NEXT: sw s11, 104(s1) -; RV32I-WITH-FP-NEXT: sw s10, 100(s1) -; RV32I-WITH-FP-NEXT: sw s9, 96(s1) -; RV32I-WITH-FP-NEXT: sw s8, 92(s1) +; RV32I-WITH-FP-NEXT: sw s7, 124(s6) +; RV32I-WITH-FP-NEXT: sw s5, 120(s6) +; RV32I-WITH-FP-NEXT: sw s4, 116(s6) +; RV32I-WITH-FP-NEXT: sw s3, 112(s6) +; RV32I-WITH-FP-NEXT: sw s2, 108(s6) +; RV32I-WITH-FP-NEXT: sw s11, 104(s6) +; RV32I-WITH-FP-NEXT: sw s10, 100(s6) +; RV32I-WITH-FP-NEXT: sw s9, 96(s6) +; RV32I-WITH-FP-NEXT: sw s8, 92(s6) ; RV32I-WITH-FP-NEXT: lw a0, -144(s0) 
# 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 88(s1) +; RV32I-WITH-FP-NEXT: sw a0, 88(s6) ; RV32I-WITH-FP-NEXT: lw a0, -140(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 84(s1) +; RV32I-WITH-FP-NEXT: sw a0, 84(s6) ; RV32I-WITH-FP-NEXT: lw a0, -136(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 80(s1) +; RV32I-WITH-FP-NEXT: sw a0, 80(s6) ; RV32I-WITH-FP-NEXT: lw a0, -132(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 76(s1) +; RV32I-WITH-FP-NEXT: sw a0, 76(s6) ; RV32I-WITH-FP-NEXT: lw a0, -128(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 72(s1) +; RV32I-WITH-FP-NEXT: sw a0, 72(s6) ; RV32I-WITH-FP-NEXT: lw a0, -124(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 68(s1) +; RV32I-WITH-FP-NEXT: sw a0, 68(s6) ; RV32I-WITH-FP-NEXT: lw a0, -120(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 64(s1) +; RV32I-WITH-FP-NEXT: sw a0, 64(s6) ; RV32I-WITH-FP-NEXT: lw a0, -116(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 60(s1) +; RV32I-WITH-FP-NEXT: sw a0, 60(s6) ; RV32I-WITH-FP-NEXT: lw a0, -112(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 56(s1) +; RV32I-WITH-FP-NEXT: sw a0, 56(s6) ; RV32I-WITH-FP-NEXT: lw a0, -108(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 52(s1) +; RV32I-WITH-FP-NEXT: sw a0, 52(s6) ; RV32I-WITH-FP-NEXT: lw a0, -104(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 48(s1) +; RV32I-WITH-FP-NEXT: sw a0, 48(s6) ; RV32I-WITH-FP-NEXT: lw a0, -100(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 44(s1) +; RV32I-WITH-FP-NEXT: sw a0, 44(s6) ; RV32I-WITH-FP-NEXT: lw a0, -96(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 40(s1) +; RV32I-WITH-FP-NEXT: sw a0, 40(s6) ; RV32I-WITH-FP-NEXT: lw a0, -92(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 36(s1) +; RV32I-WITH-FP-NEXT: sw a0, 36(s6) ; RV32I-WITH-FP-NEXT: lw a0, -88(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 32(s1) +; RV32I-WITH-FP-NEXT: sw a0, 32(s6) ; 
RV32I-WITH-FP-NEXT: lw a0, -84(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 28(s1) +; RV32I-WITH-FP-NEXT: sw a0, 28(s6) ; RV32I-WITH-FP-NEXT: lw a0, -80(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 24(s1) +; RV32I-WITH-FP-NEXT: sw a0, 24(s6) ; RV32I-WITH-FP-NEXT: lw a0, -76(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 20(s1) +; RV32I-WITH-FP-NEXT: sw a0, 20(s6) ; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, 16(s1) +; RV32I-WITH-FP-NEXT: sw a0, 16(s6) ; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(s1) ; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(s1) ; RV32I-WITH-FP-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s0, 136(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s1, 132(sp) # 4-byte Folded Reload @@ -794,100 +794,100 @@ define void @caller() nounwind { ; RV64I-NEXT: sd a0, 160(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, %lo(var+12)(s0) ; RV64I-NEXT: sd a0, 152(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi s1, s0, %lo(var) -; RV64I-NEXT: lw a0, 16(s1) +; RV64I-NEXT: addi s5, s0, %lo(var) +; RV64I-NEXT: lw a0, 16(s5) ; RV64I-NEXT: sd a0, 144(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 20(s1) +; RV64I-NEXT: lw a0, 20(s5) ; RV64I-NEXT: sd a0, 136(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 24(s1) +; RV64I-NEXT: lw a0, 24(s5) ; RV64I-NEXT: sd a0, 128(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 28(s1) +; RV64I-NEXT: lw a0, 28(s5) ; RV64I-NEXT: sd a0, 120(sp) # 
8-byte Folded Spill -; RV64I-NEXT: lw a0, 32(s1) +; RV64I-NEXT: lw a0, 32(s5) ; RV64I-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 36(s1) +; RV64I-NEXT: lw a0, 36(s5) ; RV64I-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 40(s1) +; RV64I-NEXT: lw a0, 40(s5) ; RV64I-NEXT: sd a0, 96(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 44(s1) +; RV64I-NEXT: lw a0, 44(s5) ; RV64I-NEXT: sd a0, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 48(s1) +; RV64I-NEXT: lw a0, 48(s5) ; RV64I-NEXT: sd a0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 52(s1) +; RV64I-NEXT: lw a0, 52(s5) ; RV64I-NEXT: sd a0, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 56(s1) +; RV64I-NEXT: lw a0, 56(s5) ; RV64I-NEXT: sd a0, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 60(s1) +; RV64I-NEXT: lw a0, 60(s5) ; RV64I-NEXT: sd a0, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 64(s1) +; RV64I-NEXT: lw a0, 64(s5) ; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 68(s1) +; RV64I-NEXT: lw a0, 68(s5) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 72(s1) +; RV64I-NEXT: lw a0, 72(s5) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 76(s1) +; RV64I-NEXT: lw a0, 76(s5) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 80(s1) +; RV64I-NEXT: lw a0, 80(s5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, 84(s1) +; RV64I-NEXT: lw a0, 84(s5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s4, 88(s1) -; RV64I-NEXT: lw s5, 92(s1) -; RV64I-NEXT: lw s6, 96(s1) -; RV64I-NEXT: lw s7, 100(s1) -; RV64I-NEXT: lw s8, 104(s1) -; RV64I-NEXT: lw s9, 108(s1) -; RV64I-NEXT: lw s10, 112(s1) -; RV64I-NEXT: lw s11, 116(s1) -; RV64I-NEXT: lw s2, 120(s1) -; RV64I-NEXT: lw s3, 124(s1) +; RV64I-NEXT: lw s3, 88(s5) +; RV64I-NEXT: lw s4, 92(s5) +; RV64I-NEXT: lw s6, 96(s5) +; RV64I-NEXT: lw s7, 100(s5) +; RV64I-NEXT: lw s8, 104(s5) +; RV64I-NEXT: lw 
s9, 108(s5) +; RV64I-NEXT: lw s10, 112(s5) +; RV64I-NEXT: lw s11, 116(s5) +; RV64I-NEXT: lw s1, 120(s5) +; RV64I-NEXT: lw s2, 124(s5) ; RV64I-NEXT: call callee@plt -; RV64I-NEXT: sw s3, 124(s1) -; RV64I-NEXT: sw s2, 120(s1) -; RV64I-NEXT: sw s11, 116(s1) -; RV64I-NEXT: sw s10, 112(s1) -; RV64I-NEXT: sw s9, 108(s1) -; RV64I-NEXT: sw s8, 104(s1) -; RV64I-NEXT: sw s7, 100(s1) -; RV64I-NEXT: sw s6, 96(s1) -; RV64I-NEXT: sw s5, 92(s1) -; RV64I-NEXT: sw s4, 88(s1) +; RV64I-NEXT: sw s2, 124(s5) +; RV64I-NEXT: sw s1, 120(s5) +; RV64I-NEXT: sw s11, 116(s5) +; RV64I-NEXT: sw s10, 112(s5) +; RV64I-NEXT: sw s9, 108(s5) +; RV64I-NEXT: sw s8, 104(s5) +; RV64I-NEXT: sw s7, 100(s5) +; RV64I-NEXT: sw s6, 96(s5) +; RV64I-NEXT: sw s4, 92(s5) +; RV64I-NEXT: sw s3, 88(s5) ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 84(s1) +; RV64I-NEXT: sw a0, 84(s5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 80(s1) +; RV64I-NEXT: sw a0, 80(s5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 76(s1) +; RV64I-NEXT: sw a0, 76(s5) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 72(s1) +; RV64I-NEXT: sw a0, 72(s5) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 68(s1) +; RV64I-NEXT: sw a0, 68(s5) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 64(s1) +; RV64I-NEXT: sw a0, 64(s5) ; RV64I-NEXT: ld a0, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 60(s1) +; RV64I-NEXT: sw a0, 60(s5) ; RV64I-NEXT: ld a0, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 56(s1) +; RV64I-NEXT: sw a0, 56(s5) ; RV64I-NEXT: ld a0, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 52(s1) +; RV64I-NEXT: sw a0, 52(s5) ; RV64I-NEXT: ld a0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 48(s1) +; RV64I-NEXT: sw a0, 48(s5) ; RV64I-NEXT: ld a0, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 44(s1) +; RV64I-NEXT: sw a0, 44(s5) ; RV64I-NEXT: ld a0, 96(sp) # 8-byte 
Folded Reload -; RV64I-NEXT: sw a0, 40(s1) +; RV64I-NEXT: sw a0, 40(s5) ; RV64I-NEXT: ld a0, 104(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 36(s1) +; RV64I-NEXT: sw a0, 36(s5) ; RV64I-NEXT: ld a0, 112(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 32(s1) +; RV64I-NEXT: sw a0, 32(s5) ; RV64I-NEXT: ld a0, 120(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 28(s1) +; RV64I-NEXT: sw a0, 28(s5) ; RV64I-NEXT: ld a0, 128(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 24(s1) +; RV64I-NEXT: sw a0, 24(s5) ; RV64I-NEXT: ld a0, 136(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 20(s1) +; RV64I-NEXT: sw a0, 20(s5) ; RV64I-NEXT: ld a0, 144(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, 16(s1) +; RV64I-NEXT: sw a0, 16(s5) ; RV64I-NEXT: ld a0, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, %lo(var+12)(s0) ; RV64I-NEXT: ld a0, 160(sp) # 8-byte Folded Reload @@ -929,119 +929,119 @@ define void @caller() nounwind { ; RV64I-WITH-FP-NEXT: sd s10, 192(sp) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: sd s11, 184(sp) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: addi s0, sp, 288 -; RV64I-WITH-FP-NEXT: lui s6, %hi(var) -; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(s6) +; RV64I-WITH-FP-NEXT: lui s1, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(s1) ; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: addi s1, s6, %lo(var) -; RV64I-WITH-FP-NEXT: lw a0, 16(s1) +; RV64I-WITH-FP-NEXT: addi s6, s1, %lo(var) +; RV64I-WITH-FP-NEXT: lw a0, 16(s6) ; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill -; 
RV64I-WITH-FP-NEXT: lw a0, 20(s1) +; RV64I-WITH-FP-NEXT: lw a0, 20(s6) ; RV64I-WITH-FP-NEXT: sd a0, -152(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 24(s1) +; RV64I-WITH-FP-NEXT: lw a0, 24(s6) ; RV64I-WITH-FP-NEXT: sd a0, -160(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 28(s1) +; RV64I-WITH-FP-NEXT: lw a0, 28(s6) ; RV64I-WITH-FP-NEXT: sd a0, -168(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 32(s1) +; RV64I-WITH-FP-NEXT: lw a0, 32(s6) ; RV64I-WITH-FP-NEXT: sd a0, -176(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 36(s1) +; RV64I-WITH-FP-NEXT: lw a0, 36(s6) ; RV64I-WITH-FP-NEXT: sd a0, -184(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 40(s1) +; RV64I-WITH-FP-NEXT: lw a0, 40(s6) ; RV64I-WITH-FP-NEXT: sd a0, -192(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 44(s1) +; RV64I-WITH-FP-NEXT: lw a0, 44(s6) ; RV64I-WITH-FP-NEXT: sd a0, -200(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 48(s1) +; RV64I-WITH-FP-NEXT: lw a0, 48(s6) ; RV64I-WITH-FP-NEXT: sd a0, -208(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 52(s1) +; RV64I-WITH-FP-NEXT: lw a0, 52(s6) ; RV64I-WITH-FP-NEXT: sd a0, -216(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 56(s1) +; RV64I-WITH-FP-NEXT: lw a0, 56(s6) ; RV64I-WITH-FP-NEXT: sd a0, -224(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 60(s1) +; RV64I-WITH-FP-NEXT: lw a0, 60(s6) ; RV64I-WITH-FP-NEXT: sd a0, -232(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 64(s1) +; RV64I-WITH-FP-NEXT: lw a0, 64(s6) ; RV64I-WITH-FP-NEXT: sd a0, -240(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 68(s1) +; RV64I-WITH-FP-NEXT: lw a0, 68(s6) ; RV64I-WITH-FP-NEXT: sd a0, -248(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 72(s1) +; RV64I-WITH-FP-NEXT: lw a0, 72(s6) ; RV64I-WITH-FP-NEXT: sd a0, -256(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 76(s1) +; RV64I-WITH-FP-NEXT: lw a0, 76(s6) ; RV64I-WITH-FP-NEXT: sd a0, -264(s0) # 8-byte 
Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 80(s1) +; RV64I-WITH-FP-NEXT: lw a0, 80(s6) ; RV64I-WITH-FP-NEXT: sd a0, -272(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 84(s1) +; RV64I-WITH-FP-NEXT: lw a0, 84(s6) ; RV64I-WITH-FP-NEXT: sd a0, -280(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, 88(s1) +; RV64I-WITH-FP-NEXT: lw a0, 88(s6) ; RV64I-WITH-FP-NEXT: sd a0, -288(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw s8, 92(s1) -; RV64I-WITH-FP-NEXT: lw s9, 96(s1) -; RV64I-WITH-FP-NEXT: lw s10, 100(s1) -; RV64I-WITH-FP-NEXT: lw s11, 104(s1) -; RV64I-WITH-FP-NEXT: lw s2, 108(s1) -; RV64I-WITH-FP-NEXT: lw s3, 112(s1) -; RV64I-WITH-FP-NEXT: lw s4, 116(s1) -; RV64I-WITH-FP-NEXT: lw s5, 120(s1) -; RV64I-WITH-FP-NEXT: lw s7, 124(s1) +; RV64I-WITH-FP-NEXT: lw s8, 92(s6) +; RV64I-WITH-FP-NEXT: lw s9, 96(s6) +; RV64I-WITH-FP-NEXT: lw s10, 100(s6) +; RV64I-WITH-FP-NEXT: lw s11, 104(s6) +; RV64I-WITH-FP-NEXT: lw s2, 108(s6) +; RV64I-WITH-FP-NEXT: lw s3, 112(s6) +; RV64I-WITH-FP-NEXT: lw s4, 116(s6) +; RV64I-WITH-FP-NEXT: lw s5, 120(s6) +; RV64I-WITH-FP-NEXT: lw s7, 124(s6) ; RV64I-WITH-FP-NEXT: call callee@plt -; RV64I-WITH-FP-NEXT: sw s7, 124(s1) -; RV64I-WITH-FP-NEXT: sw s5, 120(s1) -; RV64I-WITH-FP-NEXT: sw s4, 116(s1) -; RV64I-WITH-FP-NEXT: sw s3, 112(s1) -; RV64I-WITH-FP-NEXT: sw s2, 108(s1) -; RV64I-WITH-FP-NEXT: sw s11, 104(s1) -; RV64I-WITH-FP-NEXT: sw s10, 100(s1) -; RV64I-WITH-FP-NEXT: sw s9, 96(s1) -; RV64I-WITH-FP-NEXT: sw s8, 92(s1) +; RV64I-WITH-FP-NEXT: sw s7, 124(s6) +; RV64I-WITH-FP-NEXT: sw s5, 120(s6) +; RV64I-WITH-FP-NEXT: sw s4, 116(s6) +; RV64I-WITH-FP-NEXT: sw s3, 112(s6) +; RV64I-WITH-FP-NEXT: sw s2, 108(s6) +; RV64I-WITH-FP-NEXT: sw s11, 104(s6) +; RV64I-WITH-FP-NEXT: sw s10, 100(s6) +; RV64I-WITH-FP-NEXT: sw s9, 96(s6) +; RV64I-WITH-FP-NEXT: sw s8, 92(s6) ; RV64I-WITH-FP-NEXT: ld a0, -288(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 88(s1) +; RV64I-WITH-FP-NEXT: sw a0, 88(s6) ; RV64I-WITH-FP-NEXT: ld a0, -280(s0) # 
8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 84(s1) +; RV64I-WITH-FP-NEXT: sw a0, 84(s6) ; RV64I-WITH-FP-NEXT: ld a0, -272(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 80(s1) +; RV64I-WITH-FP-NEXT: sw a0, 80(s6) ; RV64I-WITH-FP-NEXT: ld a0, -264(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 76(s1) +; RV64I-WITH-FP-NEXT: sw a0, 76(s6) ; RV64I-WITH-FP-NEXT: ld a0, -256(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 72(s1) +; RV64I-WITH-FP-NEXT: sw a0, 72(s6) ; RV64I-WITH-FP-NEXT: ld a0, -248(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 68(s1) +; RV64I-WITH-FP-NEXT: sw a0, 68(s6) ; RV64I-WITH-FP-NEXT: ld a0, -240(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 64(s1) +; RV64I-WITH-FP-NEXT: sw a0, 64(s6) ; RV64I-WITH-FP-NEXT: ld a0, -232(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 60(s1) +; RV64I-WITH-FP-NEXT: sw a0, 60(s6) ; RV64I-WITH-FP-NEXT: ld a0, -224(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 56(s1) +; RV64I-WITH-FP-NEXT: sw a0, 56(s6) ; RV64I-WITH-FP-NEXT: ld a0, -216(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 52(s1) +; RV64I-WITH-FP-NEXT: sw a0, 52(s6) ; RV64I-WITH-FP-NEXT: ld a0, -208(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 48(s1) +; RV64I-WITH-FP-NEXT: sw a0, 48(s6) ; RV64I-WITH-FP-NEXT: ld a0, -200(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 44(s1) +; RV64I-WITH-FP-NEXT: sw a0, 44(s6) ; RV64I-WITH-FP-NEXT: ld a0, -192(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 40(s1) +; RV64I-WITH-FP-NEXT: sw a0, 40(s6) ; RV64I-WITH-FP-NEXT: ld a0, -184(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 36(s1) +; RV64I-WITH-FP-NEXT: sw a0, 36(s6) ; RV64I-WITH-FP-NEXT: ld a0, -176(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 32(s1) +; RV64I-WITH-FP-NEXT: sw a0, 32(s6) ; RV64I-WITH-FP-NEXT: ld a0, -168(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 28(s1) +; RV64I-WITH-FP-NEXT: sw a0, 28(s6) ; 
RV64I-WITH-FP-NEXT: ld a0, -160(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 24(s1) +; RV64I-WITH-FP-NEXT: sw a0, 24(s6) ; RV64I-WITH-FP-NEXT: ld a0, -152(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 20(s1) +; RV64I-WITH-FP-NEXT: sw a0, 20(s6) ; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, 16(s1) +; RV64I-WITH-FP-NEXT: sw a0, 16(s6) ; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(s1) ; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(s6) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(s1) ; RV64I-WITH-FP-NEXT: ld ra, 280(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s0, 272(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s1, 264(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index 61a95f3a400a3..0b73661af5c06 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -190,21 +190,21 @@ define i32 @caller_many_scalars() nounwind { define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-LABEL: callee_large_scalars: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: lw a6, 0(a1) -; RV32I-FPELIM-NEXT: lw a7, 0(a0) +; RV32I-FPELIM-NEXT: lw a2, 0(a1) +; RV32I-FPELIM-NEXT: lw a3, 0(a0) ; RV32I-FPELIM-NEXT: lw a4, 4(a1) ; RV32I-FPELIM-NEXT: lw a5, 12(a1) -; RV32I-FPELIM-NEXT: lw a2, 12(a0) -; RV32I-FPELIM-NEXT: lw a3, 
4(a0) +; RV32I-FPELIM-NEXT: lw a6, 12(a0) +; RV32I-FPELIM-NEXT: lw a7, 4(a0) ; RV32I-FPELIM-NEXT: lw a1, 8(a1) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) -; RV32I-FPELIM-NEXT: xor a2, a2, a5 -; RV32I-FPELIM-NEXT: xor a3, a3, a4 -; RV32I-FPELIM-NEXT: or a2, a3, a2 +; RV32I-FPELIM-NEXT: xor a5, a6, a5 +; RV32I-FPELIM-NEXT: xor a4, a7, a4 +; RV32I-FPELIM-NEXT: or a4, a4, a5 ; RV32I-FPELIM-NEXT: xor a0, a0, a1 -; RV32I-FPELIM-NEXT: xor a1, a7, a6 +; RV32I-FPELIM-NEXT: xor a1, a3, a2 ; RV32I-FPELIM-NEXT: or a0, a1, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a2 +; RV32I-FPELIM-NEXT: or a0, a0, a4 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -214,21 +214,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 -; RV32I-WITHFP-NEXT: lw a6, 0(a1) -; RV32I-WITHFP-NEXT: lw a7, 0(a0) +; RV32I-WITHFP-NEXT: lw a2, 0(a1) +; RV32I-WITHFP-NEXT: lw a3, 0(a0) ; RV32I-WITHFP-NEXT: lw a4, 4(a1) ; RV32I-WITHFP-NEXT: lw a5, 12(a1) -; RV32I-WITHFP-NEXT: lw a2, 12(a0) -; RV32I-WITHFP-NEXT: lw a3, 4(a0) +; RV32I-WITHFP-NEXT: lw a6, 12(a0) +; RV32I-WITHFP-NEXT: lw a7, 4(a0) ; RV32I-WITHFP-NEXT: lw a1, 8(a1) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) -; RV32I-WITHFP-NEXT: xor a2, a2, a5 -; RV32I-WITHFP-NEXT: xor a3, a3, a4 -; RV32I-WITHFP-NEXT: or a2, a3, a2 +; RV32I-WITHFP-NEXT: xor a5, a6, a5 +; RV32I-WITHFP-NEXT: xor a4, a7, a4 +; RV32I-WITHFP-NEXT: or a4, a4, a5 ; RV32I-WITHFP-NEXT: xor a0, a0, a1 -; RV32I-WITHFP-NEXT: xor a1, a7, a6 +; RV32I-WITHFP-NEXT: xor a1, a3, a2 ; RV32I-WITHFP-NEXT: or a0, a1, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a2 +; RV32I-WITHFP-NEXT: or a0, a0, a4 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -297,21 +297,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; 
RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 4(sp) -; RV32I-FPELIM-NEXT: lw a6, 0(a0) -; RV32I-FPELIM-NEXT: lw t0, 0(a7) +; RV32I-FPELIM-NEXT: lw a1, 0(a0) +; RV32I-FPELIM-NEXT: lw a2, 0(a7) ; RV32I-FPELIM-NEXT: lw a3, 4(a0) ; RV32I-FPELIM-NEXT: lw a4, 12(a0) ; RV32I-FPELIM-NEXT: lw a5, 12(a7) -; RV32I-FPELIM-NEXT: lw a1, 4(a7) +; RV32I-FPELIM-NEXT: lw a6, 4(a7) ; RV32I-FPELIM-NEXT: lw a0, 8(a0) -; RV32I-FPELIM-NEXT: lw a2, 8(a7) +; RV32I-FPELIM-NEXT: lw a7, 8(a7) ; RV32I-FPELIM-NEXT: xor a4, a5, a4 -; RV32I-FPELIM-NEXT: xor a1, a1, a3 -; RV32I-FPELIM-NEXT: or a1, a1, a4 -; RV32I-FPELIM-NEXT: xor a0, a2, a0 -; RV32I-FPELIM-NEXT: xor a2, t0, a6 -; RV32I-FPELIM-NEXT: or a0, a2, a0 -; RV32I-FPELIM-NEXT: or a0, a0, a1 +; RV32I-FPELIM-NEXT: xor a3, a6, a3 +; RV32I-FPELIM-NEXT: or a3, a3, a4 +; RV32I-FPELIM-NEXT: xor a0, a7, a0 +; RV32I-FPELIM-NEXT: xor a1, a2, a1 +; RV32I-FPELIM-NEXT: or a0, a1, a0 +; RV32I-FPELIM-NEXT: or a0, a0, a3 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -322,21 +322,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 4(s0) -; RV32I-WITHFP-NEXT: lw a6, 0(a0) -; RV32I-WITHFP-NEXT: lw t0, 0(a7) +; RV32I-WITHFP-NEXT: lw a1, 0(a0) +; RV32I-WITHFP-NEXT: lw a2, 0(a7) ; RV32I-WITHFP-NEXT: lw a3, 4(a0) ; RV32I-WITHFP-NEXT: lw a4, 12(a0) ; RV32I-WITHFP-NEXT: lw a5, 12(a7) -; RV32I-WITHFP-NEXT: lw a1, 4(a7) +; RV32I-WITHFP-NEXT: lw a6, 4(a7) ; RV32I-WITHFP-NEXT: lw a0, 8(a0) -; RV32I-WITHFP-NEXT: lw a2, 8(a7) +; RV32I-WITHFP-NEXT: lw a7, 8(a7) ; RV32I-WITHFP-NEXT: xor a4, a5, a4 -; RV32I-WITHFP-NEXT: xor a1, a1, a3 -; RV32I-WITHFP-NEXT: or a1, a1, a4 -; RV32I-WITHFP-NEXT: xor a0, a2, a0 -; RV32I-WITHFP-NEXT: xor a2, t0, a6 -; RV32I-WITHFP-NEXT: or a0, a2, a0 -; RV32I-WITHFP-NEXT: or a0, a0, a1 +; RV32I-WITHFP-NEXT: xor a3, 
a6, a3 +; RV32I-WITHFP-NEXT: or a3, a3, a4 +; RV32I-WITHFP-NEXT: xor a0, a7, a0 +; RV32I-WITHFP-NEXT: xor a1, a2, a1 +; RV32I-WITHFP-NEXT: or a0, a1, a0 +; RV32I-WITHFP-NEXT: or a0, a0, a3 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index c3c4666d0f504..f5d316e684740 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -106,21 +106,21 @@ define i32 @caller_many_scalars() nounwind { define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-LABEL: callee_large_scalars: ; RV64I: # %bb.0: -; RV64I-NEXT: ld a6, 0(a1) -; RV64I-NEXT: ld a7, 0(a0) +; RV64I-NEXT: ld a2, 0(a1) +; RV64I-NEXT: ld a3, 0(a0) ; RV64I-NEXT: ld a4, 8(a1) ; RV64I-NEXT: ld a5, 24(a1) -; RV64I-NEXT: ld a2, 24(a0) -; RV64I-NEXT: ld a3, 8(a0) +; RV64I-NEXT: ld a6, 24(a0) +; RV64I-NEXT: ld a7, 8(a0) ; RV64I-NEXT: ld a1, 16(a1) ; RV64I-NEXT: ld a0, 16(a0) -; RV64I-NEXT: xor a2, a2, a5 -; RV64I-NEXT: xor a3, a3, a4 -; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: xor a5, a6, a5 +; RV64I-NEXT: xor a4, a7, a4 +; RV64I-NEXT: or a4, a4, a5 ; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: xor a1, a7, a6 +; RV64I-NEXT: xor a1, a3, a2 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a4 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %a, %b @@ -161,21 +161,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, ; RV64I-LABEL: callee_large_scalars_exhausted_regs: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 8(sp) -; RV64I-NEXT: ld a6, 0(a0) -; RV64I-NEXT: ld t0, 0(a7) +; RV64I-NEXT: ld a1, 0(a0) +; RV64I-NEXT: ld a2, 0(a7) ; RV64I-NEXT: ld a3, 8(a0) ; RV64I-NEXT: ld a4, 24(a0) ; RV64I-NEXT: ld a5, 24(a7) 
-; RV64I-NEXT: ld a1, 8(a7) +; RV64I-NEXT: ld a6, 8(a7) ; RV64I-NEXT: ld a0, 16(a0) -; RV64I-NEXT: ld a2, 16(a7) +; RV64I-NEXT: ld a7, 16(a7) ; RV64I-NEXT: xor a4, a5, a4 -; RV64I-NEXT: xor a1, a1, a3 -; RV64I-NEXT: or a1, a1, a4 -; RV64I-NEXT: xor a0, a2, a0 -; RV64I-NEXT: xor a2, t0, a6 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: xor a3, a6, a3 +; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: xor a0, a7, a0 +; RV64I-NEXT: xor a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %h, %j diff --git a/llvm/test/CodeGen/RISCV/double-arith-strict.ll b/llvm/test/CodeGen/RISCV/double-arith-strict.ll index 9f207cde78f61..915b63588d5a7 100644 --- a/llvm/test/CodeGen/RISCV/double-arith-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-arith-strict.ll @@ -313,10 +313,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ -325,10 +325,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -393,25 +393,25 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: sw s3, 
12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: mv s5, a4 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a3 ; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a2, s1, a0 +; RV32I-NEXT: xor a2, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: mv a3, s2 @@ -434,19 +434,19 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind strictfp { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -489,7 +489,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 +; RV32I-NEXT: mv s0, a5 ; 
RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 @@ -498,20 +498,20 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a3, s0, a0 +; RV32I-NEXT: xor a3, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -531,19 +531,19 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -582,19 +582,19 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; 
RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -654,10 +654,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a2, 0 @@ -666,10 +666,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind strictfp { ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a3, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll index add838302f80c..55c710dbf95d9 100644 --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -479,10 +479,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; 
RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ -491,10 +491,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -559,25 +559,25 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 -; RV32I-NEXT: mv s5, a4 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a3 ; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a2, s1, a0 +; RV32I-NEXT: xor a2, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: mv a3, s2 @@ -600,19 +600,19 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 
8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -655,7 +655,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s4, a5 +; RV32I-NEXT: mv s0, a5 ; RV32I-NEXT: mv s1, a4 ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: mv s3, a0 @@ -664,20 +664,20 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s5, a0 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a0, 524288 -; RV32I-NEXT: xor a3, s0, a0 +; RV32I-NEXT: xor a3, s5, a0 ; RV32I-NEXT: xor a5, a1, a0 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -697,19 +697,19 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 
8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -748,19 +748,19 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -820,10 +820,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a2 ; 
RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a2, 0 @@ -832,10 +832,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lui a0, 524288 ; RV32I-NEXT: xor a3, a1, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a4, s3 -; RV32I-NEXT: mv a5, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a4, s1 +; RV32I-NEXT: mv a5, s0 ; RV32I-NEXT: call fma@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -945,10 +945,10 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: li a2, 0 @@ -956,10 +956,10 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: mv a2, s4 ; RV32I-NEXT: mv a3, s5 @@ -981,16 +981,16 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 -; 
RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __muldf3@plt +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1033,22 +1033,15 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: li a2, 0 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 @@ -1056,15 +1049,22 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __adddf3@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: mv a3, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s3 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __subdf3@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1078,26 +1078,25 @@ define double @fnmadd_d_contract(double %a, 
double %b, double %c) nounwind { ; ; RV64I-LABEL: fnmadd_d_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: li a1, -1 @@ -1105,12 +1104,11 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subdf3@plt -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor @@ -1148,29 
+1146,29 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a5 -; RV32I-NEXT: mv s3, a4 -; RV32I-NEXT: mv s4, a3 -; RV32I-NEXT: mv s5, a2 +; RV32I-NEXT: mv s0, a5 +; RV32I-NEXT: mv s1, a4 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __adddf3@plt ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: call __muldf3@plt ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subdf3@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1189,19 +1187,19 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 
24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index 63af598ee4653..4cae4e16b4499 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -137,15 +137,15 @@ define i32 @fcvt_w_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 794112 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: lui s5, 524288 ; RV32I-NEXT: lui s4, 524288 @@ -156,17 +156,17 @@ define i32 @fcvt_w_d_sat(double %a) nounwind { ; RV32I-NEXT: lui a0, 269824 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1047552 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: bge s0, a0, .LBB3_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: addi s4, s5, -1 ; RV32I-NEXT: .LBB3_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB3_6 ; RV32I-NEXT: # %bb.5: # %start @@ -363,20 +363,20 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 270080 ; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: lui a2, 1048064 -; 
RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -385,7 +385,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB6_2: # %start ; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: bgtz s2, .LBB6_4 +; RV32I-NEXT: bgtz s0, .LBB6_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB6_4: # %start @@ -694,7 +694,7 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 278016 ; RV32I-NEXT: addi s3, a0, -1 @@ -704,67 +704,67 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: lui a3, 802304 -; RV32I-NEXT: li s0, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixdfdi@plt ; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: blt s6, s0, .LBB12_2 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: blt s6, s2, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB12_2: # %start ; RV32I-NEXT: li s6, -1 -; RV32I-NEXT: blt s0, s4, .LBB12_4 +; RV32I-NEXT: blt s2, s4, .LBB12_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv s6, a1 ; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s1 -; 
RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: mv s4, s0 -; RV32I-NEXT: bne a0, s0, .LBB12_6 +; RV32I-NEXT: mv s4, s2 +; RV32I-NEXT: bne a0, s2, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start ; RV32I-NEXT: mv s4, s6 ; RV32I-NEXT: .LBB12_6: # %start ; RV32I-NEXT: lui a3, 802304 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: lui s7, 524288 ; RV32I-NEXT: lui s6, 524288 -; RV32I-NEXT: blt a0, s0, .LBB12_8 +; RV32I-NEXT: blt a0, s2, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start ; RV32I-NEXT: mv s6, s5 ; RV32I-NEXT: .LBB12_8: # %start ; RV32I-NEXT: li a2, -1 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: bge s0, a0, .LBB12_10 +; RV32I-NEXT: bge s2, a0, .LBB12_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: addi s6, s7, -1 ; RV32I-NEXT: .LBB12_10: # %start ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt -; RV32I-NEXT: bne a0, s0, .LBB12_12 +; RV32I-NEXT: bne a0, s2, .LBB12_12 ; RV32I-NEXT: # %bb.11: # %start -; RV32I-NEXT: mv s0, s6 +; RV32I-NEXT: mv s2, s6 ; RV32I-NEXT: .LBB12_12: # %start ; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -936,22 +936,22 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 278272 ; RV32I-NEXT: addi s3, a0, -1 ; RV32I-NEXT: 
li a2, -1 -; RV32I-NEXT: li s2, -1 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: li s0, -1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfdi@plt ; RV32I-NEXT: mv s5, a1 @@ -966,12 +966,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32I-NEXT: mv s4, a1 ; RV32I-NEXT: .LBB14_4: # %start ; RV32I-NEXT: li a2, -1 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a3, s3 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 @@ -983,10 +983,10 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32I-NEXT: .LBB14_6: # %start ; RV32I-NEXT: bgtz s3, .LBB14_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: .LBB14_8: # %start ; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1515,15 +1515,15 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 790016 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; 
RV32I-NEXT: lui s4, 1048568 ; RV32I-NEXT: blt s3, s0, .LBB26_2 @@ -1532,8 +1532,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32I-NEXT: .LBB26_2: # %start ; RV32I-NEXT: lui a0, 265728 ; RV32I-NEXT: addi a3, a0, -64 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: bge s0, a0, .LBB26_4 @@ -1541,10 +1541,10 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: .LBB26_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB26_6 ; RV32I-NEXT: # %bb.5: # %start @@ -1574,12 +1574,12 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB26_2 +; RV64I-NEXT: bltz s1, .LBB26_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB26_2: # %start @@ -1588,7 +1588,7 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64I-NEXT: slli a1, a0, 38 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: bge s1, a0, .LBB26_4 +; RV64I-NEXT: bge s2, a0, .LBB26_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -1596,11 +1596,11 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB26_6 +; RV64I-NEXT: bne a0, s2, .LBB26_6 ; RV64I-NEXT: # %bb.5: # %start -; 
RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; RV64I-NEXT: .LBB26_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1677,20 +1677,20 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 265984 ; RV32I-NEXT: addi a3, a0, -32 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -1701,7 +1701,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; RV32I-NEXT: lui a0, 16 ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgtz s2, .LBB28_4 +; RV32I-NEXT: bgtz s0, .LBB28_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: .LBB28_4: # %start @@ -1833,15 +1833,15 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 787968 ; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixdfsi@plt ; RV32I-NEXT: li s4, -128 ; 
RV32I-NEXT: blt s3, s0, .LBB30_2 @@ -1849,8 +1849,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: .LBB30_2: # %start ; RV32I-NEXT: lui a3, 263676 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call __gtdf2@plt ; RV32I-NEXT: li s3, 127 @@ -1858,10 +1858,10 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv s3, s4 ; RV32I-NEXT: .LBB30_4: # %start -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s1 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: bne a0, s0, .LBB30_6 ; RV32I-NEXT: # %bb.5: # %start @@ -1891,12 +1891,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV64I-NEXT: slli a1, a0, 53 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixdfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB30_2 +; RV64I-NEXT: bltz s1, .LBB30_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB30_2: # %start @@ -1904,19 +1904,19 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV64I-NEXT: slli a1, a0, 34 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtdf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB30_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB30_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB30_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unorddf2@plt -; RV64I-NEXT: bne a0, s1, .LBB30_6 +; RV64I-NEXT: bne a0, s2, .LBB30_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; 
RV64I-NEXT: .LBB30_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1997,18 +1997,18 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a3, 263934 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gtdf2@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __fixunsdfsi@plt ; RV32I-NEXT: li a1, 0 @@ -2017,7 +2017,7 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind { ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: .LBB32_2: # %start ; RV32I-NEXT: li a0, 255 -; RV32I-NEXT: bgtz s2, .LBB32_4 +; RV32I-NEXT: bgtz s0, .LBB32_4 ; RV32I-NEXT: # %bb.3: # %start ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB32_4: # %start diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll index 15cf27319e328..2af4ee69ce222 100644 --- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll @@ -250,16 +250,16 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: 
mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -378,16 +378,16 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 @@ -885,16 +885,16 @@ define i32 @fcmps_one(double %a, double %b) nounwind strictfp { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -999,16 +999,16 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 
4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll index 4f987a077b446..9b8fbc3473f41 100644 --- a/llvm/test/CodeGen/RISCV/double-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll @@ -234,16 +234,16 @@ define i32 @fcmp_one(double %a, double %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: snez s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: and a0, a0, s4 @@ -348,16 +348,16 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a3 -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: 
mv s2, a1 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: call __eqdf2@plt ; RV32I-NEXT: seqz s4, a0 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __unorddf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: or a0, a0, s4 diff --git a/llvm/test/CodeGen/RISCV/float-arith-strict.ll b/llvm/test/CodeGen/RISCV/float-arith-strict.ll index 401e0f79526f0..e58110dca92c5 100644 --- a/llvm/test/CodeGen/RISCV/float-arith-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-arith-strict.ll @@ -379,18 +379,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind strictfp { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -407,18 +407,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind strictfp { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 
-; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -459,18 +459,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind strictfp { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -487,18 +487,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind strictfp { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll index 3a525cf4d8feb..ba6b2eef908a4 100644 --- a/llvm/test/CodeGen/RISCV/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/float-arith.ll @@ -557,18 +557,18 @@ define float 
@fnmadd_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -585,18 +585,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -637,18 +637,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lui a2, 524288 -; 
RV32I-NEXT: xor a1, s1, a2 +; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -665,18 +665,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -883,16 +883,16 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __mulsf3@plt +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -908,16 +908,16 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 
8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __mulsf3@plt +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -952,74 +952,70 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { ; ; RV32I-LABEL: fnmadd_s_contract: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subsf3@plt -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 
16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fnmadd_s_contract: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subsf3@plt -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 
8-byte Folded Reload +; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret %a_ = fadd float 0.0, %a ; avoid negation using xor %b_ = fadd float 0.0, %b ; avoid negation using xor @@ -1054,19 +1050,19 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -1082,19 +1078,19 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index e3a11088e15f3..ce97b2930e393 100644 --- 
a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -903,25 +903,25 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: bltz s1, .LBB14_2 +; RV32I-NEXT: bltz s2, .LBB14_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: .LBB14_2: # %start ; RV32I-NEXT: lui a0, 391168 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: li s2, -1 ; RV32I-NEXT: li s3, -1 -; RV32I-NEXT: li s4, -1 ; RV32I-NEXT: bgtz a0, .LBB14_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: mv s3, s5 ; RV32I-NEXT: .LBB14_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 @@ -929,17 +929,17 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32I-NEXT: li s5, 0 ; RV32I-NEXT: bltz a0, .LBB14_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s2 +; RV32I-NEXT: mv s5, s1 ; RV32I-NEXT: .LBB14_6: # %start ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB14_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s3, s5 +; RV32I-NEXT: mv s2, s5 ; RV32I-NEXT: .LBB14_8: # %start -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1382,12 +1382,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 
; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: lui s3, 1048568 -; RV32I-NEXT: bltz s2, .LBB24_2 +; RV32I-NEXT: bltz s1, .LBB24_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB24_2: # %start @@ -1395,7 +1395,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB24_4 +; RV32I-NEXT: bge s2, a0, .LBB24_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s3, a0, -1 @@ -1403,11 +1403,11 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB24_6 +; RV32I-NEXT: bne a0, s2, .LBB24_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB24_6: # %start -; RV32I-NEXT: slli a0, s1, 16 +; RV32I-NEXT: slli a0, s2, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1428,12 +1428,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB24_2 +; RV64I-NEXT: bltz s1, .LBB24_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB24_2: # %start @@ -1441,7 +1441,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB24_4 +; RV64I-NEXT: bge s2, a0, .LBB24_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -1449,11 +1449,11 @@ 
define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB24_6 +; RV64I-NEXT: bne a0, s2, .LBB24_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; RV64I-NEXT: .LBB24_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1678,31 +1678,31 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 798720 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li s3, -128 -; RV32I-NEXT: bltz s2, .LBB28_2 +; RV32I-NEXT: bltz s1, .LBB28_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB28_2: # %start ; RV32I-NEXT: lui a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s2, 127 -; RV32I-NEXT: blt s1, a0, .LBB28_4 +; RV32I-NEXT: li s1, 127 +; RV32I-NEXT: blt s2, a0, .LBB28_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s1, s3 ; RV32I-NEXT: .LBB28_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB28_6 +; RV32I-NEXT: bne a0, s2, .LBB28_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s2 +; RV32I-NEXT: mv s2, s1 ; RV32I-NEXT: .LBB28_6: # %start -; RV32I-NEXT: slli a0, s1, 24 +; RV32I-NEXT: slli a0, s2, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1723,31 +1723,31 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind { ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 798720 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; 
RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB28_2 +; RV64I-NEXT: bltz s1, .LBB28_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB28_2: # %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB28_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB28_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB28_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB28_6 +; RV64I-NEXT: bne a0, s2, .LBB28_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; RV64I-NEXT: .LBB28_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll index cea4e40644523..947aa7c19a2da 100644 --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -14,25 +14,25 @@ define i32 @test_load_and_cmp() nounwind { ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a6, %lo(x)(a0) -; RV32I-NEXT: lw a7, %lo(x+4)(a0) +; RV32I-NEXT: lw a2, %lo(x)(a0) +; RV32I-NEXT: lw a1, %lo(x+4)(a0) ; RV32I-NEXT: lw a3, %lo(x+8)(a0) ; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: lui a4, %hi(y) ; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a2, %lo(y+4)(a4) -; RV32I-NEXT: lw a1, %lo(y+8)(a4) +; RV32I-NEXT: lw a6, %lo(y+4)(a4) +; RV32I-NEXT: lw a7, %lo(y+8)(a4) ; RV32I-NEXT: lw a4, %lo(y+12)(a4) ; RV32I-NEXT: sw a4, 20(sp) -; RV32I-NEXT: sw a1, 16(sp) -; RV32I-NEXT: sw a2, 12(sp) +; 
RV32I-NEXT: sw a7, 16(sp) +; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a0, 36(sp) ; RV32I-NEXT: sw a3, 32(sp) -; RV32I-NEXT: sw a7, 28(sp) +; RV32I-NEXT: sw a1, 28(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: sw a6, 24(sp) +; RV32I-NEXT: sw a2, 24(sp) ; RV32I-NEXT: call __netf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -51,26 +51,26 @@ define i32 @test_add_and_fptosi() nounwind { ; RV32I-NEXT: addi sp, sp, -80 ; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a6, %lo(x)(a0) -; RV32I-NEXT: lw a7, %lo(x+4)(a0) +; RV32I-NEXT: lw a3, %lo(x)(a0) +; RV32I-NEXT: lw a1, %lo(x+4)(a0) ; RV32I-NEXT: lw a2, %lo(x+8)(a0) ; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: lui a4, %hi(y) ; RV32I-NEXT: lw a5, %lo(y)(a4) -; RV32I-NEXT: lw a3, %lo(y+4)(a4) -; RV32I-NEXT: lw a1, %lo(y+8)(a4) +; RV32I-NEXT: lw a6, %lo(y+4)(a4) +; RV32I-NEXT: lw a7, %lo(y+8)(a4) ; RV32I-NEXT: lw a4, %lo(y+12)(a4) ; RV32I-NEXT: sw a4, 36(sp) -; RV32I-NEXT: sw a1, 32(sp) -; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: sw a5, 24(sp) ; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: sw a2, 48(sp) -; RV32I-NEXT: sw a7, 44(sp) +; RV32I-NEXT: sw a1, 44(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 -; RV32I-NEXT: sw a6, 40(sp) +; RV32I-NEXT: sw a3, 40(sp) ; RV32I-NEXT: call __addtf3@plt ; RV32I-NEXT: lw a1, 56(sp) ; RV32I-NEXT: lw a0, 60(sp) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 46e78e375edff..69a646db09aa3 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1159,43 +1159,43 @@ define i64 @stest_f64i64(double %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixdfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; 
RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, -1 ; RV32-NEXT: beq a1, a5, .LBB18_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB18_3 +; RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB18_3 ; RV32-NEXT: j .LBB18_4 ; RV32-NEXT: .LBB18_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB18_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB18_4 ; RV32-NEXT: .LBB18_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB18_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB18_7 +; RV32-NEXT: beqz a7, .LBB18_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB18_8 +; RV32-NEXT: beq a1, a4, .LBB18_8 ; RV32-NEXT: .LBB18_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB18_9 ; RV32-NEXT: j .LBB18_10 ; RV32-NEXT: .LBB18_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB18_6 +; RV32-NEXT: bne a1, a4, .LBB18_6 ; RV32-NEXT: .LBB18_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB18_10 ; RV32-NEXT: .LBB18_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -1441,43 +1441,43 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, 
-1 ; RV32-NEXT: beq a1, a5, .LBB21_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB21_3 +; RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB21_3 ; RV32-NEXT: j .LBB21_4 ; RV32-NEXT: .LBB21_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB21_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB21_4 ; RV32-NEXT: .LBB21_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB21_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB21_7 +; RV32-NEXT: beqz a7, .LBB21_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB21_8 +; RV32-NEXT: beq a1, a4, .LBB21_8 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB21_9 ; RV32-NEXT: j .LBB21_10 ; RV32-NEXT: .LBB21_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB21_6 +; RV32-NEXT: bne a1, a4, .LBB21_6 ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB21_10 ; RV32-NEXT: .LBB21_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -1685,43 +1685,43 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: lw t0, 16(sp) +; RV32-NEXT: lw a3, 16(sp) ; RV32-NEXT: lw a1, 12(sp) ; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a5, a7, -1 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a5, a4, -1 ; RV32-NEXT: beq a1, a5, .LBB24_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a4, a1, a5 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: bnez a3, .LBB24_3 +; 
RV32-NEXT: sltu a7, a1, a5 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: bnez a6, .LBB24_3 ; RV32-NEXT: j .LBB24_4 ; RV32-NEXT: .LBB24_2: -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: snez a4, a4 -; RV32-NEXT: or a3, t0, a2 -; RV32-NEXT: beqz a3, .LBB24_4 +; RV32-NEXT: addi a6, a0, 1 +; RV32-NEXT: snez a7, a6 +; RV32-NEXT: or a6, a3, a2 +; RV32-NEXT: beqz a6, .LBB24_4 ; RV32-NEXT: .LBB24_3: # %entry -; RV32-NEXT: slti a4, a2, 0 +; RV32-NEXT: slti a7, a2, 0 ; RV32-NEXT: .LBB24_4: # %entry ; RV32-NEXT: li a6, -1 -; RV32-NEXT: beqz a4, .LBB24_7 +; RV32-NEXT: beqz a7, .LBB24_7 ; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: beq a1, a7, .LBB24_8 +; RV32-NEXT: beq a1, a4, .LBB24_8 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: sltu a4, a7, a1 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: sltu a4, a4, a1 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: bne a3, a6, .LBB24_9 ; RV32-NEXT: j .LBB24_10 ; RV32-NEXT: .LBB24_7: # %entry ; RV32-NEXT: li a2, 0 -; RV32-NEXT: li t0, 0 +; RV32-NEXT: li a3, 0 ; RV32-NEXT: li a0, -1 ; RV32-NEXT: mv a1, a5 -; RV32-NEXT: bne a1, a7, .LBB24_6 +; RV32-NEXT: bne a1, a4, .LBB24_6 ; RV32-NEXT: .LBB24_8: ; RV32-NEXT: snez a4, a0 -; RV32-NEXT: and a3, t0, a2 +; RV32-NEXT: and a3, a3, a2 ; RV32-NEXT: beq a3, a6, .LBB24_10 ; RV32-NEXT: .LBB24_9: # %entry ; RV32-NEXT: slt a4, a6, a2 @@ -3090,109 +3090,109 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB45_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB45_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB45_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB45_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: 
bne a1, a2, .LBB45_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB45_20 ; RV32-NEXT: .LBB45_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB45_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB45_21 ; RV32-NEXT: .LBB45_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, .LBB45_22 ; RV32-NEXT: .LBB45_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB45_23 +; RV32-NEXT: bgeu a1, a6, .LBB45_23 ; RV32-NEXT: .LBB45_7: # %entry -; RV32-NEXT: bnez a0, .LBB45_24 +; RV32-NEXT: bnez t0, .LBB45_24 ; RV32-NEXT: .LBB45_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: bnez a3, .LBB45_25 ; RV32-NEXT: .LBB45_9: # %entry ; RV32-NEXT: bgez a3, .LBB45_26 ; RV32-NEXT: .LBB45_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB45_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB45_27 ; RV32-NEXT: .LBB45_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB45_28 +; RV32-NEXT: bne a1, a4, .LBB45_28 ; RV32-NEXT: .LBB45_12: # %entry ; RV32-NEXT: bltz a3, .LBB45_29 ; RV32-NEXT: .LBB45_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, .LBB45_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB45_30 ; RV32-NEXT: .LBB45_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB45_31 ; RV32-NEXT: .LBB45_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB45_32 +; RV32-NEXT: bgeu a4, a1, .LBB45_32 ; RV32-NEXT: .LBB45_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB45_18 +; RV32-NEXT: beq a6, a2, .LBB45_18 ; RV32-NEXT: .LBB45_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB45_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB45_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB45_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB45_4 ; RV32-NEXT: 
.LBB45_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB45_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB45_5 ; RV32-NEXT: .LBB45_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, .LBB45_6 ; RV32-NEXT: .LBB45_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB45_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB45_7 ; RV32-NEXT: .LBB45_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB45_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, .LBB45_8 ; RV32-NEXT: .LBB45_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB45_9 ; RV32-NEXT: .LBB45_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB45_10 ; RV32-NEXT: .LBB45_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB45_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB45_11 ; RV32-NEXT: .LBB45_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB45_12 +; RV32-NEXT: beq a1, a4, .LBB45_12 ; RV32-NEXT: .LBB45_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; RV32-NEXT: bgez a3, .LBB45_13 ; RV32-NEXT: .LBB45_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB45_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB45_14 ; RV32-NEXT: .LBB45_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB45_15 ; RV32-NEXT: .LBB45_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu a7, a1, .LBB45_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB45_16 ; RV32-NEXT: .LBB45_32: # %entry ; RV32-NEXT: lui a1, 524288 
-; RV32-NEXT: bne a2, a6, .LBB45_17 +; RV32-NEXT: bne a6, a2, .LBB45_17 ; RV32-NEXT: j .LBB45_18 ; ; RV64IF-LABEL: stest_f64i64_mm: @@ -3514,109 +3514,109 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB48_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB48_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB48_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: bne a1, a2, .LBB48_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB48_20 ; RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB48_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB48_21 ; RV32-NEXT: .LBB48_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, .LBB48_22 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB48_23 +; RV32-NEXT: bgeu a1, a6, .LBB48_23 ; RV32-NEXT: .LBB48_7: # %entry -; RV32-NEXT: bnez a0, .LBB48_24 +; RV32-NEXT: bnez t0, .LBB48_24 ; RV32-NEXT: .LBB48_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: bnez a3, .LBB48_25 ; RV32-NEXT: .LBB48_9: # %entry ; RV32-NEXT: bgez a3, .LBB48_26 ; RV32-NEXT: .LBB48_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB48_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB48_27 ; RV32-NEXT: .LBB48_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB48_28 +; RV32-NEXT: bne a1, a4, .LBB48_28 ; RV32-NEXT: .LBB48_12: # %entry ; RV32-NEXT: bltz a3, .LBB48_29 ; RV32-NEXT: .LBB48_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, 
.LBB48_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB48_30 ; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB48_31 ; RV32-NEXT: .LBB48_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB48_32 +; RV32-NEXT: bgeu a4, a1, .LBB48_32 ; RV32-NEXT: .LBB48_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB48_18 +; RV32-NEXT: beq a6, a2, .LBB48_18 ; RV32-NEXT: .LBB48_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB48_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB48_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB48_4 ; RV32-NEXT: .LBB48_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB48_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB48_5 ; RV32-NEXT: .LBB48_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, .LBB48_6 ; RV32-NEXT: .LBB48_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB48_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB48_7 ; RV32-NEXT: .LBB48_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB48_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, .LBB48_8 ; RV32-NEXT: .LBB48_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB48_9 ; RV32-NEXT: .LBB48_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB48_10 ; RV32-NEXT: .LBB48_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB48_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB48_11 ; 
RV32-NEXT: .LBB48_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB48_12 +; RV32-NEXT: beq a1, a4, .LBB48_12 ; RV32-NEXT: .LBB48_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; RV32-NEXT: bgez a3, .LBB48_13 ; RV32-NEXT: .LBB48_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB48_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB48_14 ; RV32-NEXT: .LBB48_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB48_15 ; RV32-NEXT: .LBB48_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu a7, a1, .LBB48_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB48_16 ; RV32-NEXT: .LBB48_32: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a2, a6, .LBB48_17 +; RV32-NEXT: bne a6, a2, .LBB48_17 ; RV32-NEXT: j .LBB48_18 ; ; RV64-LABEL: stest_f32i64_mm: @@ -3886,109 +3886,109 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: lw a5, 8(sp) ; RV32-NEXT: lw a3, 20(sp) ; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a6, -1 -; RV32-NEXT: mv a4, a5 +; RV32-NEXT: li a2, -1 +; RV32-NEXT: mv a7, a5 ; RV32-NEXT: bltz a3, .LBB51_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a4, -1 +; RV32-NEXT: li a7, -1 ; RV32-NEXT: .LBB51_2: # %entry -; RV32-NEXT: lui a7, 524288 -; RV32-NEXT: addi a2, a7, -1 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bgeu a1, a2, .LBB51_19 +; RV32-NEXT: lui a4, 524288 +; RV32-NEXT: addi a6, a4, -1 +; RV32-NEXT: mv t0, a5 +; RV32-NEXT: bgeu a1, a6, .LBB51_19 ; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: bne a1, a2, .LBB51_20 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: bne a1, a6, .LBB51_20 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: bnez a0, .LBB51_21 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: bnez t0, .LBB51_21 ; RV32-NEXT: .LBB51_5: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bgez a3, 
.LBB51_22 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: bgeu a1, a2, .LBB51_23 +; RV32-NEXT: bgeu a1, a6, .LBB51_23 ; RV32-NEXT: .LBB51_7: # %entry -; RV32-NEXT: bnez a0, .LBB51_24 +; RV32-NEXT: bnez t0, .LBB51_24 ; RV32-NEXT: .LBB51_8: # %entry -; RV32-NEXT: li a2, 0 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: bnez a3, .LBB51_25 ; RV32-NEXT: .LBB51_9: # %entry ; RV32-NEXT: bgez a3, .LBB51_26 ; RV32-NEXT: .LBB51_10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bgeu a7, a1, .LBB51_27 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bgeu a4, a1, .LBB51_27 ; RV32-NEXT: .LBB51_11: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a7, .LBB51_28 +; RV32-NEXT: bne a1, a4, .LBB51_28 ; RV32-NEXT: .LBB51_12: # %entry ; RV32-NEXT: bltz a3, .LBB51_29 ; RV32-NEXT: .LBB51_13: # %entry -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: bne a2, a6, .LBB51_30 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: bne a6, a2, .LBB51_30 ; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bltz a3, .LBB51_31 ; RV32-NEXT: .LBB51_15: # %entry -; RV32-NEXT: bgeu a7, a1, .LBB51_32 +; RV32-NEXT: bgeu a4, a1, .LBB51_32 ; RV32-NEXT: .LBB51_16: # %entry -; RV32-NEXT: beq a2, a6, .LBB51_18 +; RV32-NEXT: beq a6, a2, .LBB51_18 ; RV32-NEXT: .LBB51_17: # %entry -; RV32-NEXT: mv a1, a4 +; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_18: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; RV32-NEXT: .LBB51_19: # %entry -; RV32-NEXT: li a0, -1 -; RV32-NEXT: lw t0, 16(sp) -; RV32-NEXT: beq a1, a2, .LBB51_4 +; RV32-NEXT: li t0, -1 +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: beq a1, a6, .LBB51_4 ; RV32-NEXT: .LBB51_20: # %entry -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: or a0, t0, a3 -; RV32-NEXT: beqz a0, .LBB51_5 +; RV32-NEXT: mv a5, t0 +; RV32-NEXT: or t0, a0, a3 +; RV32-NEXT: beqz t0, .LBB51_5 ; RV32-NEXT: .LBB51_21: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a7 +; RV32-NEXT: mv a7, a1 ; RV32-NEXT: bltz a3, 
.LBB51_6 ; RV32-NEXT: .LBB51_22: # %entry -; RV32-NEXT: mv a4, a2 -; RV32-NEXT: bltu a1, a2, .LBB51_7 +; RV32-NEXT: mv a7, a6 +; RV32-NEXT: bltu a1, a6, .LBB51_7 ; RV32-NEXT: .LBB51_23: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: beqz a0, .LBB51_8 +; RV32-NEXT: mv a1, a6 +; RV32-NEXT: beqz t0, .LBB51_8 ; RV32-NEXT: .LBB51_24: # %entry -; RV32-NEXT: mv a1, a4 -; RV32-NEXT: li a2, 0 +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: li a6, 0 ; RV32-NEXT: beqz a3, .LBB51_9 ; RV32-NEXT: .LBB51_25: # %entry -; RV32-NEXT: srai a0, a3, 31 -; RV32-NEXT: and a2, a0, t0 +; RV32-NEXT: srai a6, a3, 31 +; RV32-NEXT: and a6, a6, a0 ; RV32-NEXT: bltz a3, .LBB51_10 ; RV32-NEXT: .LBB51_26: # %entry ; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: bltu a7, a1, .LBB51_11 +; RV32-NEXT: mv a7, a5 +; RV32-NEXT: bltu a4, a1, .LBB51_11 ; RV32-NEXT: .LBB51_27: # %entry -; RV32-NEXT: li a4, 0 +; RV32-NEXT: li a7, 0 ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a7, .LBB51_12 +; RV32-NEXT: beq a1, a4, .LBB51_12 ; RV32-NEXT: .LBB51_28: # %entry -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a0, a7 ; RV32-NEXT: bgez a3, .LBB51_13 ; RV32-NEXT: .LBB51_29: # %entry ; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a2, a2, a3 -; RV32-NEXT: beq a2, a6, .LBB51_14 +; RV32-NEXT: and a6, a6, a3 +; RV32-NEXT: beq a6, a2, .LBB51_14 ; RV32-NEXT: .LBB51_30: # %entry ; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a1 ; RV32-NEXT: bgez a3, .LBB51_15 ; RV32-NEXT: .LBB51_31: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: bltu a7, a1, .LBB51_16 +; RV32-NEXT: lui a5, 524288 +; RV32-NEXT: bltu a4, a1, .LBB51_16 ; RV32-NEXT: .LBB51_32: # %entry ; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a2, a6, .LBB51_17 +; RV32-NEXT: bne a6, a2, .LBB51_17 ; RV32-NEXT: j .LBB51_18 ; ; RV64-LABEL: stest_f16i64_mm: diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index 4cba269baaa51..6a144032e866a 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ 
b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -118,56 +118,56 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: fmv.w.x ft2, a3 ; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: lui a6, 524288 -; CHECK-NEXT: addiw a5, a6, -1 +; CHECK-NEXT: lui a4, 524288 +; CHECK-NEXT: addiw a6, a4, -1 ; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB3_2 +; CHECK-NEXT: blt a2, a6, .LBB3_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: mv a2, a6 ; CHECK-NEXT: .LBB3_2: # %entry ; CHECK-NEXT: fmv.w.x ft1, a1 ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB3_11 +; CHECK-NEXT: bge a3, a6, .LBB3_11 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB3_12 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: bge a1, a6, .LBB3_12 ; CHECK-NEXT: .LBB3_4: # %entry -; CHECK-NEXT: bge a4, a5, .LBB3_13 +; CHECK-NEXT: bge a5, a6, .LBB3_13 ; CHECK-NEXT: .LBB3_5: # %entry -; CHECK-NEXT: bge a6, a4, .LBB3_14 +; CHECK-NEXT: bge a4, a5, .LBB3_14 ; CHECK-NEXT: .LBB3_6: # %entry -; CHECK-NEXT: bge a6, a1, .LBB3_15 +; CHECK-NEXT: bge a4, a1, .LBB3_15 ; CHECK-NEXT: .LBB3_7: # %entry -; CHECK-NEXT: bge a6, a3, .LBB3_16 +; CHECK-NEXT: bge a4, a3, .LBB3_16 ; CHECK-NEXT: .LBB3_8: # %entry -; CHECK-NEXT: blt a6, a2, .LBB3_10 +; CHECK-NEXT: blt a4, a2, .LBB3_10 ; CHECK-NEXT: .LBB3_9: # %entry ; CHECK-NEXT: lui a2, 524288 ; CHECK-NEXT: .LBB3_10: # %entry ; CHECK-NEXT: sw a2, 12(a0) ; CHECK-NEXT: sw a3, 8(a0) ; CHECK-NEXT: sw a1, 4(a0) -; CHECK-NEXT: sw a4, 0(a0) +; CHECK-NEXT: sw a5, 0(a0) ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB3_4 +; CHECK-NEXT: mv a3, a6 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: blt a1, a6, .LBB3_4 ; CHECK-NEXT: .LBB3_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB3_5 +; CHECK-NEXT: mv a1, a6 +; 
CHECK-NEXT: blt a5, a6, .LBB3_5 ; CHECK-NEXT: .LBB3_13: # %entry -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: blt a6, a4, .LBB3_6 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: blt a4, a5, .LBB3_6 ; CHECK-NEXT: .LBB3_14: # %entry -; CHECK-NEXT: lui a4, 524288 -; CHECK-NEXT: blt a6, a1, .LBB3_7 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: blt a4, a1, .LBB3_7 ; CHECK-NEXT: .LBB3_15: # %entry ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: blt a6, a3, .LBB3_8 +; CHECK-NEXT: blt a4, a3, .LBB3_8 ; CHECK-NEXT: .LBB3_16: # %entry ; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: bge a6, a2, .LBB3_9 +; CHECK-NEXT: bge a4, a2, .LBB3_9 ; CHECK-NEXT: j .LBB3_10 entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -311,23 +311,23 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -338,10 +338,10 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB6_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 
; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: bge s0, a4, .LBB6_11 +; CHECK-NEXT: bge s3, a4, .LBB6_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: bge a2, a4, .LBB6_12 @@ -352,16 +352,16 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEXT: .LBB6_6: # %entry ; CHECK-NEXT: bge a1, a2, .LBB6_15 ; CHECK-NEXT: .LBB6_7: # %entry -; CHECK-NEXT: bge a1, s0, .LBB6_16 +; CHECK-NEXT: bge a1, s3, .LBB6_16 ; CHECK-NEXT: .LBB6_8: # %entry ; CHECK-NEXT: blt a1, a0, .LBB6_10 ; CHECK-NEXT: .LBB6_9: # %entry ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: .LBB6_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a2, 4(s3) -; CHECK-NEXT: sw a3, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a2, 4(s0) +; CHECK-NEXT: sw a3, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -371,7 +371,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_11: # %entry -; CHECK-NEXT: mv s0, a4 +; CHECK-NEXT: mv s3, a4 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: blt a2, a4, .LBB6_4 ; CHECK-NEXT: .LBB6_12: # %entry @@ -385,9 +385,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEXT: blt a1, a2, .LBB6_7 ; CHECK-NEXT: .LBB6_15: # %entry ; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: blt a1, s0, .LBB6_8 +; CHECK-NEXT: blt a1, s3, .LBB6_8 ; CHECK-NEXT: .LBB6_16: # %entry -; CHECK-NEXT: lui s0, 524288 +; CHECK-NEXT: lui s3, 524288 ; CHECK-NEXT: bge a1, a0, .LBB6_9 ; CHECK-NEXT: j .LBB6_10 entry: @@ -418,23 +418,23 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 24(a1) +; 
CHECK-NEXT: lhu s3, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -445,10 +445,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB7_7 +; CHECK-NEXT: bgeu s3, a1, .LBB7_7 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bgeu a2, a1, .LBB7_8 @@ -459,7 +459,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEXT: .LBB7_6: # %entry ; CHECK-NEXT: sw a3, 12(s0) ; CHECK-NEXT: sw a2, 8(s0) -; CHECK-NEXT: sw s1, 4(s0) +; CHECK-NEXT: sw s3, 4(s0) ; CHECK-NEXT: sw a0, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -470,7 +470,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_7: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv s3, a1 ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bltu a2, a1, .LBB7_4 ; CHECK-NEXT: .LBB7_8: # %entry @@ -503,23 +503,23 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: 
.cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -530,10 +530,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: .LBB8_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; CHECK-NEXT: bge s0, a3, .LBB8_11 +; CHECK-NEXT: bge s3, a3, .LBB8_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: bge a1, a3, .LBB8_12 @@ -544,16 +544,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: .LBB8_6: # %entry ; CHECK-NEXT: blez a1, .LBB8_15 ; CHECK-NEXT: .LBB8_7: # %entry -; CHECK-NEXT: blez s0, .LBB8_16 +; CHECK-NEXT: blez s3, .LBB8_16 ; CHECK-NEXT: .LBB8_8: # %entry ; CHECK-NEXT: bgtz a0, .LBB8_10 ; CHECK-NEXT: .LBB8_9: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB8_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a1, 4(s3) -; CHECK-NEXT: sw a2, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; 
CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a1, 4(s0) +; CHECK-NEXT: sw a2, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -563,7 +563,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_11: # %entry -; CHECK-NEXT: mv s0, a3 +; CHECK-NEXT: mv s3, a3 ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: blt a1, a3, .LBB8_4 ; CHECK-NEXT: .LBB8_12: # %entry @@ -577,9 +577,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEXT: bgtz a1, .LBB8_7 ; CHECK-NEXT: .LBB8_15: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s0, .LBB8_8 +; CHECK-NEXT: bgtz s3, .LBB8_8 ; CHECK-NEXT: .LBB8_16: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li s3, 0 ; CHECK-NEXT: blez a0, .LBB8_9 ; CHECK-NEXT: j .LBB8_10 entry: @@ -903,7 +903,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -914,23 +913,19 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call 
__gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -945,81 +940,84 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB15_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB15_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB15_4 +; CHECK-NEXT: blt s7, a7, .LBB15_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB15_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB15_6 +; CHECK-NEXT: blt a1, a7, .LBB15_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB15_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: 
fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB15_8 +; CHECK-NEXT: blt a2, a7, .LBB15_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB15_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB15_10 +; CHECK-NEXT: blt a3, a7, .LBB15_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB15_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB15_23 +; CHECK-NEXT: bge a4, a7, .LBB15_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB15_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB15_24 ; CHECK-NEXT: .LBB15_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB15_25 +; CHECK-NEXT: bge a6, a7, .LBB15_25 ; CHECK-NEXT: .LBB15_13: # %entry -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: bge s1, s0, .LBB15_26 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: bge a7, a6, .LBB15_26 ; CHECK-NEXT: .LBB15_14: # %entry -; CHECK-NEXT: bge s1, a5, .LBB15_27 +; CHECK-NEXT: bge a7, a5, .LBB15_27 ; CHECK-NEXT: .LBB15_15: # %entry -; CHECK-NEXT: bge s1, a4, .LBB15_28 +; CHECK-NEXT: bge a7, a4, .LBB15_28 ; CHECK-NEXT: .LBB15_16: # %entry -; CHECK-NEXT: bge s1, a3, .LBB15_29 +; CHECK-NEXT: bge a7, a3, .LBB15_29 ; CHECK-NEXT: .LBB15_17: # %entry -; CHECK-NEXT: bge s1, a2, .LBB15_30 +; CHECK-NEXT: bge a7, a2, .LBB15_30 ; CHECK-NEXT: .LBB15_18: # %entry -; CHECK-NEXT: bge s1, a1, .LBB15_31 +; CHECK-NEXT: bge a7, a1, .LBB15_31 ; CHECK-NEXT: .LBB15_19: # %entry -; CHECK-NEXT: bge s1, s9, .LBB15_32 +; CHECK-NEXT: bge a7, s7, .LBB15_32 ; CHECK-NEXT: .LBB15_20: # %entry -; CHECK-NEXT: blt s1, a0, .LBB15_22 +; CHECK-NEXT: blt a7, a0, .LBB15_22 ; CHECK-NEXT: .LBB15_21: # %entry ; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: .LBB15_22: # %entry -; CHECK-NEXT: sh 
a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1030,41 +1028,40 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB15_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB15_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB15_12 ; CHECK-NEXT: .LBB15_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB15_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB15_13 ; CHECK-NEXT: .LBB15_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: blt s1, s0, .LBB15_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: blt a7, a6, .LBB15_14 ; CHECK-NEXT: .LBB15_26: # %entry -; CHECK-NEXT: lui s0, 1048568 -; CHECK-NEXT: blt s1, a5, .LBB15_15 +; CHECK-NEXT: lui a6, 1048568 +; CHECK-NEXT: blt a7, a5, .LBB15_15 ; CHECK-NEXT: .LBB15_27: # %entry ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt s1, a4, .LBB15_16 +; CHECK-NEXT: blt a7, a4, .LBB15_16 ; CHECK-NEXT: .LBB15_28: # %entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt s1, a3, .LBB15_17 +; CHECK-NEXT: blt a7, a3, .LBB15_17 ; CHECK-NEXT: .LBB15_29: # 
%entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: blt s1, a2, .LBB15_18 +; CHECK-NEXT: blt a7, a2, .LBB15_18 ; CHECK-NEXT: .LBB15_30: # %entry ; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: blt s1, a1, .LBB15_19 +; CHECK-NEXT: blt a7, a1, .LBB15_19 ; CHECK-NEXT: .LBB15_31: # %entry ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt s1, s9, .LBB15_20 +; CHECK-NEXT: blt a7, s7, .LBB15_20 ; CHECK-NEXT: .LBB15_32: # %entry -; CHECK-NEXT: lui s9, 1048568 -; CHECK-NEXT: bge s1, a0, .LBB15_21 +; CHECK-NEXT: lui s7, 1048568 +; CHECK-NEXT: bge a7, a0, .LBB15_21 ; CHECK-NEXT: j .LBB15_22 entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -1102,20 +1099,17 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 ; CHECK-NEXT: lhu s6, 0(a1) -; CHECK-NEXT: lhu s2, 56(a1) -; CHECK-NEXT: lhu s3, 48(a1) -; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s5, 32(a1) -; CHECK-NEXT: lhu s7, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 48(a1) +; CHECK-NEXT: lhu s3, 40(a1) +; CHECK-NEXT: lhu s4, 32(a1) +; CHECK-NEXT: lhu s5, 24(a1) +; CHECK-NEXT: lhu s7, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s8, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 @@ -1131,64 +1125,67 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s8 -; CHECK-NEXT: fcvt.lu.s s8, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call 
__gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: bltu a6, a1, .LBB16_2 +; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addiw a1, a1, -1 +; CHECK-NEXT: bltu a0, a1, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB16_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 +; CHECK-NEXT: fmv.w.x ft1, s5 ; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft0, rtz -; CHECK-NEXT: bltu s8, a1, .LBB16_4 +; CHECK-NEXT: bltu s7, a1, .LBB16_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s8, a1 +; CHECK-NEXT: mv s7, a1 ; CHECK-NEXT: .LBB16_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz ; CHECK-NEXT: bltu a2, a1, .LBB16_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB16_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz ; CHECK-NEXT: bltu a3, a1, .LBB16_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB16_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz ; CHECK-NEXT: bltu a4, a1, .LBB16_10 ; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB16_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fmv.w.x ft1, s1 +; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz ; CHECK-NEXT: bgeu a5, a1, .LBB16_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB16_16 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: bgeu a6, a1, .LBB16_16 ; CHECK-NEXT: .LBB16_12: # %entry -; CHECK-NEXT: bltu a0, a1, .LBB16_14 +; CHECK-NEXT: bltu a7, a1, .LBB16_14 ; CHECK-NEXT: .LBB16_13: # %entry -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a7, a1 
; CHECK-NEXT: .LBB16_14: # %entry -; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s1, 12(s0) +; CHECK-NEXT: sh a7, 14(s0) +; CHECK-NEXT: sh a6, 12(s0) ; CHECK-NEXT: sh a5, 10(s0) ; CHECK-NEXT: sh a4, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a2, 4(s0) -; CHECK-NEXT: sh s8, 2(s0) -; CHECK-NEXT: sh a6, 0(s0) +; CHECK-NEXT: sh s7, 2(s0) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1203,11 +1200,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB16_15: # %entry ; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: bltu s1, a1, .LBB16_12 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: bltu a6, a1, .LBB16_12 ; CHECK-NEXT: .LBB16_16: # %entry -; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: bgeu a0, a1, .LBB16_13 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: bgeu a7, a1, .LBB16_13 ; CHECK-NEXT: j .LBB16_14 entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -1232,7 +1229,6 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -1243,23 +1239,19 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; 
CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -1274,55 +1266,58 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB17_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB17_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB17_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB17_4 +; CHECK-NEXT: blt s7, a7, .LBB17_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB17_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB17_6 +; CHECK-NEXT: blt a1, a7, .LBB17_6 ; CHECK-NEXT: # %bb.5: # %entry -; 
CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB17_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB17_8 +; CHECK-NEXT: blt a2, a7, .LBB17_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB17_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB17_10 +; CHECK-NEXT: blt a3, a7, .LBB17_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB17_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB17_23 +; CHECK-NEXT: bge a4, a7, .LBB17_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB17_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB17_24 ; CHECK-NEXT: .LBB17_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB17_25 +; CHECK-NEXT: bge a6, a7, .LBB17_25 ; CHECK-NEXT: .LBB17_13: # %entry -; CHECK-NEXT: blez s0, .LBB17_26 +; CHECK-NEXT: blez a6, .LBB17_26 ; CHECK-NEXT: .LBB17_14: # %entry ; CHECK-NEXT: blez a5, .LBB17_27 ; CHECK-NEXT: .LBB17_15: # %entry @@ -1334,20 +1329,20 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: .LBB17_18: # %entry ; CHECK-NEXT: blez a1, .LBB17_31 ; CHECK-NEXT: .LBB17_19: # %entry -; CHECK-NEXT: blez s9, .LBB17_32 +; CHECK-NEXT: blez s7, .LBB17_32 ; CHECK-NEXT: .LBB17_20: # %entry ; CHECK-NEXT: bgtz a0, .LBB17_22 ; CHECK-NEXT: .LBB17_21: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB17_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 
12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -1358,21 +1353,20 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB17_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB17_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB17_12 ; CHECK-NEXT: .LBB17_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB17_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB17_13 ; CHECK-NEXT: .LBB17_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: bgtz s0, .LBB17_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: bgtz a6, .LBB17_14 ; CHECK-NEXT: .LBB17_26: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: bgtz a5, .LBB17_15 ; CHECK-NEXT: .LBB17_27: # %entry ; CHECK-NEXT: li a5, 0 @@ -1388,9 +1382,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NEXT: bgtz a1, .LBB17_19 ; CHECK-NEXT: .LBB17_31: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s9, .LBB17_20 +; CHECK-NEXT: bgtz s7, .LBB17_20 ; CHECK-NEXT: .LBB17_32: # %entry -; CHECK-NEXT: li s9, 0 +; CHECK-NEXT: li s7, 0 ; CHECK-NEXT: blez a0, .LBB17_21 ; CHECK-NEXT: j .LBB17_22 entry: @@ -1418,11 +1412,11 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call 
__fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -1432,7 +1426,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB18_4 ; CHECK-NEXT: .LBB18_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB18_5 ; CHECK-NEXT: j .LBB18_6 ; CHECK-NEXT: .LBB18_3: @@ -1443,7 +1437,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: bnez a5, .LBB18_6 ; CHECK-NEXT: .LBB18_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB18_6: # %entry ; CHECK-NEXT: beqz a4, .LBB18_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1453,7 +1447,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB18_12 ; CHECK-NEXT: .LBB18_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB18_13 ; CHECK-NEXT: j .LBB18_14 ; CHECK-NEXT: .LBB18_10: # %entry @@ -1468,13 +1462,13 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB18_14 ; CHECK-NEXT: .LBB18_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB18_14: # %entry ; CHECK-NEXT: bnez a1, .LBB18_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB18_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1505,18 +1499,18 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv 
s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: beqz a1, .LBB19_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB19_2: # %entry -; CHECK-NEXT: beqz s2, .LBB19_4 +; CHECK-NEXT: beqz s1, .LBB19_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB19_4: # %entry @@ -1548,12 +1542,12 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bgtz a1, .LBB20_7 @@ -1584,10 +1578,10 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB20_4 ; CHECK-NEXT: .LBB20_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB20_5 ; CHECK-NEXT: .LBB20_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB20_6 ; CHECK-NEXT: .LBB20_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -1597,9 +1591,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: .LBB20_14: # %entry ; CHECK-NEXT: bnez a1, .LBB20_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB20_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1629,11 +1623,11 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; 
CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -1643,7 +1637,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB21_4 ; CHECK-NEXT: .LBB21_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB21_5 ; CHECK-NEXT: j .LBB21_6 ; CHECK-NEXT: .LBB21_3: @@ -1654,7 +1648,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: bnez a5, .LBB21_6 ; CHECK-NEXT: .LBB21_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB21_6: # %entry ; CHECK-NEXT: beqz a4, .LBB21_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1664,7 +1658,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB21_12 ; CHECK-NEXT: .LBB21_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB21_13 ; CHECK-NEXT: j .LBB21_14 ; CHECK-NEXT: .LBB21_10: # %entry @@ -1679,13 +1673,13 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB21_14 ; CHECK-NEXT: .LBB21_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB21_14: # %entry ; CHECK-NEXT: bnez a1, .LBB21_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB21_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1716,18 +1710,18 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: 
call __fixunssfti@plt ; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: beqz a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB22_2: # %entry -; CHECK-NEXT: beqz s2, .LBB22_4 +; CHECK-NEXT: beqz s1, .LBB22_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB22_4: # %entry @@ -1759,12 +1753,12 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bgtz a1, .LBB23_7 @@ -1795,10 +1789,10 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB23_4 ; CHECK-NEXT: .LBB23_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB23_5 ; CHECK-NEXT: .LBB23_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB23_6 ; CHECK-NEXT: .LBB23_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -1808,9 +1802,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-NEXT: .LBB23_14: # %entry ; CHECK-NEXT: bnez a1, .LBB23_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB23_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1840,12 +1834,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; 
CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a0 @@ -1856,7 +1850,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: slti a4, a1, 0 ; CHECK-NEXT: bnez s1, .LBB24_4 ; CHECK-NEXT: .LBB24_2: -; CHECK-NEXT: sltu a5, s2, a3 +; CHECK-NEXT: sltu a5, s0, a3 ; CHECK-NEXT: beqz a5, .LBB24_5 ; CHECK-NEXT: j .LBB24_6 ; CHECK-NEXT: .LBB24_3: @@ -1867,7 +1861,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: bnez a5, .LBB24_6 ; CHECK-NEXT: .LBB24_5: # %entry ; CHECK-NEXT: li s1, 0 -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB24_6: # %entry ; CHECK-NEXT: beqz a4, .LBB24_10 ; CHECK-NEXT: # %bb.7: # %entry @@ -1877,7 +1871,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: slt a1, a0, a1 ; CHECK-NEXT: bne s1, a0, .LBB24_12 ; CHECK-NEXT: .LBB24_9: -; CHECK-NEXT: sltu a0, a3, s2 +; CHECK-NEXT: sltu a0, a3, s0 ; CHECK-NEXT: beqz a0, .LBB24_13 ; CHECK-NEXT: j .LBB24_14 ; CHECK-NEXT: .LBB24_10: # %entry @@ -1892,13 +1886,13 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEXT: slt a0, a0, s1 ; CHECK-NEXT: bnez a0, .LBB24_14 ; CHECK-NEXT: .LBB24_13: # %entry -; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: mv s0, a3 ; CHECK-NEXT: .LBB24_14: # %entry ; CHECK-NEXT: bnez a1, .LBB24_16 ; CHECK-NEXT: # %bb.15: # %entry ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB24_16: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -1929,20 +1923,20 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv 
s1, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv s2, a1 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: beqz a1, .LBB25_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB25_2: # %entry -; CHECK-NEXT: beqz s2, .LBB25_4 +; CHECK-NEXT: beqz s1, .LBB25_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB25_4: # %entry @@ -1974,13 +1968,13 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 @@ -2012,10 +2006,10 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: blez s1, .LBB26_4 ; CHECK-NEXT: .LBB26_10: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: bnez a3, .LBB26_5 ; CHECK-NEXT: .LBB26_11: -; CHECK-NEXT: snez a1, s2 +; CHECK-NEXT: snez a1, s0 ; CHECK-NEXT: beqz a2, .LBB26_6 ; CHECK-NEXT: .LBB26_12: # %entry ; CHECK-NEXT: sgtz a2, a2 @@ -2025,9 +2019,9 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEXT: .LBB26_14: # %entry ; CHECK-NEXT: bnez a1, .LBB26_16 ; CHECK-NEXT: # %bb.15: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: .LBB26_16: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte 
Folded Reload @@ -2158,56 +2152,56 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: fmv.w.x ft2, a3 ; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: lui a6, 524288 -; CHECK-NEXT: addiw a5, a6, -1 +; CHECK-NEXT: lui a4, 524288 +; CHECK-NEXT: addiw a6, a4, -1 ; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB30_2 +; CHECK-NEXT: blt a2, a6, .LBB30_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: mv a2, a6 ; CHECK-NEXT: .LBB30_2: # %entry ; CHECK-NEXT: fmv.w.x ft1, a1 ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB30_11 +; CHECK-NEXT: bge a3, a6, .LBB30_11 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB30_12 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: bge a1, a6, .LBB30_12 ; CHECK-NEXT: .LBB30_4: # %entry -; CHECK-NEXT: bge a4, a5, .LBB30_13 +; CHECK-NEXT: bge a5, a6, .LBB30_13 ; CHECK-NEXT: .LBB30_5: # %entry -; CHECK-NEXT: bge a6, a4, .LBB30_14 +; CHECK-NEXT: bge a4, a5, .LBB30_14 ; CHECK-NEXT: .LBB30_6: # %entry -; CHECK-NEXT: bge a6, a1, .LBB30_15 +; CHECK-NEXT: bge a4, a1, .LBB30_15 ; CHECK-NEXT: .LBB30_7: # %entry -; CHECK-NEXT: bge a6, a3, .LBB30_16 +; CHECK-NEXT: bge a4, a3, .LBB30_16 ; CHECK-NEXT: .LBB30_8: # %entry -; CHECK-NEXT: blt a6, a2, .LBB30_10 +; CHECK-NEXT: blt a4, a2, .LBB30_10 ; CHECK-NEXT: .LBB30_9: # %entry ; CHECK-NEXT: lui a2, 524288 ; CHECK-NEXT: .LBB30_10: # %entry ; CHECK-NEXT: sw a2, 12(a0) ; CHECK-NEXT: sw a3, 8(a0) ; CHECK-NEXT: sw a1, 4(a0) -; CHECK-NEXT: sw a4, 0(a0) +; CHECK-NEXT: sw a5, 0(a0) ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB30_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB30_4 +; CHECK-NEXT: mv a3, a6 +; CHECK-NEXT: fcvt.l.s a5, ft1, rtz +; CHECK-NEXT: blt a1, a6, .LBB30_4 ; CHECK-NEXT: .LBB30_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB30_5 +; CHECK-NEXT: mv a1, a6 +; 
CHECK-NEXT: blt a5, a6, .LBB30_5 ; CHECK-NEXT: .LBB30_13: # %entry -; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: blt a6, a4, .LBB30_6 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: blt a4, a5, .LBB30_6 ; CHECK-NEXT: .LBB30_14: # %entry -; CHECK-NEXT: lui a4, 524288 -; CHECK-NEXT: blt a6, a1, .LBB30_7 +; CHECK-NEXT: lui a5, 524288 +; CHECK-NEXT: blt a4, a1, .LBB30_7 ; CHECK-NEXT: .LBB30_15: # %entry ; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: blt a6, a3, .LBB30_8 +; CHECK-NEXT: blt a4, a3, .LBB30_8 ; CHECK-NEXT: .LBB30_16: # %entry ; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: bge a6, a2, .LBB30_9 +; CHECK-NEXT: bge a4, a2, .LBB30_9 ; CHECK-NEXT: j .LBB30_10 entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -2346,23 +2340,23 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2373,10 +2367,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB33_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; 
CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: bge s0, a4, .LBB33_11 +; CHECK-NEXT: bge s3, a4, .LBB33_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: bge a2, a4, .LBB33_12 @@ -2387,16 +2381,16 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .LBB33_6: # %entry ; CHECK-NEXT: bge a1, a2, .LBB33_15 ; CHECK-NEXT: .LBB33_7: # %entry -; CHECK-NEXT: bge a1, s0, .LBB33_16 +; CHECK-NEXT: bge a1, s3, .LBB33_16 ; CHECK-NEXT: .LBB33_8: # %entry ; CHECK-NEXT: blt a1, a0, .LBB33_10 ; CHECK-NEXT: .LBB33_9: # %entry ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: .LBB33_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a2, 4(s3) -; CHECK-NEXT: sw a3, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a2, 4(s0) +; CHECK-NEXT: sw a3, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -2406,7 +2400,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB33_11: # %entry -; CHECK-NEXT: mv s0, a4 +; CHECK-NEXT: mv s3, a4 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz ; CHECK-NEXT: blt a2, a4, .LBB33_4 ; CHECK-NEXT: .LBB33_12: # %entry @@ -2420,9 +2414,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: blt a1, a2, .LBB33_7 ; CHECK-NEXT: .LBB33_15: # %entry ; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: blt a1, s0, .LBB33_8 +; CHECK-NEXT: blt a1, s3, .LBB33_8 ; CHECK-NEXT: .LBB33_16: # %entry -; CHECK-NEXT: lui s0, 524288 +; CHECK-NEXT: lui s3, 524288 ; CHECK-NEXT: bge a1, a0, .LBB33_9 ; CHECK-NEXT: j .LBB33_10 entry: @@ -2451,23 +2445,23 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 
24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s3, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2478,10 +2472,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB34_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz -; CHECK-NEXT: bgeu s1, a1, .LBB34_7 +; CHECK-NEXT: bgeu s3, a1, .LBB34_7 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bgeu a2, a1, .LBB34_8 @@ -2492,7 +2486,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .LBB34_6: # %entry ; CHECK-NEXT: sw a3, 12(s0) ; CHECK-NEXT: sw a2, 8(s0) -; CHECK-NEXT: sw s1, 4(s0) +; CHECK-NEXT: sw s3, 4(s0) ; CHECK-NEXT: sw a0, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2503,7 +2497,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB34_7: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv s3, a1 ; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz ; CHECK-NEXT: bltu a2, a1, .LBB34_4 ; CHECK-NEXT: .LBB34_8: # %entry @@ -2535,23 +2529,23 @@ define <4 
x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .cfi_offset s3, -40 ; CHECK-NEXT: .cfi_offset s4, -48 ; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s4, 0(a1) -; CHECK-NEXT: lhu s0, 8(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s4 +; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s4, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s3 ; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fcvt.l.s s0, ft0, rtz +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fcvt.l.s s3, ft0, rtz ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 @@ -2562,10 +2556,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a3 ; CHECK-NEXT: .LBB35_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: fmv.w.x ft0, s1 ; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; CHECK-NEXT: bge s0, a3, .LBB35_11 +; CHECK-NEXT: bge s3, a3, .LBB35_11 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: bge a1, a3, .LBB35_12 @@ -2576,16 +2570,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: .LBB35_6: # %entry ; CHECK-NEXT: blez a1, .LBB35_15 ; CHECK-NEXT: .LBB35_7: # %entry -; CHECK-NEXT: blez s0, .LBB35_16 +; CHECK-NEXT: blez s3, .LBB35_16 ; CHECK-NEXT: .LBB35_8: # %entry ; CHECK-NEXT: bgtz a0, .LBB35_10 ; CHECK-NEXT: .LBB35_9: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB35_10: # %entry -; CHECK-NEXT: sw a0, 12(s3) -; 
CHECK-NEXT: sw s0, 8(s3) -; CHECK-NEXT: sw a1, 4(s3) -; CHECK-NEXT: sw a2, 0(s3) +; CHECK-NEXT: sw a0, 12(s0) +; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw a1, 4(s0) +; CHECK-NEXT: sw a2, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -2595,7 +2589,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB35_11: # %entry -; CHECK-NEXT: mv s0, a3 +; CHECK-NEXT: mv s3, a3 ; CHECK-NEXT: fcvt.l.s a2, ft0, rtz ; CHECK-NEXT: blt a1, a3, .LBB35_4 ; CHECK-NEXT: .LBB35_12: # %entry @@ -2609,9 +2603,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEXT: bgtz a1, .LBB35_7 ; CHECK-NEXT: .LBB35_15: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s0, .LBB35_8 +; CHECK-NEXT: bgtz s3, .LBB35_8 ; CHECK-NEXT: .LBB35_16: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li s3, 0 ; CHECK-NEXT: blez a0, .LBB35_9 ; CHECK-NEXT: j .LBB35_10 entry: @@ -2923,7 +2917,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -2934,23 +2927,19 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 
40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -2965,81 +2954,84 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB42_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB42_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB42_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB42_4 +; CHECK-NEXT: blt s7, a7, .LBB42_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB42_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB42_6 +; CHECK-NEXT: blt a1, a7, .LBB42_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 
+; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB42_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB42_8 +; CHECK-NEXT: blt a2, a7, .LBB42_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB42_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB42_10 +; CHECK-NEXT: blt a3, a7, .LBB42_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB42_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB42_23 +; CHECK-NEXT: bge a4, a7, .LBB42_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB42_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB42_24 ; CHECK-NEXT: .LBB42_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB42_25 +; CHECK-NEXT: bge a6, a7, .LBB42_25 ; CHECK-NEXT: .LBB42_13: # %entry -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: bge s1, s0, .LBB42_26 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: bge a7, a6, .LBB42_26 ; CHECK-NEXT: .LBB42_14: # %entry -; CHECK-NEXT: bge s1, a5, .LBB42_27 +; CHECK-NEXT: bge a7, a5, .LBB42_27 ; CHECK-NEXT: .LBB42_15: # %entry -; CHECK-NEXT: bge s1, a4, .LBB42_28 +; CHECK-NEXT: bge a7, a4, .LBB42_28 ; CHECK-NEXT: .LBB42_16: # %entry -; CHECK-NEXT: bge s1, a3, .LBB42_29 +; CHECK-NEXT: bge a7, a3, .LBB42_29 ; CHECK-NEXT: .LBB42_17: # %entry -; CHECK-NEXT: bge s1, a2, .LBB42_30 +; CHECK-NEXT: bge a7, a2, .LBB42_30 ; CHECK-NEXT: .LBB42_18: # %entry -; CHECK-NEXT: bge s1, a1, .LBB42_31 +; CHECK-NEXT: bge a7, a1, .LBB42_31 ; CHECK-NEXT: .LBB42_19: # %entry -; CHECK-NEXT: bge s1, s9, .LBB42_32 +; CHECK-NEXT: bge a7, s7, .LBB42_32 ; CHECK-NEXT: .LBB42_20: # %entry -; CHECK-NEXT: blt s1, a0, .LBB42_22 +; CHECK-NEXT: blt 
a7, a0, .LBB42_22 ; CHECK-NEXT: .LBB42_21: # %entry ; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: .LBB42_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3050,41 +3042,40 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB42_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB42_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB42_12 ; CHECK-NEXT: .LBB42_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB42_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB42_13 ; CHECK-NEXT: .LBB42_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: lui s1, 1048568 -; CHECK-NEXT: blt s1, s0, .LBB42_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: lui a7, 1048568 +; CHECK-NEXT: blt a7, a6, .LBB42_14 ; CHECK-NEXT: .LBB42_26: # %entry -; CHECK-NEXT: lui s0, 1048568 -; CHECK-NEXT: blt s1, a5, .LBB42_15 +; CHECK-NEXT: lui a6, 1048568 +; CHECK-NEXT: blt a7, a5, .LBB42_15 ; CHECK-NEXT: .LBB42_27: # %entry ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt s1, a4, .LBB42_16 +; CHECK-NEXT: blt a7, a4, .LBB42_16 ; CHECK-NEXT: .LBB42_28: # 
%entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt s1, a3, .LBB42_17 +; CHECK-NEXT: blt a7, a3, .LBB42_17 ; CHECK-NEXT: .LBB42_29: # %entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: blt s1, a2, .LBB42_18 +; CHECK-NEXT: blt a7, a2, .LBB42_18 ; CHECK-NEXT: .LBB42_30: # %entry ; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: blt s1, a1, .LBB42_19 +; CHECK-NEXT: blt a7, a1, .LBB42_19 ; CHECK-NEXT: .LBB42_31: # %entry ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt s1, s9, .LBB42_20 +; CHECK-NEXT: blt a7, s7, .LBB42_20 ; CHECK-NEXT: .LBB42_32: # %entry -; CHECK-NEXT: lui s9, 1048568 -; CHECK-NEXT: bge s1, a0, .LBB42_21 +; CHECK-NEXT: lui s7, 1048568 +; CHECK-NEXT: bge a7, a0, .LBB42_21 ; CHECK-NEXT: j .LBB42_22 entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -3109,7 +3100,6 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3120,25 +3110,21 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s5, 0(a1) -; CHECK-NEXT: lhu s2, 56(a1) -; CHECK-NEXT: lhu s3, 48(a1) -; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s6, 32(a1) -; CHECK-NEXT: lhu s7, 24(a1) -; CHECK-NEXT: lhu s1, 16(a1) +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 48(a1) +; CHECK-NEXT: lhu s3, 40(a1) +; CHECK-NEXT: lhu s4, 32(a1) +; CHECK-NEXT: lhu s6, 24(a1) +; CHECK-NEXT: lhu s7, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s8, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 ; CHECK-NEXT: mv a0, s7 ; 
CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s6, a0 @@ -3151,72 +3137,75 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s6 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz ; CHECK-NEXT: fmv.w.x ft0, s8 ; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz -; CHECK-NEXT: sext.w s7, a0 +; CHECK-NEXT: sext.w s6, a0 ; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz -; CHECK-NEXT: sext.w a6, a0 -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addiw a1, a0, -1 -; CHECK-NEXT: bltu a6, a1, .LBB43_2 +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: lui a1, 16 +; CHECK-NEXT: addiw a1, a1, -1 +; CHECK-NEXT: bltu a0, a1, .LBB43_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB43_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s6 -; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: sext.w a2, s1 -; CHECK-NEXT: bltu s7, a1, .LBB43_4 +; CHECK-NEXT: fmv.w.x ft0, s4 +; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz +; CHECK-NEXT: sext.w a2, s7 +; CHECK-NEXT: bltu s6, a1, .LBB43_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s7, a1 +; CHECK-NEXT: mv s6, a1 ; CHECK-NEXT: .LBB43_4: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz -; CHECK-NEXT: sext.w a3, a0 +; CHECK-NEXT: sext.w a3, a3 ; 
CHECK-NEXT: bltu a2, a1, .LBB43_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB43_6: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: fmv.w.x ft0, s2 +; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz ; CHECK-NEXT: sext.w a4, a4 ; CHECK-NEXT: bltu a3, a1, .LBB43_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB43_8: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz -; CHECK-NEXT: sext.w a5, a0 +; CHECK-NEXT: fmv.w.x ft1, s1 +; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz +; CHECK-NEXT: sext.w a5, a5 ; CHECK-NEXT: bltu a4, a1, .LBB43_10 ; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB43_10: # %entry -; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz -; CHECK-NEXT: sext.w s1, s1 +; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: sext.w a6, a6 ; CHECK-NEXT: bgeu a5, a1, .LBB43_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: bgeu s1, a1, .LBB43_16 +; CHECK-NEXT: sext.w a7, a7 +; CHECK-NEXT: bgeu a6, a1, .LBB43_16 ; CHECK-NEXT: .LBB43_12: # %entry -; CHECK-NEXT: bltu a0, a1, .LBB43_14 +; CHECK-NEXT: bltu a7, a1, .LBB43_14 ; CHECK-NEXT: .LBB43_13: # %entry -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a7, a1 ; CHECK-NEXT: .LBB43_14: # %entry -; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s1, 12(s0) +; CHECK-NEXT: sh a7, 14(s0) +; CHECK-NEXT: sh a6, 12(s0) ; CHECK-NEXT: sh a5, 10(s0) ; CHECK-NEXT: sh a4, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a2, 4(s0) -; CHECK-NEXT: sh s7, 2(s0) -; CHECK-NEXT: sh a6, 0(s0) +; CHECK-NEXT: sh s6, 2(s0) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3227,16 +3216,15 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) 
# 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB43_15: # %entry ; CHECK-NEXT: mv a5, a1 -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: bltu s1, a1, .LBB43_12 +; CHECK-NEXT: sext.w a7, a7 +; CHECK-NEXT: bltu a6, a1, .LBB43_12 ; CHECK-NEXT: .LBB43_16: # %entry -; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: bgeu a0, a1, .LBB43_13 +; CHECK-NEXT: mv a6, a1 +; CHECK-NEXT: bgeu a7, a1, .LBB43_13 ; CHECK-NEXT: j .LBB43_14 entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -3260,7 +3248,6 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3271,23 +3258,19 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 ; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s3, 8(a1) -; CHECK-NEXT: lhu s4, 16(a1) -; CHECK-NEXT: lhu s5, 24(a1) -; CHECK-NEXT: lhu s1, 32(a1) -; CHECK-NEXT: lhu s0, 40(a1) +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 8(a1) +; CHECK-NEXT: lhu s3, 16(a1) +; CHECK-NEXT: lhu s4, 24(a1) +; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 -; CHECK-NEXT: mv a0, s0 -; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s7, a0 ; CHECK-NEXT: mv a0, s5 @@ -3302,55 +3285,58 @@ define <8 x i16> @ustest_f16i16_mm(<8 
x half> %x) { ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: fmv.w.x ft0, s0 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.l.s s9, ft0, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __gnu_h2f_ieee@plt +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: fmv.w.x ft0, s7 +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x ft0, s8 +; CHECK-NEXT: fcvt.l.s s7, ft0, rtz ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.l.s a0, ft0, rtz ; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw s1, a1, -1 -; CHECK-NEXT: blt a0, s1, .LBB44_2 +; CHECK-NEXT: addiw a7, a1, -1 +; CHECK-NEXT: blt a0, a7, .LBB44_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: mv a0, a7 ; CHECK-NEXT: .LBB44_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s7 -; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: fmv.w.x ft1, s5 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload ; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s9, s1, .LBB44_4 +; CHECK-NEXT: blt s7, a7, .LBB44_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s9, s1 +; CHECK-NEXT: mv s7, a7 ; CHECK-NEXT: .LBB44_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fmv.w.x ft0, s4 ; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, s1, .LBB44_6 +; CHECK-NEXT: blt a1, a7, .LBB44_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, s1 +; CHECK-NEXT: mv a1, a7 ; CHECK-NEXT: .LBB44_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fmv.w.x ft1, s3 ; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, s1, .LBB44_8 +; CHECK-NEXT: blt a2, a7, .LBB44_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, s1 +; CHECK-NEXT: mv a2, a7 ; CHECK-NEXT: .LBB44_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fmv.w.x ft0, s2 ; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, s1, .LBB44_10 +; 
CHECK-NEXT: blt a3, a7, .LBB44_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, s1 +; CHECK-NEXT: mv a3, a7 ; CHECK-NEXT: .LBB44_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fmv.w.x ft1, s1 ; CHECK-NEXT: fcvt.l.s a5, ft0, rtz -; CHECK-NEXT: bge a4, s1, .LBB44_23 +; CHECK-NEXT: bge a4, a7, .LBB44_23 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: bge a5, s1, .LBB44_24 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: bge a5, a7, .LBB44_24 ; CHECK-NEXT: .LBB44_12: # %entry -; CHECK-NEXT: bge s0, s1, .LBB44_25 +; CHECK-NEXT: bge a6, a7, .LBB44_25 ; CHECK-NEXT: .LBB44_13: # %entry -; CHECK-NEXT: blez s0, .LBB44_26 +; CHECK-NEXT: blez a6, .LBB44_26 ; CHECK-NEXT: .LBB44_14: # %entry ; CHECK-NEXT: blez a5, .LBB44_27 ; CHECK-NEXT: .LBB44_15: # %entry @@ -3362,20 +3348,20 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: .LBB44_18: # %entry ; CHECK-NEXT: blez a1, .LBB44_31 ; CHECK-NEXT: .LBB44_19: # %entry -; CHECK-NEXT: blez s9, .LBB44_32 +; CHECK-NEXT: blez s7, .LBB44_32 ; CHECK-NEXT: .LBB44_20: # %entry ; CHECK-NEXT: bgtz a0, .LBB44_22 ; CHECK-NEXT: .LBB44_21: # %entry ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: .LBB44_22: # %entry -; CHECK-NEXT: sh a0, 14(s8) -; CHECK-NEXT: sh s9, 12(s8) -; CHECK-NEXT: sh a1, 10(s8) -; CHECK-NEXT: sh a2, 8(s8) -; CHECK-NEXT: sh a3, 6(s8) -; CHECK-NEXT: sh a4, 4(s8) -; CHECK-NEXT: sh a5, 2(s8) -; CHECK-NEXT: sh s0, 0(s8) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh a1, 10(s0) +; CHECK-NEXT: sh a2, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a4, 4(s0) +; CHECK-NEXT: sh a5, 2(s0) +; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload @@ -3386,21 +3372,20 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded 
Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB44_23: # %entry -; CHECK-NEXT: mv a4, s1 -; CHECK-NEXT: fcvt.l.s s0, ft1, rtz -; CHECK-NEXT: blt a5, s1, .LBB44_12 +; CHECK-NEXT: mv a4, a7 +; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: blt a5, a7, .LBB44_12 ; CHECK-NEXT: .LBB44_24: # %entry -; CHECK-NEXT: mv a5, s1 -; CHECK-NEXT: blt s0, s1, .LBB44_13 +; CHECK-NEXT: mv a5, a7 +; CHECK-NEXT: blt a6, a7, .LBB44_13 ; CHECK-NEXT: .LBB44_25: # %entry -; CHECK-NEXT: mv s0, s1 -; CHECK-NEXT: bgtz s0, .LBB44_14 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: bgtz a6, .LBB44_14 ; CHECK-NEXT: .LBB44_26: # %entry -; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: bgtz a5, .LBB44_15 ; CHECK-NEXT: .LBB44_27: # %entry ; CHECK-NEXT: li a5, 0 @@ -3416,9 +3401,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: bgtz a1, .LBB44_19 ; CHECK-NEXT: .LBB44_31: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s9, .LBB44_20 +; CHECK-NEXT: bgtz s7, .LBB44_20 ; CHECK-NEXT: .LBB44_32: # %entry -; CHECK-NEXT: li s9, 0 +; CHECK-NEXT: li s7, 0 ; CHECK-NEXT: blez a0, .LBB44_21 ; CHECK-NEXT: j .LBB44_22 entry: @@ -3559,12 +3544,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunsdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3581,7 +3566,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: .LBB46_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB46_6 ; CHECK-NEXT: .LBB46_5: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB46_6: # 
%entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3593,7 +3578,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB46_4 ; CHECK-NEXT: .LBB46_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB46_5 ; CHECK-NEXT: j .LBB46_6 entry: @@ -3616,11 +3601,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixdfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3673,11 +3658,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB47_5 ; CHECK-NEXT: .LBB47_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB47_6 ; CHECK-NEXT: .LBB47_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB47_7 ; CHECK-NEXT: .LBB47_18: # %entry @@ -3827,12 +3812,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __fixunssfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3849,7 +3834,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: .LBB49_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB49_6 ; CHECK-NEXT: .LBB49_5: # %entry -; CHECK-NEXT: mv 
a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB49_6: # %entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -3861,7 +3846,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB49_4 ; CHECK-NEXT: .LBB49_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB49_5 ; CHECK-NEXT: j .LBB49_6 entry: @@ -3884,11 +3869,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3941,11 +3926,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB50_5 ; CHECK-NEXT: .LBB50_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB50_6 ; CHECK-NEXT: .LBB50_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB50_7 ; CHECK-NEXT: .LBB50_18: # %entry @@ -4097,13 +4082,13 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv a2, a0 @@ -4121,7 +4106,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: 
.LBB52_4: # %entry ; CHECK-NEXT: beq s1, a4, .LBB52_6 ; CHECK-NEXT: .LBB52_5: # %entry -; CHECK-NEXT: mv a1, s2 +; CHECK-NEXT: mv a1, s0 ; CHECK-NEXT: .LBB52_6: # %entry ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -4133,7 +4118,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: beq s1, a1, .LBB52_4 ; CHECK-NEXT: .LBB52_8: # %entry -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: mv s0, a1 ; CHECK-NEXT: bne s1, a4, .LBB52_5 ; CHECK-NEXT: j .LBB52_6 entry: @@ -4156,12 +4141,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s0 +; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 @@ -4215,11 +4200,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: blez s1, .LBB53_5 ; CHECK-NEXT: .LBB53_16: # %entry -; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: li s0, 0 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: beq s1, a5, .LBB53_6 ; CHECK-NEXT: .LBB53_17: # %entry -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bgtz a4, .LBB53_7 ; CHECK-NEXT: .LBB53_18: # %entry diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index a3dd3af884554..02e87dc3883ed 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -31,13 +31,13 @@ define half @fadd_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv 
s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -57,13 +57,13 @@ define half @fadd_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -97,13 +97,13 @@ define half @fsub_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -123,13 +123,13 @@ define half @fsub_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; 
RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -163,13 +163,13 @@ define half @fmul_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -189,13 +189,13 @@ define half @fmul_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -229,13 +229,13 @@ define half @fdiv_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; 
RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -255,13 +255,13 @@ define half @fdiv_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -461,27 +461,27 @@ define half @fsgnjn_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lui a1, 1048568 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: slli a1, s2, 17 +; RV32I-NEXT: slli a1, s1, 17 ; RV32I-NEXT: srli a1, a1, 17 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -500,27 +500,27 @@ define half 
@fsgnjn_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a1 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: lui a1, 1048568 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: slli a1, s2, 49 +; RV64I-NEXT: slli a1, s1, 49 ; RV64I-NEXT: srli a1, a1, 49 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -562,25 +562,25 @@ define half @fabs_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; 
RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt @@ -599,25 +599,25 @@ define half @fabs_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: slli a0, a0, 33 ; RV64I-NEXT: srli a0, a0, 33 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt @@ -654,13 +654,13 @@ define half @fmin_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call 
__gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -680,13 +680,13 @@ define half @fmin_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -722,13 +722,13 @@ define half @fmax_s(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -748,13 +748,13 @@ define half @fmax_s(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -791,20 +791,20 @@ define half @fmadd_s(half %a, half 
%b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt @@ -824,20 +824,20 @@ define half @fmadd_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt @@ -875,33 +875,32 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 
12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -909,7 +908,6 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -921,33 +919,32 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; 
RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -955,7 +952,6 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %c_ = fadd half 0.0, %c ; avoid negation using xor @@ -990,44 +986,44 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; 
RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1048,44 +1044,44 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a2 -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, 
a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1130,44 +1126,44 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a2 -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a2 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s1, a0, -1 -; 
RV32I-NEXT: and a0, a1, s1 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui s4, 524288 ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: xor a0, a0, s4 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1188,44 +1184,44 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a2 -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a2 +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw 
s1, a0, -1 -; RV64I-NEXT: and a0, a1, s1 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui s4, 524288 ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: xor a0, a0, s4 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1267,32 +1263,31 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; 
RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1300,7 +1295,6 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1312,32 +1306,31 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; 
RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s3 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1345,7 +1338,6 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %a_ = fadd half 0.0, %a @@ -1377,33 +1369,32 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a1, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; 
RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s1, s0 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s4, s0 +; RV32I-NEXT: and a0, s0, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s3 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1411,7 +1402,6 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1423,33 +1413,32 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a1, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; 
RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s1, s0 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s4, s0 +; RV64I-NEXT: and a0, s0, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1457,7 +1446,6 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %b_ = fadd half 0.0, %b @@ -1485,26 +1473,26 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; 
RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1523,26 +1511,26 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1580,32 +1568,32 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a1 ; 
RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi s0, a0, -1 -; RV32I-NEXT: and a0, a2, s0 +; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: and a0, a2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1624,32 +1612,32 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 16 -; RV64I-NEXT: addiw s0, a0, -1 -; RV64I-NEXT: and a0, a2, s0 +; RV64I-NEXT: addiw s3, a0, -1 +; RV64I-NEXT: and a0, a2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 ; 
RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1692,50 +1680,49 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s3, s1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s4, s1 +; RV32I-NEXT: and a0, s2, s3 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; 
RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1743,7 +1730,6 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1755,50 +1741,49 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; 
RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s4, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s3, s1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s4, s1 +; RV64I-NEXT: and a0, s2, s3 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1806,7 +1791,6 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %a_ = fadd half 0.0, %a ; avoid negation using xor @@ -1843,39 +1827,39 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 
12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s3, s0 +; RV32I-NEXT: and a0, s2, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1894,39 +1878,39 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call 
__gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s3, s0 +; RV64I-NEXT: and a0, s2, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __subsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 5c68be119ba09..6b9a2c6feb5a6 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -336,17 +336,17 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s2, a1, -1 -; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: and a0, a0, s0 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call 
__fixunssfsi@plt ; RV32I-NEXT: li s3, 0 -; RV32I-NEXT: bltz s0, .LBB3_2 +; RV32I-NEXT: bltz s2, .LBB3_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB3_2: # %start @@ -356,9 +356,9 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB3_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s0, s3 ; RV32I-NEXT: .LBB3_4: # %start -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -376,17 +376,17 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s2, a1, -1 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __fixunssfdi@plt ; RV64I-NEXT: li s3, 0 -; RV64I-NEXT: bltz s0, .LBB3_2 +; RV64I-NEXT: bltz s2, .LBB3_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB3_2: # %start @@ -396,9 +396,9 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: bgtz a0, .LBB3_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s0, s3 ; RV64I-NEXT: .LBB3_4: # %start -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1378,25 +1378,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; 
RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: li s5, 0 -; RV32I-NEXT: bltz s1, .LBB12_2 +; RV32I-NEXT: bltz s2, .LBB12_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: .LBB12_2: # %start ; RV32I-NEXT: lui a0, 391168 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: addi s4, a0, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: li s2, -1 ; RV32I-NEXT: li s3, -1 -; RV32I-NEXT: li s4, -1 ; RV32I-NEXT: bgtz a0, .LBB12_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s4, s5 +; RV32I-NEXT: mv s3, s5 ; RV32I-NEXT: .LBB12_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 @@ -1404,17 +1404,17 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32I-NEXT: li s5, 0 ; RV32I-NEXT: bltz a0, .LBB12_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s5, s2 +; RV32I-NEXT: mv s5, s1 ; RV32I-NEXT: .LBB12_6: # %start ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s4 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB12_8 ; RV32I-NEXT: # %bb.7: # %start -; RV32I-NEXT: mv s3, s5 +; RV32I-NEXT: mv s2, s5 ; RV32I-NEXT: .LBB12_8: # %start -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -2507,12 +2507,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 815104 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: lui s3, 1048568 -; RV32I-NEXT: bltz s2, .LBB32_2 +; RV32I-NEXT: bltz s1, .LBB32_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB32_2: # %start 
@@ -2520,7 +2520,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32I-NEXT: addi a1, a0, -512 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: bge s1, a0, .LBB32_4 +; RV32I-NEXT: bge s2, a0, .LBB32_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: lui a0, 8 ; RV32I-NEXT: addi s3, a0, -1 @@ -2528,11 +2528,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB32_6 +; RV32I-NEXT: bne a0, s2, .LBB32_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s3 +; RV32I-NEXT: mv s2, s3 ; RV32I-NEXT: .LBB32_6: # %start -; RV32I-NEXT: slli a0, s1, 16 +; RV32I-NEXT: slli a0, s2, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -2556,12 +2556,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 815104 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: lui s3, 1048568 -; RV64I-NEXT: bltz s2, .LBB32_2 +; RV64I-NEXT: bltz s1, .LBB32_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB32_2: # %start @@ -2569,7 +2569,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64I-NEXT: addiw a1, a0, -512 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: bge s1, a0, .LBB32_4 +; RV64I-NEXT: bge s2, a0, .LBB32_4 ; RV64I-NEXT: # %bb.3: ; RV64I-NEXT: lui a0, 8 ; RV64I-NEXT: addiw s3, a0, -1 @@ -2577,11 +2577,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB32_6 +; RV64I-NEXT: bne a0, s2, .LBB32_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s3 +; RV64I-NEXT: mv s2, s3 ; 
RV64I-NEXT: .LBB32_6: # %start -; RV64I-NEXT: slli a0, s1, 48 +; RV64I-NEXT: slli a0, s2, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2909,31 +2909,31 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind { ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 798720 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfsi@plt -; RV32I-NEXT: li s1, 0 +; RV32I-NEXT: li s2, 0 ; RV32I-NEXT: li s3, -128 -; RV32I-NEXT: bltz s2, .LBB36_2 +; RV32I-NEXT: bltz s1, .LBB36_2 ; RV32I-NEXT: # %bb.1: # %start ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: .LBB36_2: # %start ; RV32I-NEXT: lui a1, 274400 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: li s2, 127 -; RV32I-NEXT: blt s1, a0, .LBB36_4 +; RV32I-NEXT: li s1, 127 +; RV32I-NEXT: blt s2, a0, .LBB36_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: mv s2, s3 +; RV32I-NEXT: mv s1, s3 ; RV32I-NEXT: .LBB36_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt -; RV32I-NEXT: bne a0, s1, .LBB36_6 +; RV32I-NEXT: bne a0, s2, .LBB36_6 ; RV32I-NEXT: # %bb.5: # %start -; RV32I-NEXT: mv s1, s2 +; RV32I-NEXT: mv s2, s1 ; RV32I-NEXT: .LBB36_6: # %start -; RV32I-NEXT: slli a0, s1, 24 +; RV32I-NEXT: slli a0, s2, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -2957,31 +2957,31 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind { ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lui a1, 798720 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixsfdi@plt -; RV64I-NEXT: li s1, 0 +; RV64I-NEXT: li s2, 0 ; RV64I-NEXT: li s3, -128 -; RV64I-NEXT: bltz s2, .LBB36_2 +; RV64I-NEXT: bltz s1, .LBB36_2 ; RV64I-NEXT: # %bb.1: # %start ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: .LBB36_2: 
# %start ; RV64I-NEXT: lui a1, 274400 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: li s2, 127 -; RV64I-NEXT: blt s1, a0, .LBB36_4 +; RV64I-NEXT: li s1, 127 +; RV64I-NEXT: blt s2, a0, .LBB36_4 ; RV64I-NEXT: # %bb.3: # %start -; RV64I-NEXT: mv s2, s3 +; RV64I-NEXT: mv s1, s3 ; RV64I-NEXT: .LBB36_4: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __unordsf2@plt -; RV64I-NEXT: bne a0, s1, .LBB36_6 +; RV64I-NEXT: bne a0, s2, .LBB36_6 ; RV64I-NEXT: # %bb.5: # %start -; RV64I-NEXT: mv s1, s2 +; RV64I-NEXT: mv s2, s1 ; RV64I-NEXT: .LBB36_6: # %start -; RV64I-NEXT: slli a0, s1, 56 +; RV64I-NEXT: slli a0, s2, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index acde022135e37..0d4e4ac322e1a 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -398,24 +398,24 @@ define half @sincos_f16(half %a) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: call sinf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call cosf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: and a0, s1, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __addsf3@plt ; 
RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -433,24 +433,24 @@ define half @sincos_f16(half %a) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: call sinf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call cosf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: and a0, s1, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -523,13 +523,13 @@ define half @pow_f16(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -549,13 +549,13 @@ define half @pow_f16(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, 
a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -982,20 +982,20 @@ define half @fma_f16(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 +; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: and a0, s1, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt @@ -1015,20 +1015,20 @@ define half @fma_f16(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 +; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: and a0, s1, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, 
s2, s0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: call fmaf@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt @@ -1074,26 +1074,26 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s3, a1, -1 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s3, s1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: and a0, s2, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: and a0, s1, s3 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1112,26 +1112,26 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv s3, a1 +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s3, a1, -1 +; RV64I-NEXT: and a0, 
a0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s3, s1 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __mulsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: and a0, s2, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: and a0, s1, s3 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1213,13 +1213,13 @@ define half @minnum_f16(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -1239,13 +1239,13 @@ define half @minnum_f16(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call 
__gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 @@ -1291,13 +1291,13 @@ define half @maxnum_f16(half %a, half %b) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s1 @@ -1317,13 +1317,13 @@ define half @maxnum_f16(half %a, half %b) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 411369557ff34..3884c67d399da 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1141,50 +1141,50 @@ define i64 @muli64_m3840(i64 %a) nounwind { define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-LABEL: muli128_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a2, 4(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a3, 8(a1) -; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: srli a6, a2, 20 -; RV32I-NEXT: slli a5, a3, 12 -; RV32I-NEXT: or a6, a5, a6 -; RV32I-NEXT: srli a7, a2, 24 -; RV32I-NEXT: slli a5, a3, 8 -; RV32I-NEXT: or a7, 
a5, a7 -; RV32I-NEXT: sltu t0, a7, a6 -; RV32I-NEXT: srli t1, a3, 20 -; RV32I-NEXT: slli a5, a1, 12 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: srli a3, a3, 24 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sub t2, a1, a5 +; RV32I-NEXT: lw a6, 0(a1) +; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: srli a1, a4, 20 -; RV32I-NEXT: slli a3, a2, 12 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: srli a1, a4, 24 -; RV32I-NEXT: slli a2, a2, 8 -; RV32I-NEXT: or a5, a2, a1 -; RV32I-NEXT: slli t1, a4, 12 -; RV32I-NEXT: slli t3, a4, 8 -; RV32I-NEXT: sltu t4, t3, t1 -; RV32I-NEXT: sub t0, t2, t0 -; RV32I-NEXT: mv a2, t4 +; RV32I-NEXT: slli a2, a3, 12 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a4, 24 +; RV32I-NEXT: slli a7, a3, 8 +; RV32I-NEXT: or a2, a7, a2 +; RV32I-NEXT: sltu t0, a2, a1 +; RV32I-NEXT: srli a7, a3, 20 +; RV32I-NEXT: slli t1, a5, 12 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: sub t1, a3, a7 +; RV32I-NEXT: srli a3, a6, 20 +; RV32I-NEXT: slli a5, a4, 12 +; RV32I-NEXT: or a3, a5, a3 +; RV32I-NEXT: srli a5, a6, 24 +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a5, a4, a5 +; RV32I-NEXT: slli a4, a6, 12 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: sltu a7, a6, a4 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: mv t1, a7 ; RV32I-NEXT: beq a5, a3, .LBB30_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a2, a5, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB30_2: -; RV32I-NEXT: sub a1, a7, a6 -; RV32I-NEXT: sltu a4, a1, a2 -; RV32I-NEXT: sub a4, t0, a4 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a2, a5, a3 -; RV32I-NEXT: sub a2, a2, t4 -; RV32I-NEXT: sub a3, t3, t1 -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sltu a2, a1, t1 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: sw a4, 
0(a0) +; RV32I-NEXT: sw a3, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m3840: @@ -1192,56 +1192,54 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a6, 12(a1) -; RV32IM-NEXT: lw a7, 8(a1) +; RV32IM-NEXT: lw a2, 12(a1) +; RV32IM-NEXT: lw a3, 8(a1) ; RV32IM-NEXT: lw a4, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) ; RV32IM-NEXT: lui a5, 1048575 ; RV32IM-NEXT: addi a5, a5, 256 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: mul a3, a1, a5 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: sltu t0, a2, a3 -; RV32IM-NEXT: mulhu a3, a1, a5 -; RV32IM-NEXT: add t5, a3, t0 -; RV32IM-NEXT: sub t0, a2, a4 -; RV32IM-NEXT: neg t4, a4 -; RV32IM-NEXT: sltu t1, t0, t4 +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: mul a7, a1, a5 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: sltu a7, a6, a7 +; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: add a7, t0, a7 +; RV32IM-NEXT: sub a6, a6, a4 +; RV32IM-NEXT: neg t0, a4 +; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 ; RV32IM-NEXT: mulhu t3, a4, t2 -; RV32IM-NEXT: add a2, t3, t1 -; RV32IM-NEXT: add t1, t5, a2 -; RV32IM-NEXT: sub a3, t1, a1 -; RV32IM-NEXT: mul a2, a7, a5 -; RV32IM-NEXT: sub a2, a2, a4 -; RV32IM-NEXT: add t6, a3, a2 -; RV32IM-NEXT: sltu s2, t6, a3 +; RV32IM-NEXT: add t1, t3, t1 +; RV32IM-NEXT: add t1, a7, t1 +; RV32IM-NEXT: sub t4, t1, a1 +; RV32IM-NEXT: mul t5, a3, a5 +; RV32IM-NEXT: sub t5, t5, a4 +; RV32IM-NEXT: add t6, t4, t5 +; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a1 -; RV32IM-NEXT: sltu a3, a3, s1 -; RV32IM-NEXT: sltu s1, t1, t5 -; RV32IM-NEXT: mulhu s0, a1, t2 -; RV32IM-NEXT: add s1, s0, s1 -; RV32IM-NEXT: add a3, s1, a3 -; RV32IM-NEXT: sltu a2, a2, t4 -; RV32IM-NEXT: mul s1, a6, a5 -; RV32IM-NEXT: mulhu s0, a7, a5 
-; RV32IM-NEXT: sub s0, s0, a7 -; RV32IM-NEXT: add s1, s0, s1 -; RV32IM-NEXT: sub s0, t3, a4 -; RV32IM-NEXT: sub a1, s0, a1 -; RV32IM-NEXT: add a1, a1, s1 +; RV32IM-NEXT: sltu t4, t4, s1 +; RV32IM-NEXT: sltu a7, t1, a7 +; RV32IM-NEXT: mulhu t1, a1, t2 +; RV32IM-NEXT: add a7, t1, a7 +; RV32IM-NEXT: add a7, a7, t4 +; RV32IM-NEXT: sltu t0, t5, t0 +; RV32IM-NEXT: mul a2, a2, a5 +; RV32IM-NEXT: mulhu t1, a3, a5 +; RV32IM-NEXT: sub a3, t1, a3 +; RV32IM-NEXT: add a2, a3, a2 +; RV32IM-NEXT: sub a3, t3, a4 +; RV32IM-NEXT: sub a1, a3, a1 ; RV32IM-NEXT: add a1, a1, a2 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: add a1, a1, s2 +; RV32IM-NEXT: add a1, a1, t0 +; RV32IM-NEXT: add a1, a7, a1 +; RV32IM-NEXT: add a1, a1, s0 ; RV32IM-NEXT: mul a2, a4, a5 ; RV32IM-NEXT: sw a2, 0(a0) -; RV32IM-NEXT: sw t0, 4(a0) +; RV32IM-NEXT: sw a6, 4(a0) ; RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 16 ; RV32IM-NEXT: ret ; @@ -1279,39 +1277,39 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-LABEL: muli128_m63: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a5, 12(a1) +; RV32I-NEXT: lw a7, 8(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: slli a6, a2, 6 -; RV32I-NEXT: sltu a7, a2, a6 -; RV32I-NEXT: srli a1, a2, 26 -; RV32I-NEXT: slli a5, a3, 6 -; RV32I-NEXT: or t2, a5, a1 -; RV32I-NEXT: mv t3, a7 -; RV32I-NEXT: beq a3, t2, .LBB31_2 +; RV32I-NEXT: slli a1, a2, 6 +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: srli a6, a2, 26 +; RV32I-NEXT: slli t0, a3, 6 +; RV32I-NEXT: or a6, t0, a6 +; RV32I-NEXT: mv t0, a4 +; RV32I-NEXT: beq a3, a6, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t3, a3, t2 +; RV32I-NEXT: sltu t0, a3, a6 ; RV32I-NEXT: .LBB31_2: ; RV32I-NEXT: srli t1, a3, 26 -; RV32I-NEXT: slli a1, a4, 6 -; RV32I-NEXT: or a1, 
a1, t1 -; RV32I-NEXT: sub a5, a4, a1 -; RV32I-NEXT: sltu t1, a5, t3 -; RV32I-NEXT: sltu t4, a4, a1 -; RV32I-NEXT: srli a4, a4, 26 -; RV32I-NEXT: slli a1, t0, 6 -; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a1, a1, t4 -; RV32I-NEXT: sub a1, a1, t1 -; RV32I-NEXT: sub a4, a5, t3 -; RV32I-NEXT: sub a3, a3, t2 -; RV32I-NEXT: sub a3, a3, a7 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: slli t2, a7, 6 +; RV32I-NEXT: or t1, t2, t1 +; RV32I-NEXT: sub t2, a7, t1 +; RV32I-NEXT: sltu t3, t2, t0 +; RV32I-NEXT: sltu t1, a7, t1 +; RV32I-NEXT: srli a7, a7, 26 +; RV32I-NEXT: slli t4, a5, 6 +; RV32I-NEXT: or a7, t4, a7 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a5, a5, t1 +; RV32I-NEXT: sub a5, a5, t3 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: sub a3, a3, a6 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a7, 8(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m63: @@ -1319,55 +1317,55 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32IM-NEXT: addi sp, sp, -16 ; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw a7, 12(a1) +; RV32IM-NEXT: lw a2, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a4, 4(a1) -; RV32IM-NEXT: lw t5, 8(a1) -; RV32IM-NEXT: li a6, -63 -; RV32IM-NEXT: mulhu a5, a3, a6 -; RV32IM-NEXT: slli a2, a4, 6 -; RV32IM-NEXT: sub a2, a2, a4 -; RV32IM-NEXT: sub a5, a5, a2 -; RV32IM-NEXT: neg a2, a2 -; RV32IM-NEXT: sltu t0, a5, a2 -; RV32IM-NEXT: mulhu a2, a4, a6 -; RV32IM-NEXT: add t4, a2, t0 -; RV32IM-NEXT: sub t0, a5, a3 -; RV32IM-NEXT: neg t1, a3 -; RV32IM-NEXT: sltu a5, t0, t1 +; RV32IM-NEXT: lw a1, 8(a1) +; RV32IM-NEXT: li a5, -63 +; RV32IM-NEXT: mulhu a6, a3, a5 +; RV32IM-NEXT: slli a7, a4, 6 +; RV32IM-NEXT: sub a7, a7, a4 +; RV32IM-NEXT: sub a6, a6, a7 +; 
RV32IM-NEXT: neg a7, a7 +; RV32IM-NEXT: sltu a7, a6, a7 +; RV32IM-NEXT: mulhu t0, a4, a5 +; RV32IM-NEXT: add a7, t0, a7 +; RV32IM-NEXT: sub a6, a6, a3 +; RV32IM-NEXT: neg t0, a3 +; RV32IM-NEXT: sltu t1, a6, t0 ; RV32IM-NEXT: li t2, -1 ; RV32IM-NEXT: mulhu t3, a3, t2 -; RV32IM-NEXT: add a5, t3, a5 -; RV32IM-NEXT: add a5, t4, a5 -; RV32IM-NEXT: sub a2, a5, a4 -; RV32IM-NEXT: slli a1, t5, 6 -; RV32IM-NEXT: sub a1, a1, t5 -; RV32IM-NEXT: add a1, a1, a3 -; RV32IM-NEXT: sub t6, a2, a1 -; RV32IM-NEXT: sltu s0, t6, a2 +; RV32IM-NEXT: add t1, t3, t1 +; RV32IM-NEXT: add t1, a7, t1 +; RV32IM-NEXT: sub t4, t1, a4 +; RV32IM-NEXT: slli t5, a1, 6 +; RV32IM-NEXT: sub t5, t5, a1 +; RV32IM-NEXT: add t5, t5, a3 +; RV32IM-NEXT: sub t6, t4, t5 +; RV32IM-NEXT: sltu s0, t6, t4 ; RV32IM-NEXT: neg s1, a4 -; RV32IM-NEXT: sltu a2, a2, s1 -; RV32IM-NEXT: sltu a5, a5, t4 -; RV32IM-NEXT: mulhu s1, a4, t2 -; RV32IM-NEXT: add a5, s1, a5 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: slli a5, a7, 6 -; RV32IM-NEXT: sub a5, a7, a5 -; RV32IM-NEXT: mulhu s1, t5, a6 -; RV32IM-NEXT: sub s1, s1, t5 -; RV32IM-NEXT: add a5, s1, a5 -; RV32IM-NEXT: sub s1, t3, a3 -; RV32IM-NEXT: sub a4, s1, a4 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: neg a1, a1 -; RV32IM-NEXT: sltu a1, a1, t1 -; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: sltu t4, t4, s1 +; RV32IM-NEXT: sltu a7, t1, a7 +; RV32IM-NEXT: mulhu t1, a4, t2 +; RV32IM-NEXT: add a7, t1, a7 +; RV32IM-NEXT: add a7, a7, t4 +; RV32IM-NEXT: slli t1, a2, 6 +; RV32IM-NEXT: sub a2, a2, t1 +; RV32IM-NEXT: mulhu a5, a1, a5 +; RV32IM-NEXT: sub a1, a5, a1 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: sub a2, t3, a3 +; RV32IM-NEXT: sub a2, a2, a4 ; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: neg a2, t5 +; RV32IM-NEXT: sltu a2, a2, t0 +; RV32IM-NEXT: add a1, a1, a2 +; RV32IM-NEXT: add a1, a7, a1 ; RV32IM-NEXT: add a1, a1, s0 ; RV32IM-NEXT: slli a2, a3, 6 ; RV32IM-NEXT: sub a2, a3, a2 ; RV32IM-NEXT: sw a2, 0(a0) -; RV32IM-NEXT: sw t0, 4(a0) +; RV32IM-NEXT: sw a6, 4(a0) ; 
RV32IM-NEXT: sw t6, 8(a0) ; RV32IM-NEXT: sw a1, 12(a0) ; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload @@ -1417,60 +1415,60 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a3 -; RV32I-NEXT: mv s5, a2 -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a3 +; RV32I-NEXT: mv s3, a2 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: srai s4, a3, 31 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a2, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add s1, a0, s1 -; RV32I-NEXT: sltu a0, s1, a0 +; RV32I-NEXT: add s5, a0, s5 +; RV32I-NEXT: sltu a0, s5, a0 ; RV32I-NEXT: add s7, a1, a0 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add a2, a0, s1 +; RV32I-NEXT: add a2, a0, s5 ; RV32I-NEXT: sltu a0, a2, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add s8, s7, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s9, a0 +; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: mv s6, a1 -; RV32I-NEXT: add s1, a0, s8 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: add s9, a0, s8 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s4 -; RV32I-NEXT: mv 
a2, s3 -; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: add a3, a0, s0 -; RV32I-NEXT: add a2, s1, a3 -; RV32I-NEXT: sltu a4, a2, s1 -; RV32I-NEXT: sltu a5, s1, s9 -; RV32I-NEXT: sltu s1, s8, s7 -; RV32I-NEXT: add s1, s6, s1 -; RV32I-NEXT: add a5, s1, a5 -; RV32I-NEXT: add a1, a1, s5 +; RV32I-NEXT: add a3, a0, s2 +; RV32I-NEXT: add a2, s9, a3 +; RV32I-NEXT: sltu a4, a2, s9 +; RV32I-NEXT: sltu a5, s9, s5 +; RV32I-NEXT: sltu a6, s8, s7 +; RV32I-NEXT: add a6, s6, a6 +; RV32I-NEXT: add a5, a6, a5 +; RV32I-NEXT: add a1, a1, s3 ; RV32I-NEXT: sltu a0, a3, a0 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: add a0, a5, a0 @@ -1492,36 +1490,36 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; ; RV32IM-LABEL: mulhsu_i64: ; RV32IM: # %bb.0: -; RV32IM-NEXT: srai a7, a3, 31 -; RV32IM-NEXT: mulhu a6, a0, a2 -; RV32IM-NEXT: mul a5, a1, a2 -; RV32IM-NEXT: add a4, a5, a6 -; RV32IM-NEXT: sltu a5, a4, a5 +; RV32IM-NEXT: srai a4, a3, 31 +; RV32IM-NEXT: mulhu a5, a0, a2 +; RV32IM-NEXT: mul a6, a1, a2 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: sltu a6, a5, a6 ; RV32IM-NEXT: mulhu a2, a1, a2 -; RV32IM-NEXT: add a6, a2, a5 +; RV32IM-NEXT: add a6, a2, a6 ; RV32IM-NEXT: mul a2, a0, a3 -; RV32IM-NEXT: add a4, a2, a4 -; RV32IM-NEXT: sltu a2, a4, a2 -; RV32IM-NEXT: mulhu a4, a0, a3 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: add a4, a6, a2 -; RV32IM-NEXT: mul a5, a1, a3 -; RV32IM-NEXT: add a2, a5, a4 -; RV32IM-NEXT: mul t1, a7, a0 -; RV32IM-NEXT: add t0, a2, t1 -; RV32IM-NEXT: sltu t2, t0, a2 -; RV32IM-NEXT: sltu a2, a2, a5 -; RV32IM-NEXT: sltu a4, a4, a6 +; RV32IM-NEXT: add a5, a2, a5 +; RV32IM-NEXT: sltu a2, a5, a2 +; RV32IM-NEXT: mulhu a5, a0, a3 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: add a5, a6, a2 +; RV32IM-NEXT: mul a7, a1, a3 +; RV32IM-NEXT: add t0, a7, a5 +; RV32IM-NEXT: mul t1, a4, a0 +; RV32IM-NEXT: add a2, t0, t1 +; RV32IM-NEXT: sltu t2, a2, t0 +; RV32IM-NEXT: sltu a7, t0, a7 +; RV32IM-NEXT: sltu 
a5, a5, a6 ; RV32IM-NEXT: mulhu a3, a1, a3 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: mul a1, a7, a1 -; RV32IM-NEXT: mulhu a0, a7, a0 +; RV32IM-NEXT: add a3, a3, a5 +; RV32IM-NEXT: add a3, a3, a7 +; RV32IM-NEXT: mul a1, a4, a1 +; RV32IM-NEXT: mulhu a0, a4, a0 ; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: add a0, a0, t1 -; RV32IM-NEXT: add a0, a2, a0 +; RV32IM-NEXT: add a0, a3, a0 ; RV32IM-NEXT: add a1, a0, t2 -; RV32IM-NEXT: mv a0, t0 +; RV32IM-NEXT: mv a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: mulhsu_i64: diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll index 46f7a4dbda60c..b6375a91413b8 100644 --- a/llvm/test/CodeGen/RISCV/remat.ll +++ b/llvm/test/CodeGen/RISCV/remat.ll @@ -37,16 +37,16 @@ define i32 @test() nounwind { ; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui s6, %hi(a) -; RV32I-NEXT: lw a0, %lo(a)(s6) +; RV32I-NEXT: lui s0, %hi(a) +; RV32I-NEXT: lw a0, %lo(a)(s0) ; RV32I-NEXT: beqz a0, .LBB0_11 ; RV32I-NEXT: # %bb.1: # %for.body.preheader -; RV32I-NEXT: lui s2, %hi(l) -; RV32I-NEXT: lui s3, %hi(k) -; RV32I-NEXT: lui s4, %hi(j) -; RV32I-NEXT: lui s5, %hi(i) -; RV32I-NEXT: lui s1, %hi(d) -; RV32I-NEXT: lui s0, %hi(e) +; RV32I-NEXT: lui s1, %hi(l) +; RV32I-NEXT: lui s2, %hi(k) +; RV32I-NEXT: lui s3, %hi(j) +; RV32I-NEXT: lui s4, %hi(i) +; RV32I-NEXT: lui s5, %hi(d) +; RV32I-NEXT: lui s6, %hi(e) ; RV32I-NEXT: lui s7, %hi(f) ; RV32I-NEXT: lui s8, %hi(g) ; RV32I-NEXT: lui s9, %hi(h) @@ -55,56 +55,56 @@ define i32 @test() nounwind { ; RV32I-NEXT: j .LBB0_3 ; RV32I-NEXT: .LBB0_2: # %for.inc ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(a)(s6) +; RV32I-NEXT: lw a0, %lo(a)(s0) ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: sw a0, %lo(a)(s6) +; RV32I-NEXT: sw a0, %lo(a)(s0) ; RV32I-NEXT: beqz a0, .LBB0_11 ; RV32I-NEXT: .LBB0_3: # %for.body ; RV32I-NEXT: # 
=>This Inner Loop Header: Depth=1 -; RV32I-NEXT: lw a1, %lo(l)(s2) +; RV32I-NEXT: lw a1, %lo(l)(s1) ; RV32I-NEXT: beqz a1, .LBB0_5 ; RV32I-NEXT: # %bb.4: # %if.then ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a1, %lo(b)(s11) ; RV32I-NEXT: lw a2, %lo(c)(s10) -; RV32I-NEXT: lw a3, %lo(d)(s1) -; RV32I-NEXT: lw a4, %lo(e)(s0) +; RV32I-NEXT: lw a3, %lo(d)(s5) +; RV32I-NEXT: lw a4, %lo(e)(s6) ; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_5: # %if.end ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(k)(s3) +; RV32I-NEXT: lw a0, %lo(k)(s2) ; RV32I-NEXT: beqz a0, .LBB0_7 ; RV32I-NEXT: # %bb.6: # %if.then3 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a0, %lo(b)(s11) ; RV32I-NEXT: lw a1, %lo(c)(s10) -; RV32I-NEXT: lw a2, %lo(d)(s1) -; RV32I-NEXT: lw a3, %lo(e)(s0) +; RV32I-NEXT: lw a2, %lo(d)(s5) +; RV32I-NEXT: lw a3, %lo(e)(s6) ; RV32I-NEXT: lw a4, %lo(f)(s7) ; RV32I-NEXT: li a5, 64 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_7: # %if.end5 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(j)(s4) +; RV32I-NEXT: lw a0, %lo(j)(s3) ; RV32I-NEXT: beqz a0, .LBB0_9 ; RV32I-NEXT: # %bb.8: # %if.then7 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 ; RV32I-NEXT: lw a0, %lo(c)(s10) -; RV32I-NEXT: lw a1, %lo(d)(s1) -; RV32I-NEXT: lw a2, %lo(e)(s0) +; RV32I-NEXT: lw a1, %lo(d)(s5) +; RV32I-NEXT: lw a2, %lo(e)(s6) ; RV32I-NEXT: lw a3, %lo(f)(s7) ; RV32I-NEXT: lw a4, %lo(g)(s8) ; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: call foo@plt ; RV32I-NEXT: .LBB0_9: # %if.end9 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(i)(s5) +; RV32I-NEXT: lw a0, %lo(i)(s4) ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.10: # %if.then11 ; RV32I-NEXT: # in Loop: Header=BB0_3 Depth=1 -; RV32I-NEXT: lw a0, %lo(d)(s1) -; RV32I-NEXT: lw a1, %lo(e)(s0) +; RV32I-NEXT: lw a0, %lo(d)(s5) +; RV32I-NEXT: lw a1, %lo(e)(s6) ; RV32I-NEXT: lw a2, %lo(f)(s7) ; RV32I-NEXT: lw a3, %lo(g)(s8) ; 
RV32I-NEXT: lw a4, %lo(h)(s9) diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll index 45a6666d489c7..9659ba49bcbc6 100644 --- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll +++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll @@ -19,18 +19,18 @@ define half @half_test(half %a, half %b) nounwind { ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s1, a1, -1 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: addi s2, a1, -1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: and a0, s0, s1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __divsf3@plt @@ -51,18 +51,18 @@ define half @half_test(half %a, half %b) nounwind { ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s1, a1, -1 -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: addiw s2, a1, -1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s2, a0 -; RV64I-NEXT: and a0, s0, s1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: and a0, a0, s2 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __divsf3@plt diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll index 
aab1817aff782..68b355981e2c6 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -183,18 +183,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, a2 +; RV32I-NEXT: sll a5, a4, a2 ; RV32I-NEXT: bnez a3, .LBB7_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB7_4: ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: not a5, a2 -; RV32I-NEXT: srl a1, a1, a5 -; RV32I-NEXT: or a3, a6, a1 +; RV32I-NEXT: not a6, a2 +; RV32I-NEXT: srl a1, a1, a6 +; RV32I-NEXT: or a3, a5, a1 ; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: srli a1, a4, 1 -; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: srl a1, a1, a6 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: ret @@ -208,18 +208,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a4, a0 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: sll a6, a4, a2 +; RV32ZBB-NEXT: sll a5, a4, a2 ; RV32ZBB-NEXT: bnez a3, .LBB7_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: mv a0, a1 ; RV32ZBB-NEXT: .LBB7_4: ; RV32ZBB-NEXT: srli a1, a0, 1 -; RV32ZBB-NEXT: not a5, a2 -; RV32ZBB-NEXT: srl a1, a1, a5 -; RV32ZBB-NEXT: or a3, a6, a1 +; RV32ZBB-NEXT: not a6, a2 +; RV32ZBB-NEXT: srl a1, a1, a6 +; RV32ZBB-NEXT: or a3, a5, a1 ; RV32ZBB-NEXT: sll a0, a0, a2 ; RV32ZBB-NEXT: srli a1, a4, 1 -; RV32ZBB-NEXT: srl a1, a1, a5 +; RV32ZBB-NEXT: srl a1, a1, a6 ; RV32ZBB-NEXT: or a1, a0, a1 ; RV32ZBB-NEXT: mv a0, a3 ; RV32ZBB-NEXT: ret @@ -233,18 +233,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; RV32ZBP-NEXT: # %bb.1: ; RV32ZBP-NEXT: mv a4, a0 ; RV32ZBP-NEXT: .LBB7_2: -; RV32ZBP-NEXT: sll a6, a4, a2 +; RV32ZBP-NEXT: sll a5, a4, a2 ; RV32ZBP-NEXT: bnez a3, .LBB7_4 ; RV32ZBP-NEXT: # %bb.3: ; RV32ZBP-NEXT: mv a0, a1 ; RV32ZBP-NEXT: .LBB7_4: ; RV32ZBP-NEXT: srli a1, a0, 1 -; RV32ZBP-NEXT: not a5, a2 -; RV32ZBP-NEXT: srl a1, a1, a5 -; RV32ZBP-NEXT: or a3, a6, a1 +; 
RV32ZBP-NEXT: not a6, a2 +; RV32ZBP-NEXT: srl a1, a1, a6 +; RV32ZBP-NEXT: or a3, a5, a1 ; RV32ZBP-NEXT: sll a0, a0, a2 ; RV32ZBP-NEXT: srli a1, a4, 1 -; RV32ZBP-NEXT: srl a1, a1, a5 +; RV32ZBP-NEXT: srl a1, a1, a6 ; RV32ZBP-NEXT: or a1, a0, a1 ; RV32ZBP-NEXT: mv a0, a3 ; RV32ZBP-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index f4e5efaaa36d9..d5d6b3ab0b893 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -73,8 +73,8 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 @@ -88,14 +88,14 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s1, a1, 819 -; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -103,12 +103,12 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s0, a1, 257 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, s4, 1 -; RV32I-NEXT: or a0, s4, a0 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: srli a0, s2, 1 +; RV32I-NEXT: or a0, s2, a0 ; RV32I-NEXT: srli 
a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,24 +119,24 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s3, .LBB1_2 +; RV32I-NEXT: bnez s0, .LBB1_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB1_3 ; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s1, 24 ; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -227,21 +227,21 @@ define i64 @cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s3, a1 -; RV32I-NEXT: mv s4, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: not a1, s4 +; RV32I-NEXT: not a1, s2 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s5, a2, 1365 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: addi s4, a2, 1365 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s5, a1, 819 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -249,32 +249,32 @@ define i64 
@cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: addi s6, a1, -241 ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: addi s3, a1, 257 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: addi a0, s3, -1 -; RV32I-NEXT: not a1, s3 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: addi a0, s1, -1 +; RV32I-NEXT: not a1, s1 ; RV32I-NEXT: and a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s5 +; RV32I-NEXT: and a1, a1, s4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: and a1, a0, s5 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s5 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s4, .LBB3_2 +; RV32I-NEXT: bnez s2, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srli a0, s2, 24 +; RV32I-NEXT: srli a0, s0, 24 ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -356,17 +356,17 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: srli a0, a1, 1 ; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a0, a0, s3 +; RV32I-NEXT: addi s2, a2, 1365 +; RV32I-NEXT: and a0, a0, s2 ; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s0, a1, 819 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: addi s3, a1, 819 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; 
RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 @@ -378,12 +378,12 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: call __mulsi3@plt ; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: srli a0, s0, 1 +; RV32I-NEXT: and a0, a0, s2 +; RV32I-NEXT: sub a0, s0, a0 +; RV32I-NEXT: and a1, a0, s3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll index 026a27b691196..1df371c407d77 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -39,18 +39,18 @@ define i64 @gorc1_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc1_i64: @@ -102,18 +102,18 @@ define i64 @gorc2_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; 
RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2_i64: @@ -181,34 +181,34 @@ define i64 @gorc3_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 2 ; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc3_i64: @@ -266,18 +266,18 @@ define i64 @gorc4_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 
-; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc4_i64: @@ -345,34 +345,34 @@ define i64 @gorc5_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or 
a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc5_i64: @@ -446,34 +446,34 @@ define i64 @gorc6_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 2 ; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc6_i64: @@ -563,50 +563,50 @@ define i64 @gorc7_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, a4, -1366 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 349525 +; 
RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: slli a2, a1, 2 ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a0, 2 ; RV32I-NEXT: srli a5, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: slli a2, a0, 4 ; RV32I-NEXT: slli a3, a1, 4 ; RV32I-NEXT: lui a4, 986895 ; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 4 ; RV32I-NEXT: srli a5, a0, 4 -; RV32I-NEXT: lui a3, 61681 -; RV32I-NEXT: addi a3, a3, -241 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 61681 +; RV32I-NEXT: addi a6, a6, -241 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc7_i64: @@ -670,18 +670,18 @@ define i64 @gorc8_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 8 ; RV32I-NEXT: lui a4, 1044496 ; RV32I-NEXT: addi a4, a4, -256 -; RV32I-NEXT: and a6, a3, a4 +; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 8 ; RV32I-NEXT: srli a5, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: addi a3, 
a3, 255 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: addi a6, a6, 255 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc8_i64: @@ -830,30 +830,30 @@ define i64 @gorc2b_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a0, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 -; RV32I-NEXT: and a6, a3, a4 -; RV32I-NEXT: and a7, a2, a4 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: srli a3, a1, 2 -; RV32I-NEXT: lui a2, 209715 -; RV32I-NEXT: addi a2, a2, 819 -; RV32I-NEXT: and a3, a3, a2 -; RV32I-NEXT: and a5, a5, a2 +; RV32I-NEXT: srli a6, a1, 2 +; RV32I-NEXT: lui a7, 209715 +; RV32I-NEXT: addi a7, a7, 819 +; RV32I-NEXT: and a6, a6, a7 +; RV32I-NEXT: and a5, a5, a7 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: slli a3, a0, 2 -; RV32I-NEXT: slli a5, a1, 2 -; RV32I-NEXT: and a6, a5, a4 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 2 ; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 -; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: and a5, a5, a7 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2b_i64: @@ -941,46 +941,46 @@ define i64 @gorc3b_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: lui a4, 699051 ; RV32I-NEXT: addi a4, 
a4, -1366 -; RV32I-NEXT: and a6, a3, a4 -; RV32I-NEXT: and a7, a2, a4 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a5, a1, 1 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 -; RV32I-NEXT: and a3, a3, a2 -; RV32I-NEXT: and a5, a5, a2 +; RV32I-NEXT: srli a6, a0, 1 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a6, a6, a7 +; RV32I-NEXT: and a5, a5, a7 ; RV32I-NEXT: or a1, a5, a1 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: slli a6, a1, 2 -; RV32I-NEXT: slli a5, a0, 2 -; RV32I-NEXT: lui a3, 838861 -; RV32I-NEXT: addi a3, a3, -820 -; RV32I-NEXT: and a7, a5, a3 -; RV32I-NEXT: and a6, a6, a3 -; RV32I-NEXT: srli t0, a0, 2 -; RV32I-NEXT: srli a3, a1, 2 -; RV32I-NEXT: lui a5, 209715 -; RV32I-NEXT: addi a5, a5, 819 +; RV32I-NEXT: or a0, a6, a0 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a2, a1, 2 +; RV32I-NEXT: slli a3, a0, 2 +; RV32I-NEXT: lui a5, 838861 +; RV32I-NEXT: addi a5, a5, -820 ; RV32I-NEXT: and a3, a3, a5 -; RV32I-NEXT: and a5, t0, a5 +; RV32I-NEXT: and a2, a2, a5 +; RV32I-NEXT: srli a5, a0, 2 +; RV32I-NEXT: srli a6, a1, 2 +; RV32I-NEXT: lui t0, 209715 +; RV32I-NEXT: addi t0, t0, 819 +; RV32I-NEXT: and a6, a6, t0 +; RV32I-NEXT: and a5, a5, t0 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: or a0, a0, a7 -; RV32I-NEXT: slli a3, a0, 1 -; RV32I-NEXT: slli a5, a1, 1 -; RV32I-NEXT: and a6, a5, a4 +; RV32I-NEXT: or a1, a6, a1 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a4, a1, 1 ; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 -; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: and a5, a5, a7 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: 
or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc3b_i64: @@ -1818,20 +1818,20 @@ define i64 @grev2b_i64(i64 %a) nounwind { ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: or a0, a2, a0 ; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: slli a6, a1, 2 +; RV32I-NEXT: slli a2, a1, 2 ; RV32I-NEXT: slli a3, a0, 2 -; RV32I-NEXT: lui a2, 838861 -; RV32I-NEXT: addi a2, a2, -820 -; RV32I-NEXT: and a7, a3, a2 -; RV32I-NEXT: and a2, a6, a2 +; RV32I-NEXT: lui a6, 838861 +; RV32I-NEXT: addi a6, a6, -820 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a1, a1, 2 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: or a1, a2, a1 -; RV32I-NEXT: or a0, a7, a0 +; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: slli a2, a0, 1 ; RV32I-NEXT: slli a3, a1, 1 ; RV32I-NEXT: and a3, a3, a4 @@ -1945,40 +1945,40 @@ define i64 @grev0_i64(i64 %a) nounwind { ; RV32I-NEXT: and a1, a1, a5 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: slli a6, a0, 2 +; RV32I-NEXT: slli a2, a0, 2 ; RV32I-NEXT: slli a3, a1, 2 -; RV32I-NEXT: lui a2, 838861 -; RV32I-NEXT: addi a2, a2, -820 -; RV32I-NEXT: and a7, a3, a2 -; RV32I-NEXT: and a6, a6, a2 +; RV32I-NEXT: lui a6, 838861 +; RV32I-NEXT: addi a6, a6, -820 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 -; RV32I-NEXT: or t0, a6, a0 -; RV32I-NEXT: or a1, a7, a1 -; RV32I-NEXT: slli a6, a1, 1 -; RV32I-NEXT: slli a0, t0, 1 -; RV32I-NEXT: and a7, a0, a4 -; RV32I-NEXT: and 
a4, a6, a4 +; RV32I-NEXT: lui a7, 209715 +; RV32I-NEXT: addi a7, a7, 819 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: slli a2, a1, 1 +; RV32I-NEXT: slli a3, a0, 1 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: and a2, a2, a4 ; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: srli a0, t0, 1 +; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: or a0, a7, a0 -; RV32I-NEXT: slli a4, a0, 2 -; RV32I-NEXT: slli a5, a1, 2 -; RV32I-NEXT: and a5, a5, a2 -; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: and a2, a2, a6 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: grev0_i64: @@ -2275,13 +2275,13 @@ define i64 @bitreverse_i64(i64 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: srli a2, a1, 8 ; RV32I-NEXT: lui a3, 16 -; RV32I-NEXT: addi a7, a3, -256 -; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: slli a4, a1, 8 -; RV32I-NEXT: lui a6, 4080 -; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: lui a5, 4080 +; RV32I-NEXT: and a4, a4, a5 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: or a1, a1, a2 @@ -2293,27 +2293,27 @@ define i64 @bitreverse_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a1, a1, 4 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: srli a2, a1, 2 -; RV32I-NEXT: lui a5, 209715 -; RV32I-NEXT: addi a5, a5, 819 -; RV32I-NEXT: and a2, a2, a5 -; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: lui a6, 209715 +; 
RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: srli a2, a1, 1 -; RV32I-NEXT: lui a3, 349525 -; RV32I-NEXT: addi a3, a3, 1365 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: and a1, a1, a7 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: or t0, a2, a1 +; RV32I-NEXT: or a2, a2, a1 ; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: and a1, a1, a7 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: and a3, a3, a5 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: and a1, a1, a4 @@ -2321,16 +2321,16 @@ define i64 @bitreverse_i64(i64 %a) nounwind { ; RV32I-NEXT: slli a0, a0, 4 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 -; RV32I-NEXT: and a1, a1, a5 -; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: and a0, a0, a6 ; RV32I-NEXT: slli a0, a0, 2 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: or a1, a1, a0 -; RV32I-NEXT: mv a0, t0 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: bitreverse_i64: @@ -2462,13 +2462,13 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; RV32I: # %bb.0: ; RV32I-NEXT: srli a3, a1, 8 ; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a7, a2, -256 -; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a3, a3, a2 ; RV32I-NEXT: srli a4, a1, 24 ; RV32I-NEXT: or a4, a3, a4 ; RV32I-NEXT: slli a5, 
a1, 8 -; RV32I-NEXT: lui a6, 4080 -; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a5, a5, a3 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a5 ; RV32I-NEXT: or a1, a1, a4 @@ -2480,58 +2480,58 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; RV32I-NEXT: slli a1, a1, 4 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a1, 2 -; RV32I-NEXT: lui a3, 209715 -; RV32I-NEXT: addi a3, a3, 819 -; RV32I-NEXT: and a4, a4, a3 -; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: slli a1, a1, 2 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi a2, a2, 1365 -; RV32I-NEXT: and a4, a4, a2 -; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a1, a1, a7 ; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: srli a4, a0, 8 -; RV32I-NEXT: and t0, a4, a7 -; RV32I-NEXT: srli a4, a0, 24 -; RV32I-NEXT: or t0, t0, a4 -; RV32I-NEXT: slli a4, a0, 8 -; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli t0, a0, 24 +; RV32I-NEXT: or a4, a4, t0 +; RV32I-NEXT: slli t0, a0, 8 +; RV32I-NEXT: and t0, t0, a3 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: srli a4, a0, 4 ; RV32I-NEXT: and a4, a4, a5 ; RV32I-NEXT: and a0, a0, a5 ; RV32I-NEXT: slli a0, a0, 4 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: srli a4, a0, 2 -; RV32I-NEXT: and a4, a4, a3 -; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a0, a0, a6 ; RV32I-NEXT: slli a0, a0, 2 ; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: and a3, a3, a2 -; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a4, a0, 1 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a0, a0, a7 ; RV32I-NEXT: slli a0, a0, 1 
-; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: srli a2, a0, 8 -; RV32I-NEXT: and a2, a2, a7 -; RV32I-NEXT: srli a3, a0, 24 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: slli a3, a0, 8 -; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: and a5, a5, a3 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: srli a2, a1, 8 -; RV32I-NEXT: and a2, a2, a7 -; RV32I-NEXT: srli a3, a1, 24 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: slli a3, a1, 8 -; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: and a3, a4, a3 ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: or a1, a1, a2 @@ -2584,16 +2584,16 @@ define i64 @shfl1_i64(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 629146 ; RV32I-NEXT: addi a2, a2, -1639 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: slli a5, a0, 1 -; RV32I-NEXT: lui a3, 279620 -; RV32I-NEXT: addi a3, a3, 1092 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: or a3, a6, a5 +; RV32I-NEXT: lui a6, 279620 +; RV32I-NEXT: addi a6, a6, 1092 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: srli a1, a1, 1 ; RV32I-NEXT: lui a4, 139810 @@ -2656,16 +2656,16 @@ define i64 @shfl2_i64(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 801852 ; RV32I-NEXT: addi a2, a2, 963 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 
; RV32I-NEXT: slli a4, a1, 2 ; RV32I-NEXT: slli a5, a0, 2 -; RV32I-NEXT: lui a3, 197379 -; RV32I-NEXT: addi a3, a3, 48 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: or a3, a6, a5 +; RV32I-NEXT: lui a6, 197379 +; RV32I-NEXT: addi a6, a6, 48 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: srli a0, a0, 2 ; RV32I-NEXT: srli a1, a1, 2 ; RV32I-NEXT: lui a4, 49345 @@ -2728,24 +2728,24 @@ define i64 @shfl4_i64(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 983295 ; RV32I-NEXT: addi a2, a2, 15 -; RV32I-NEXT: and a6, a1, a2 +; RV32I-NEXT: and a3, a1, a2 ; RV32I-NEXT: and a2, a0, a2 ; RV32I-NEXT: slli a4, a1, 4 ; RV32I-NEXT: slli a5, a0, 4 -; RV32I-NEXT: lui a3, 61441 -; RV32I-NEXT: addi a3, a3, -256 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: lui a6, 61441 +; RV32I-NEXT: addi a6, a6, -256 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: and a4, a4, a6 ; RV32I-NEXT: srli a1, a1, 4 ; RV32I-NEXT: srli a0, a0, 4 -; RV32I-NEXT: lui a4, 3840 -; RV32I-NEXT: addi a4, a4, 240 -; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: and a1, a1, a4 -; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: lui a6, 3840 +; RV32I-NEXT: addi a6, a6, 240 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: or a0, a5, a0 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a1, a1, a3 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: shfl4_i64: @@ -2799,22 +2799,22 @@ define i64 @shfl8_i64(i64 %a, i64 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 1044480 ; RV32I-NEXT: addi a2, a2, 255 -; RV32I-NEXT: and a6, a0, a2 +; RV32I-NEXT: and a3, a0, a2 ; RV32I-NEXT: and a2, a1, a2 ; RV32I-NEXT: slli a4, a0, 8 ; RV32I-NEXT: slli a5, a1, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a5, a5, a3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: lui a6, 4080 +; RV32I-NEXT: and a5, 
a5, a6 +; RV32I-NEXT: and a4, a4, a6 ; RV32I-NEXT: srli a1, a1, 8 ; RV32I-NEXT: srli a0, a0, 8 -; RV32I-NEXT: lui a4, 16 -; RV32I-NEXT: addi a4, a4, -256 -; RV32I-NEXT: and a0, a0, a4 -; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: lui a6, 16 +; RV32I-NEXT: addi a6, a6, -256 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: and a1, a1, a6 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a6 -; RV32I-NEXT: or a0, a3, a0 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: or a1, a5, a1 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll index 1582b4f62265b..2e6c7da552e8d 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll @@ -341,14 +341,14 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: fshl_i64: ; RV32I: # %bb.0: ; RV32I-NEXT: srli a5, a4, 5 -; RV32I-NEXT: andi a5, a5, 1 -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: bnez a5, .LBB13_2 +; RV32I-NEXT: andi a6, a5, 1 +; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: bnez a6, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a6, a0 +; RV32I-NEXT: mv a5, a0 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: sll a7, a6, a4 -; RV32I-NEXT: bnez a5, .LBB13_4 +; RV32I-NEXT: sll a7, a5, a4 +; RV32I-NEXT: bnez a6, .LBB13_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB13_4: @@ -356,12 +356,12 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-NEXT: not a3, a4 ; RV32I-NEXT: srl a2, a2, a3 ; RV32I-NEXT: or a2, a7, a2 -; RV32I-NEXT: bnez a5, .LBB13_6 +; RV32I-NEXT: bnez a6, .LBB13_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB13_6: ; RV32I-NEXT: sll a0, a0, a4 -; RV32I-NEXT: srli a1, a6, 1 +; RV32I-NEXT: srli a1, a5, 1 ; RV32I-NEXT: srl a1, a1, a3 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 @@ -420,24 +420,24 @@ define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a6, a2, a4 
+; RV32I-NEXT: srl a2, a2, a4 ; RV32I-NEXT: beqz a5, .LBB15_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: mv a3, a0 ; RV32I-NEXT: .LBB15_4: ; RV32I-NEXT: slli a7, a3, 1 -; RV32I-NEXT: not t0, a4 -; RV32I-NEXT: sll a2, a7, t0 -; RV32I-NEXT: or a6, a2, a6 +; RV32I-NEXT: not a6, a4 +; RV32I-NEXT: sll a7, a7, a6 +; RV32I-NEXT: or a2, a7, a2 ; RV32I-NEXT: srl a3, a3, a4 ; RV32I-NEXT: beqz a5, .LBB15_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB15_6: ; RV32I-NEXT: slli a0, a0, 1 -; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: sll a0, a0, a6 ; RV32I-NEXT: or a1, a0, a3 -; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32ZBT-LABEL: fshr_i64: diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll index 198aea298be2b..a99bd2843f735 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll @@ -13,15 +13,15 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind { ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; CHECK-NEXT: srli s2, a0, 32 +; CHECK-NEXT: srli s0, a0, 32 ; CHECK-NEXT: srli s1, a1, 32 ; CHECK-NEXT: call __addsf3@plt -; CHECK-NEXT: mv s0, a0 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __addsf3@plt ; CHECK-NEXT: slli a0, a0, 32 -; CHECK-NEXT: slli a1, s0, 32 +; CHECK-NEXT: slli a1, s2, 32 ; CHECK-NEXT: srli a1, a1, 32 ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll index 674ffcff180de..260892285643a 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -2408,38 +2408,38 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; RV64I-LABEL: bitreverse_bswap_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: 
srli a1, a0, 24 -; RV64I-NEXT: lui a6, 4080 -; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 ; RV64I-NEXT: srli a3, a0, 8 ; RV64I-NEXT: li a4, 255 -; RV64I-NEXT: slli a7, a4, 24 -; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: slli a5, a4, 24 +; RV64I-NEXT: and a3, a3, a5 ; RV64I-NEXT: or a1, a3, a1 ; RV64I-NEXT: srli a3, a0, 40 -; RV64I-NEXT: lui a2, 16 -; RV64I-NEXT: addiw a2, a2, -256 -; RV64I-NEXT: and a3, a3, a2 -; RV64I-NEXT: srli a5, a0, 56 -; RV64I-NEXT: or a3, a3, a5 +; RV64I-NEXT: lui a6, 16 +; RV64I-NEXT: addiw a6, a6, -256 +; RV64I-NEXT: and a3, a3, a6 +; RV64I-NEXT: srli a7, a0, 56 +; RV64I-NEXT: or a3, a3, a7 ; RV64I-NEXT: or a1, a1, a3 ; RV64I-NEXT: slli a3, a0, 24 -; RV64I-NEXT: slli t0, a4, 40 -; RV64I-NEXT: and a3, a3, t0 -; RV64I-NEXT: srliw a5, a0, 24 -; RV64I-NEXT: slli a5, a5, 32 -; RV64I-NEXT: or a3, a3, a5 -; RV64I-NEXT: slli a5, a0, 40 +; RV64I-NEXT: slli a7, a4, 40 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: srliw t0, a0, 24 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli t0, a0, 40 ; RV64I-NEXT: slli a4, a4, 48 -; RV64I-NEXT: and a5, a5, a4 +; RV64I-NEXT: and t0, t0, a4 ; RV64I-NEXT: slli a0, a0, 56 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: lui a5, %hi(.LCPI68_0) -; RV64I-NEXT: ld a5, %lo(.LCPI68_0)(a5) +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: lui t0, %hi(.LCPI68_0) +; RV64I-NEXT: ld t0, %lo(.LCPI68_0)(t0) ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: and a1, a1, a5 -; RV64I-NEXT: and a0, a0, a5 +; RV64I-NEXT: and a1, a1, t0 +; RV64I-NEXT: and a0, a0, t0 ; RV64I-NEXT: lui a3, %hi(.LCPI68_1) ; RV64I-NEXT: ld a3, %lo(.LCPI68_1)(a3) ; RV64I-NEXT: slli a0, a0, 4 @@ -2457,17 +2457,17 @@ define i64 @bitreverse_bswap_i64(i64 %a) { ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 40 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 56 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: 
srli a2, a0, 24 -; RV64I-NEXT: and a2, a2, a6 +; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: and a2, a3, a2 ; RV64I-NEXT: srli a3, a0, 8 -; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: and a3, a3, a5 ; RV64I-NEXT: or a2, a3, a2 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: slli a2, a0, 24 -; RV64I-NEXT: and a2, a2, t0 +; RV64I-NEXT: and a2, a2, a7 ; RV64I-NEXT: srliw a3, a0, 24 ; RV64I-NEXT: slli a3, a3, 32 ; RV64I-NEXT: or a2, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 0955eba777f01..ca0abc5e40031 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -103,76 +103,76 @@ define fastcc @ret_split_nxv128i32(* %x ; CHECK-NEXT: slli a2, a2, 5 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a6, a2, 3 -; CHECK-NEXT: add a4, a1, a6 +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a4, a1, a3 ; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a4, 24 -; CHECK-NEXT: mul a3, a3, a4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: slli a7, a2, 4 -; CHECK-NEXT: add a5, a1, a7 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: li a5, 24 +; CHECK-NEXT: mul a4, a4, a5 +; CHECK-NEXT: add a4, sp, a4 +; CHECK-NEXT: addi a4, a4, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: slli a4, a2, 4 +; CHECK-NEXT: add a5, a1, a4 ; CHECK-NEXT: vl8re32.v v8, (a5) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 4 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs8r.v v8, (a5) # 
Unknown-size Folded Spill ; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul t1, a2, a5 -; CHECK-NEXT: add a3, a1, t1 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: slli t3, a2, 5 -; CHECK-NEXT: add a4, a1, t3 -; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: li a4, 40 -; CHECK-NEXT: mul a4, a2, a4 -; CHECK-NEXT: add t0, a1, a4 -; CHECK-NEXT: li a5, 48 ; CHECK-NEXT: mul a5, a2, a5 -; CHECK-NEXT: add t2, a1, a5 -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: add a6, a1, a5 +; CHECK-NEXT: vl8re32.v v8, (a6) +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: slli a6, a6, 3 +; CHECK-NEXT: add a6, sp, a6 +; CHECK-NEXT: addi a6, a6, 16 +; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: slli a6, a2, 5 +; CHECK-NEXT: add a7, a1, a6 +; CHECK-NEXT: vl8re32.v v8, (a7) +; CHECK-NEXT: addi a7, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; CHECK-NEXT: li a7, 40 +; CHECK-NEXT: mul a7, a2, a7 +; CHECK-NEXT: add t0, a1, a7 +; CHECK-NEXT: li t1, 48 +; CHECK-NEXT: mul t1, a2, t1 +; CHECK-NEXT: add t2, a1, t1 +; CHECK-NEXT: li t3, 56 +; CHECK-NEXT: mul a2, a2, t3 +; CHECK-NEXT: add t3, a1, a2 ; CHECK-NEXT: vl8re32.v v8, (a1) ; CHECK-NEXT: vl8re32.v v0, (t0) -; CHECK-NEXT: vl8re32.v v16, (a3) +; CHECK-NEXT: vl8re32.v v16, (t3) ; CHECK-NEXT: vl8re32.v v24, (t2) ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: add a1, a0, a2 ; CHECK-NEXT: vs8r.v v16, (a1) -; CHECK-NEXT: add a1, a0, a5 +; CHECK-NEXT: add a1, a0, t1 ; CHECK-NEXT: vs8r.v v24, (a1) -; CHECK-NEXT: add a1, a0, a4 +; CHECK-NEXT: add a1, a0, a7 ; CHECK-NEXT: vs8r.v v0, (a1) -; CHECK-NEXT: add a1, a0, t3 +; CHECK-NEXT: add a1, a0, a6 ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8re8.v 
v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a1, a0, t1 +; CHECK-NEXT: add a1, a0, a5 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a1, a0, a7 +; CHECK-NEXT: add a1, a0, a4 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: add a0, a0, a6 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll index e563b0834d607..bde5c515a1425 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -32,17 +32,17 @@ define void @gather(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) ; CHECK-ASM-LABEL: gather: ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB0_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vlse8.v v8, (a1), a4 -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 -; CHECK-ASM-NEXT: vse8.v v8, (a3) +; CHECK-ASM-NEXT: vse8.v v8, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB0_1 @@ -101,18 +101,18 @@ define void 
@gather_masked(i8* noalias nocapture %A, i8* noalias nocapture reado ; CHECK-ASM-NEXT: addiw a3, a3, 873 ; CHECK-ASM-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; CHECK-ASM-NEXT: vmv.s.x v0, a3 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB1_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vmv1r.v v9, v8 ; CHECK-ASM-NEXT: vlse8.v v9, (a1), a4, v0.t -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v10, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v10, (a6) ; CHECK-ASM-NEXT: vadd.vv v9, v10, v9 -; CHECK-ASM-NEXT: vse8.v v9, (a3) +; CHECK-ASM-NEXT: vse8.v v9, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB1_1 @@ -168,17 +168,17 @@ define void @gather_negative_stride(i8* noalias nocapture %A, i8* noalias nocapt ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 ; CHECK-ASM-NEXT: addi a1, a1, 155 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, -5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB2_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu ; CHECK-ASM-NEXT: vlse8.v v8, (a1), a4 -; CHECK-ASM-NEXT: add a3, a0, a2 -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a0, a2 +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 -; CHECK-ASM-NEXT: vse8.v v8, (a3) +; CHECK-ASM-NEXT: vse8.v v8, (a6) ; CHECK-ASM-NEXT: addi a2, a2, 32 ; CHECK-ASM-NEXT: addi a1, a1, 160 ; CHECK-ASM-NEXT: bne a2, a5, .LBB2_1 @@ -303,14 +303,14 @@ define void @scatter(i8* noalias nocapture %A, i8* noalias nocapture readonly %B ; CHECK-ASM-LABEL: scatter: ; CHECK-ASM: # %bb.0: # 
%entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: li a4, 5 ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB4_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: add a3, a1, a2 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu -; CHECK-ASM-NEXT: vle8.v v8, (a3) +; CHECK-ASM-NEXT: add a6, a1, a2 +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu +; CHECK-ASM-NEXT: vle8.v v8, (a6) ; CHECK-ASM-NEXT: vlse8.v v9, (a0), a4 ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 ; CHECK-ASM-NEXT: vsse8.v v8, (a0), a4 @@ -369,7 +369,7 @@ define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture read ; CHECK-ASM-LABEL: scatter_masked: ; CHECK-ASM: # %bb.0: # %entry ; CHECK-ASM-NEXT: li a2, 0 -; CHECK-ASM-NEXT: li a6, 32 +; CHECK-ASM-NEXT: li a3, 32 ; CHECK-ASM-NEXT: lui a4, 983765 ; CHECK-ASM-NEXT: addiw a4, a4, 873 ; CHECK-ASM-NEXT: vsetivli zero, 1, e32, mf2, ta, mu @@ -378,9 +378,9 @@ define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture read ; CHECK-ASM-NEXT: li a5, 1024 ; CHECK-ASM-NEXT: .LBB5_1: # %vector.body ; CHECK-ASM-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: add a3, a1, a2 -; CHECK-ASM-NEXT: vsetvli zero, a6, e8, m1, ta, mu -; CHECK-ASM-NEXT: vle8.v v9, (a3) +; CHECK-ASM-NEXT: add a6, a1, a2 +; CHECK-ASM-NEXT: vsetvli zero, a3, e8, m1, ta, mu +; CHECK-ASM-NEXT: vle8.v v9, (a6) ; CHECK-ASM-NEXT: vmv1r.v v10, v8 ; CHECK-ASM-NEXT: vlse8.v v10, (a0), a4, v0.t ; CHECK-ASM-NEXT: vadd.vv v9, v10, v9 @@ -1047,43 +1047,43 @@ define void @strided_load_startval_add_with_splat(i8* noalias nocapture %0, i8* ; CHECK-ASM-NEXT: # %bb.2: ; CHECK-ASM-NEXT: slli a3, a4, 32 ; CHECK-ASM-NEXT: srli a3, a3, 32 -; CHECK-ASM-NEXT: addi a6, a3, 1 -; CHECK-ASM-NEXT: andi a7, a6, -32 -; CHECK-ASM-NEXT: add a3, a7, a2 -; CHECK-ASM-NEXT: slli a4, a2, 2 -; CHECK-ASM-NEXT: add a4, a4, a2 +; CHECK-ASM-NEXT: addi a4, a3, 1 +; CHECK-ASM-NEXT: andi a5, a4, 
-32 +; CHECK-ASM-NEXT: add a3, a5, a2 +; CHECK-ASM-NEXT: slli a6, a2, 2 +; CHECK-ASM-NEXT: add a6, a6, a2 ; CHECK-ASM-NEXT: add a2, a0, a2 -; CHECK-ASM-NEXT: add a4, a1, a4 -; CHECK-ASM-NEXT: li t0, 32 -; CHECK-ASM-NEXT: li t1, 5 -; CHECK-ASM-NEXT: mv a5, a7 +; CHECK-ASM-NEXT: add a6, a1, a6 +; CHECK-ASM-NEXT: li a7, 32 +; CHECK-ASM-NEXT: li t0, 5 +; CHECK-ASM-NEXT: mv t1, a5 ; CHECK-ASM-NEXT: .LBB12_3: # =>This Inner Loop Header: Depth=1 -; CHECK-ASM-NEXT: vsetvli zero, t0, e8, m1, ta, mu -; CHECK-ASM-NEXT: vlse8.v v8, (a4), t1 +; CHECK-ASM-NEXT: vsetvli zero, a7, e8, m1, ta, mu +; CHECK-ASM-NEXT: vlse8.v v8, (a6), t0 ; CHECK-ASM-NEXT: vle8.v v9, (a2) ; CHECK-ASM-NEXT: vadd.vv v8, v9, v8 ; CHECK-ASM-NEXT: vse8.v v8, (a2) -; CHECK-ASM-NEXT: addi a5, a5, -32 +; CHECK-ASM-NEXT: addi t1, t1, -32 ; CHECK-ASM-NEXT: addi a2, a2, 32 -; CHECK-ASM-NEXT: addi a4, a4, 160 -; CHECK-ASM-NEXT: bnez a5, .LBB12_3 +; CHECK-ASM-NEXT: addi a6, a6, 160 +; CHECK-ASM-NEXT: bnez t1, .LBB12_3 ; CHECK-ASM-NEXT: # %bb.4: -; CHECK-ASM-NEXT: beq a6, a7, .LBB12_7 +; CHECK-ASM-NEXT: beq a4, a5, .LBB12_7 ; CHECK-ASM-NEXT: .LBB12_5: ; CHECK-ASM-NEXT: slli a2, a3, 2 ; CHECK-ASM-NEXT: add a2, a2, a3 ; CHECK-ASM-NEXT: add a1, a1, a2 -; CHECK-ASM-NEXT: li a6, 1024 +; CHECK-ASM-NEXT: li a2, 1024 ; CHECK-ASM-NEXT: .LBB12_6: # =>This Inner Loop Header: Depth=1 ; CHECK-ASM-NEXT: lb a4, 0(a1) ; CHECK-ASM-NEXT: add a5, a0, a3 -; CHECK-ASM-NEXT: lb a2, 0(a5) -; CHECK-ASM-NEXT: addw a2, a2, a4 -; CHECK-ASM-NEXT: sb a2, 0(a5) -; CHECK-ASM-NEXT: addiw a2, a3, 1 +; CHECK-ASM-NEXT: lb a6, 0(a5) +; CHECK-ASM-NEXT: addw a4, a6, a4 +; CHECK-ASM-NEXT: sb a4, 0(a5) +; CHECK-ASM-NEXT: addiw a4, a3, 1 ; CHECK-ASM-NEXT: addi a3, a3, 1 ; CHECK-ASM-NEXT: addi a1, a1, 5 -; CHECK-ASM-NEXT: bne a2, a6, .LBB12_6 +; CHECK-ASM-NEXT: bne a4, a2, .LBB12_6 ; CHECK-ASM-NEXT: .LBB12_7: ; CHECK-ASM-NEXT: ret %4 = icmp eq i32 %2, 1024 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index 65b8911749b36..bde7f07d1b53b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -866,8 +866,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV32-NEXT: lui a6, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV32-NEXT: lui a3, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 @@ -886,10 +886,10 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: lui a3, 349525 -; LMULMAX1-RV32-NEXT: addi a3, a3, 1365 -; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV32-NEXT: lui a6, 349525 +; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 @@ -897,7 +897,7 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 @@ -912,8 +912,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 -; 
LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) @@ -933,8 +933,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a3, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 @@ -953,10 +953,10 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: lui a3, 349525 -; LMULMAX1-RV64-NEXT: addiw a3, a3, 1365 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: lui a6, 349525 +; LMULMAX1-RV64-NEXT: addiw a6, a6, 1365 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 @@ -964,7 +964,7 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 @@ -979,8 +979,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; 
LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a6 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) @@ -1153,23 +1153,23 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v12, 24 -; LMULMAX1-RV32-NEXT: lui a6, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a6 -; LMULMAX1-RV32-NEXT: li a5, 5 +; LMULMAX1-RV32-NEXT: lui a5, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a5 +; LMULMAX1-RV32-NEXT: li a6, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5 +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: lui a5, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a5, v0 +; LMULMAX1-RV32-NEXT: lui a6, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a6, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 8 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v9 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 ; LMULMAX1-RV32-NEXT: vor.vv v13, v11, v10 -; LMULMAX1-RV32-NEXT: li a5, 255 +; LMULMAX1-RV32-NEXT: li a6, 255 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a6 ; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsll.vi v11, v12, 8 @@ -1183,7 +1183,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vor.vv v14, v15, v14 ; LMULMAX1-RV32-NEXT: vsll.vx v15, v12, a3 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, 
m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v16, a6 +; LMULMAX1-RV32-NEXT: vmv.v.x v16, a5 ; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16 @@ -1192,30 +1192,30 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v14 ; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 4 -; LMULMAX1-RV32-NEXT: lui a5, 61681 -; LMULMAX1-RV32-NEXT: addi a5, a5, -241 +; LMULMAX1-RV32-NEXT: lui a6, 61681 +; LMULMAX1-RV32-NEXT: addi a6, a6, -241 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v14, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v14, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v14 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v14 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 4 ; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 2 -; LMULMAX1-RV32-NEXT: lui a5, 209715 -; LMULMAX1-RV32-NEXT: addi a5, a5, 819 +; LMULMAX1-RV32-NEXT: lui a6, 209715 +; LMULMAX1-RV32-NEXT: addi a6, a6, 819 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v15, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v15, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v15 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v15 ; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 2 ; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v12, 1 -; LMULMAX1-RV32-NEXT: lui a5, 349525 -; LMULMAX1-RV32-NEXT: addi a5, a5, 1365 +; LMULMAX1-RV32-NEXT: lui a6, 349525 +; LMULMAX1-RV32-NEXT: addi a6, a6, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v17, a5 +; LMULMAX1-RV32-NEXT: vmv.v.x v17, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v17 ; LMULMAX1-RV32-NEXT: vand.vv 
v12, v12, v17 @@ -1226,7 +1226,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a4 ; LMULMAX1-RV32-NEXT: vor.vv v13, v18, v13 ; LMULMAX1-RV32-NEXT: vsrl.vi v18, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a6 +; LMULMAX1-RV32-NEXT: vand.vx v18, v18, a5 ; LMULMAX1-RV32-NEXT: vsrl.vi v19, v8, 8 ; LMULMAX1-RV32-NEXT: vand.vv v9, v19, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v18 @@ -1264,99 +1264,99 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-LABEL: bitreverse_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi a7, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a7) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: li t0, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, t0 -; LMULMAX1-RV64-NEXT: li t1, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t1 +; LMULMAX1-RV64-NEXT: li a2, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV64-NEXT: li a3, 40 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV64-NEXT: lui a4, 16 -; LMULMAX1-RV64-NEXT: addiw t2, a4, -256 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 +; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a5, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: li a5, 255 -; LMULMAX1-RV64-NEXT: slli t3, a5, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 +; LMULMAX1-RV64-NEXT: li a6, 255 +; LMULMAX1-RV64-NEXT: slli a7, a6, 24 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 
-; LMULMAX1-RV64-NEXT: slli t4, a5, 32 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 +; LMULMAX1-RV64-NEXT: slli t0, a6, 32 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: slli a3, a5, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 +; LMULMAX1-RV64-NEXT: slli t1, a6, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, t0 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t1 -; LMULMAX1-RV64-NEXT: slli a5, a5, 48 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: slli a6, a6, 48 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 -; LMULMAX1-RV64-NEXT: lui a4, %hi(.LCPI5_0) -; LMULMAX1-RV64-NEXT: ld a4, %lo(.LCPI5_0)(a4) +; LMULMAX1-RV64-NEXT: lui t2, %hi(.LCPI5_0) +; LMULMAX1-RV64-NEXT: ld t2, %lo(.LCPI5_0)(t2) ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a4 -; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI5_1) -; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI5_1)(a1) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t2 +; LMULMAX1-RV64-NEXT: lui t3, %hi(.LCPI5_1) +; LMULMAX1-RV64-NEXT: ld t3, %lo(.LCPI5_1)(t3) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 4 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 2 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a1 -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI5_2) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI5_2)(a2) +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t3 +; LMULMAX1-RV64-NEXT: lui t4, %hi(.LCPI5_2) +; LMULMAX1-RV64-NEXT: ld t4, %lo(.LCPI5_2)(t4) ; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 2 ; LMULMAX1-RV64-NEXT: 
vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, t4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, t0 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, t1 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t2 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t3 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t4 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a3 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, t0 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a4 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a4 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t2 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t2 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 4 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 2 -; LMULMAX1-RV64-NEXT: 
vand.vx v10, v10, a1 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t3 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t3 ; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vand.vx v10, v10, t4 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, t4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a7) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index 9441accb0631a..b27469939cf3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -595,8 +595,8 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a6, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: li a2, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: li a3, 40 ; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV32-NEXT: lui a4, 16 @@ -606,21 +606,21 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: li a2, 5 +; LMULMAX1-RV32-NEXT: li a6, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX1-RV32-NEXT: lui a2, 1044480 -; 
LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a2, v0 +; LMULMAX1-RV32-NEXT: lui a6, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a6, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 ; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: li a2, 255 +; LMULMAX1-RV32-NEXT: li a6, 255 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a6 ; LMULMAX1-RV32-NEXT: vmerge.vim v11, v11, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsll.vi v13, v9, 8 @@ -638,11 +638,11 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vmerge.vim v16, v16, 0, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vand.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a2 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v15 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v13 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 @@ -659,7 +659,7 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3 ; LMULMAX1-RV32-NEXT: vand.vv v12, v12, v16 -; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a6 +; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a2 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 @@ -670,63 +670,63 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV64-LABEL: bswap_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: addi 
t1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (t1) +; LMULMAX1-RV64-NEXT: addi a1, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a7, 56 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a7 -; LMULMAX1-RV64-NEXT: li t0, 40 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, t0 +; LMULMAX1-RV64-NEXT: li a2, 56 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 +; LMULMAX1-RV64-NEXT: li a3, 40 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v8, a3 ; LMULMAX1-RV64-NEXT: lui a4, 16 ; LMULMAX1-RV64-NEXT: addiw a4, a4, -256 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV64-NEXT: lui a6, 4080 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: lui a5, 4080 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v8, 8 -; LMULMAX1-RV64-NEXT: li a5, 255 -; LMULMAX1-RV64-NEXT: slli a2, a5, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: li a6, 255 +; LMULMAX1-RV64-NEXT: slli a7, a6, 24 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 8 -; LMULMAX1-RV64-NEXT: slli a3, a5, 32 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: slli t0, a6, 32 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v8, 24 -; LMULMAX1-RV64-NEXT: slli a1, a5, 40 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX1-RV64-NEXT: slli t1, a6, 40 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a7 -; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, t0 -; LMULMAX1-RV64-NEXT: slli a5, a5, 48 -; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v8, v8, a3 +; LMULMAX1-RV64-NEXT: slli a6, a6, 48 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a6 ; 
LMULMAX1-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a7 -; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, t0 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v11, v9, a3 ; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a4 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a6 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV64-NEXT: vsrl.vi v12, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a7 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 ; LMULMAX1-RV64-NEXT: vor.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 8 -; LMULMAX1-RV64-NEXT: vand.vx v11, v11, a3 +; LMULMAX1-RV64-NEXT: vand.vx v11, v11, t0 ; LMULMAX1-RV64-NEXT: vsll.vi v12, v9, 24 -; LMULMAX1-RV64-NEXT: vand.vx v12, v12, a1 +; LMULMAX1-RV64-NEXT: vand.vx v12, v12, t1 ; LMULMAX1-RV64-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a7 -; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, t0 -; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV64-NEXT: vsll.vx v12, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vx v9, v9, a3 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vor.vv v9, v12, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (t1) +; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 10bb341a17668..b53641f5cb33c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -1746,8 +1746,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: 
vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 16 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV64-NEXT: li a6, 32 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 32 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) @@ -1763,12 +1763,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) ; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) +; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: li a7, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1781,7 +1781,7 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 16 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1794,7 +1794,7 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 727e7cd63b86e..2a7680a22acab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -1030,8 +1030,8 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v8, (a1) ; LMULMAX1-RV32-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a6, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: li a2, 1 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1050,10 +1050,10 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV32-NEXT: lui a5, 1 ; LMULMAX1-RV32-NEXT: addi a5, a5, -241 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: li a2, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: li a6, 257 +; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 @@ -1066,7 +1066,7 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 8 ; LMULMAX1-RV32-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v8, (a1) @@ -1078,8 +1078,8 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle16.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 
-; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 @@ -1098,10 +1098,10 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV64-NEXT: lui a5, 1 ; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: li a2, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: li a6, 257 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 8 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1114,7 +1114,7 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 8 ; LMULMAX1-RV64-NEXT: vse16.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v8, (a1) @@ -1228,8 +1228,8 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV32-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a6, 1 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV32-NEXT: li a2, 1 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 @@ -1248,11 +1248,11 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV32-NEXT: lui a5, 61681 ; LMULMAX1-RV32-NEXT: addi a5, a5, -241 ; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; 
LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: lui a6, 4112 +; LMULMAX1-RV32-NEXT: addi a6, a6, 257 +; LMULMAX1-RV32-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV32-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV32-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 1 @@ -1265,7 +1265,7 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 24 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) @@ -1277,8 +1277,8 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle32.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 1 @@ -1297,11 +1297,11 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV64-NEXT: lui a5, 61681 ; LMULMAX1-RV64-NEXT: addiw a5, a5, -241 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: lui a2, 4112 -; LMULMAX1-RV64-NEXT: addiw a2, a2, 257 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: lui a6, 4112 +; LMULMAX1-RV64-NEXT: addiw a6, a6, 257 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; 
LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1314,7 +1314,7 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vi v9, v9, 24 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) @@ -1512,8 +1512,8 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) ; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: li a6, 1 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a6 +; LMULMAX1-RV64-NEXT: li a2, 1 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v8, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v8, v8, -1 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI7_0) @@ -1529,15 +1529,15 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX1-RV64-NEXT: lui a5, %hi(.LCPI7_2) ; LMULMAX1-RV64-NEXT: ld a5, %lo(.LCPI7_2)(a5) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI7_3) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI7_3)(a2) +; LMULMAX1-RV64-NEXT: lui a6, %hi(.LCPI7_3) +; LMULMAX1-RV64-NEXT: ld a6, %lo(.LCPI7_3)(a6) ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v8, v8, a6 ; LMULMAX1-RV64-NEXT: li a7, 56 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v8, a7 -; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a6 +; LMULMAX1-RV64-NEXT: vsub.vx v10, v9, a2 ; LMULMAX1-RV64-NEXT: vxor.vi v9, v9, -1 ; LMULMAX1-RV64-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 1 @@ -1550,7 +1550,7 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind { ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, 
v10 ; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a5 -; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vmul.vx v9, v9, a6 ; LMULMAX1-RV64-NEXT: vsrl.vx v9, v9, a7 ; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index cbccf73c32ed3..d84679b8b0c45 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -29,25 +29,25 @@ define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; RV32-LABEL: add_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: lw a2, 8(a0) -; RV32-NEXT: lw a6, 12(a0) +; RV32-NEXT: lw a3, 12(a0) ; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a7, 4(a0) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a5, 0(a1) +; RV32-NEXT: lw a5, 4(a0) +; RV32-NEXT: lw a6, 4(a1) +; RV32-NEXT: lw a7, 0(a1) ; RV32-NEXT: lw t0, 8(a1) ; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: add a3, a7, a3 -; RV32-NEXT: add a5, a4, a5 -; RV32-NEXT: sltu a4, a5, a4 -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: add a1, a6, a1 -; RV32-NEXT: add a4, a2, t0 -; RV32-NEXT: sltu a2, a4, a2 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a6, a4, a7 +; RV32-NEXT: sltu a4, a6, a4 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a3, a2, t0 +; RV32-NEXT: sltu a2, a3, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: sw a4, 8(a0) -; RV32-NEXT: sw a5, 0(a0) +; RV32-NEXT: sw a3, 8(a0) +; RV32-NEXT: sw a6, 0(a0) ; RV32-NEXT: sw a1, 12(a0) -; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a4, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll index 0697170f8d886..9e3988caba6fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll @@ -63,24 +63,24 @@ define void @gather_const_v64f16(<64 x half>* %x) { ; 
; LMULMAX1-LABEL: gather_const_v64f16: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: addi a5, a0, 94 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-NEXT: vlse16.v v8, (a5), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse16.v v8, (a2) -; LMULMAX1-NEXT: vse16.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse16.v v8, (a7) +; LMULMAX1-NEXT: vse16.v v8, (a6) ; LMULMAX1-NEXT: vse16.v v8, (a5) ; LMULMAX1-NEXT: vse16.v v8, (a4) ; LMULMAX1-NEXT: vse16.v v8, (a3) -; LMULMAX1-NEXT: vse16.v v8, (a7) +; LMULMAX1-NEXT: vse16.v v8, (a2) ; LMULMAX1-NEXT: vse16.v v8, (a0) -; LMULMAX1-NEXT: vse16.v v8, (a6) +; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <64 x half>, <64 x half>* %x %b = extractelement <64 x half> %a, i32 47 @@ -102,24 +102,24 @@ define void @gather_const_v32f32(<32 x float>* %x) { ; ; LMULMAX1-LABEL: gather_const_v32f32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: addi a5, a0, 68 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-NEXT: vlse32.v v8, (a5), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse32.v v8, (a2) -; LMULMAX1-NEXT: vse32.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse32.v v8, (a7) +; LMULMAX1-NEXT: vse32.v v8, (a6) ; LMULMAX1-NEXT: vse32.v v8, (a5) ; LMULMAX1-NEXT: vse32.v v8, (a4) ; LMULMAX1-NEXT: vse32.v v8, (a3) -; LMULMAX1-NEXT: vse32.v v8, (a7) +; 
LMULMAX1-NEXT: vse32.v v8, (a2) ; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: vse32.v v8, (a6) +; LMULMAX1-NEXT: vse32.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <32 x float>, <32 x float>* %x %b = extractelement <32 x float> %a, i32 17 @@ -140,23 +140,23 @@ define void @gather_const_v16f64(<16 x double>* %x) { ; ; LMULMAX1-LABEL: gather_const_v16f64: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi a6, a0, 16 -; LMULMAX1-NEXT: addi a7, a0, 48 +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: addi a2, a0, 48 ; LMULMAX1-NEXT: addi a3, a0, 32 ; LMULMAX1-NEXT: addi a4, a0, 80 ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-NEXT: vlse64.v v8, (a4), zero ; LMULMAX1-NEXT: addi a5, a0, 64 -; LMULMAX1-NEXT: addi a1, a0, 112 -; LMULMAX1-NEXT: addi a2, a0, 96 -; LMULMAX1-NEXT: vse64.v v8, (a2) -; LMULMAX1-NEXT: vse64.v v8, (a1) +; LMULMAX1-NEXT: addi a6, a0, 112 +; LMULMAX1-NEXT: addi a7, a0, 96 +; LMULMAX1-NEXT: vse64.v v8, (a7) +; LMULMAX1-NEXT: vse64.v v8, (a6) ; LMULMAX1-NEXT: vse64.v v8, (a5) ; LMULMAX1-NEXT: vse64.v v8, (a4) ; LMULMAX1-NEXT: vse64.v v8, (a3) -; LMULMAX1-NEXT: vse64.v v8, (a7) +; LMULMAX1-NEXT: vse64.v v8, (a2) ; LMULMAX1-NEXT: vse64.v v8, (a0) -; LMULMAX1-NEXT: vse64.v v8, (a6) +; LMULMAX1-NEXT: vse64.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <16 x double>, <16 x double>* %x %b = extractelement <16 x double> %a, i32 10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 32c073d59241c..6402c25d068cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -548,16 +548,16 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r ; RV32-NEXT: andi a3, a2, 1 ; RV32-NEXT: beqz a3, .LBB8_2 ; RV32-NEXT: # %bb.1: # %cond.load -; RV32-NEXT: lbu a6, 1(a0) -; RV32-NEXT: lbu a7, 0(a0) +; RV32-NEXT: lbu a3, 1(a0) +; RV32-NEXT: lbu a4, 0(a0) ; RV32-NEXT: lbu a5, 3(a0) -; 
RV32-NEXT: lbu a3, 2(a0) -; RV32-NEXT: slli a4, a6, 8 -; RV32-NEXT: or a4, a4, a7 -; RV32-NEXT: slli a5, a5, 8 -; RV32-NEXT: or a3, a5, a3 -; RV32-NEXT: slli a3, a3, 16 +; RV32-NEXT: lbu a6, 2(a0) +; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: or a3, a3, a4 +; RV32-NEXT: slli a4, a5, 8 +; RV32-NEXT: or a4, a4, a6 +; RV32-NEXT: slli a4, a4, 16 +; RV32-NEXT: or a3, a4, a3 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: andi a2, a2, 2 @@ -608,16 +608,16 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r ; RV64-NEXT: andi a3, a2, 1 ; RV64-NEXT: beqz a3, .LBB8_2 ; RV64-NEXT: # %bb.1: # %cond.load -; RV64-NEXT: lbu a6, 1(a0) -; RV64-NEXT: lbu a7, 0(a0) +; RV64-NEXT: lbu a3, 1(a0) +; RV64-NEXT: lbu a4, 0(a0) ; RV64-NEXT: lb a5, 3(a0) -; RV64-NEXT: lbu a3, 2(a0) -; RV64-NEXT: slli a4, a6, 8 -; RV64-NEXT: or a4, a4, a7 -; RV64-NEXT: slli a5, a5, 8 -; RV64-NEXT: or a3, a5, a3 -; RV64-NEXT: slli a3, a3, 16 +; RV64-NEXT: lbu a6, 2(a0) +; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: or a3, a3, a4 +; RV64-NEXT: slli a4, a5, 8 +; RV64-NEXT: or a4, a4, a6 +; RV64-NEXT: slli a4, a4, 16 +; RV64-NEXT: or a3, a4, a3 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu ; RV64-NEXT: vmv.v.x v8, a3 ; RV64-NEXT: andi a2, a2, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 7ba839edb7527..ad21c399c15f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -421,18 +421,18 @@ define <33 x double> @vpload_v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 zeroe ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: bltu a5, a6, .LBB32_4 +; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: bltu a5, a4, .LBB32_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; CHECK-NEXT: 
vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a4, a1, 256 +; CHECK-NEXT: addi a6, a1, 256 ; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: bltu a2, a4, .LBB32_6 +; CHECK-NEXT: vle64.v v16, (a6), v0.t +; CHECK-NEXT: li a5, 32 +; CHECK-NEXT: bltu a2, a5, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB32_6: @@ -443,10 +443,10 @@ define <33 x double> @vpload_v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 zeroe ; CHECK-NEXT: .LBB32_8: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu -; CHECK-NEXT: vle64.v v24, (a4), v0.t -; CHECK-NEXT: bltu a2, a6, .LBB32_10 +; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: bltu a2, a4, .LBB32_10 ; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_10: diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index 78dc3900ebe2c..8ac3073199689 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -27,13 +27,13 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-NEXT: lw a6, 24(s1) ; CHECK-NEXT: lw a7, 20(s1) ; CHECK-NEXT: lw t1, 16(s1) -; CHECK-NEXT: lw t2, 12(s1) -; CHECK-NEXT: lw a1, 8(s1) +; CHECK-NEXT: lw a1, 12(s1) +; CHECK-NEXT: lw t2, 8(s1) ; CHECK-NEXT: sw a0, 52(s1) ; CHECK-NEXT: sw a0, 48(s1) ; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd a1, 16(sp) -; CHECK-NEXT: sd t2, 8(sp) +; CHECK-NEXT: sd t2, 16(sp) +; CHECK-NEXT: sd a1, 8(sp) ; CHECK-NEXT: addi a1, s1, 48 ; CHECK-NEXT: sd t1, 0(sp) ; CHECK-NEXT: mv a0, t0 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 7f4d2ec0b8f1e..2acb185d59ead 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -257,33 +257,33 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_mul_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_mul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB7_2 +; CHECK-NEXT: bgeu a3, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB7_5 ; CHECK-NEXT: .LBB7_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB7_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB7_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB7_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB7_7 +; CHECK-NEXT: beqz a4, .LBB7_7 ; CHECK-NEXT: .LBB7_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB7_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -350,33 +350,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_add_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_add_scalable: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB8_2 +; CHECK-NEXT: bgeu a3, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB8_5 ; CHECK-NEXT: .LBB8_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB8_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vadd.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB8_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB8_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB8_7 +; CHECK-NEXT: beqz a4, .LBB8_7 ; CHECK-NEXT: .LBB8_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB8_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -443,33 +443,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_sub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB9_2 +; CHECK-NEXT: bgeu a3, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 
+; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB9_5 ; CHECK-NEXT: .LBB9_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB9_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsub.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB9_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB9_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB9_7 +; CHECK-NEXT: beqz a4, .LBB9_7 ; CHECK-NEXT: .LBB9_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB9_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -536,33 +536,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_rsub_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_rsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB10_2 +; CHECK-NEXT: bgeu a3, a2, .LBB10_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB10_5 ; CHECK-NEXT: .LBB10_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; 
CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB10_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrsub.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB10_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB10_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB10_7 +; CHECK-NEXT: beqz a4, .LBB10_7 ; CHECK-NEXT: .LBB10_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB10_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -629,33 +629,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_and_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB11_2 +; CHECK-NEXT: bgeu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB11_5 ; CHECK-NEXT: .LBB11_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB11_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, 
mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB11_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB11_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB11_7 +; CHECK-NEXT: beqz a4, .LBB11_7 ; CHECK-NEXT: .LBB11_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB11_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -722,33 +722,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_or_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB12_2 +; CHECK-NEXT: bgeu a3, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB12_5 ; CHECK-NEXT: .LBB12_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB12_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vor.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB12_3 +; 
CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB12_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB12_7 +; CHECK-NEXT: beqz a4, .LBB12_7 ; CHECK-NEXT: .LBB12_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB12_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -815,33 +815,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_xor_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB13_2 +; CHECK-NEXT: bgeu a3, a2, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB13_5 ; CHECK-NEXT: .LBB13_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB13_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB13_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB13_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB13_7 +; CHECK-NEXT: beqz a4, .LBB13_7 ; CHECK-NEXT: 
.LBB13_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB13_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1016,33 +1016,33 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_shl_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_shl_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB17_2 +; CHECK-NEXT: bgeu a3, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB17_5 ; CHECK-NEXT: .LBB17_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB17_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsll.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB17_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB17_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB17_7 +; CHECK-NEXT: beqz a4, .LBB17_7 ; CHECK-NEXT: .LBB17_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB17_6: # %for.body ; CHECK-NEXT: # =>This 
Inner Loop Header: Depth=1 @@ -1109,33 +1109,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_lshr_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_lshr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB18_2 +; CHECK-NEXT: bgeu a3, a2, .LBB18_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB18_5 ; CHECK-NEXT: .LBB18_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB18_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vsrl.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB18_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB18_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB18_7 +; CHECK-NEXT: beqz a4, .LBB18_7 ; CHECK-NEXT: .LBB18_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB18_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1202,30 +1202,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_ashr_scalable(i32* nocapture %a) { ; CHECK-LABEL: sink_splat_ashr_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a3, 
vlenb -; CHECK-NEXT: srli a7, a3, 1 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: srli a1, a5, 1 ; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a7, .LBB19_2 +; CHECK-NEXT: bgeu a2, a1, .LBB19_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB19_5 ; CHECK-NEXT: .LBB19_2: # %vector.ph ; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: remu a6, a2, a7 -; CHECK-NEXT: sub a2, a2, a6 -; CHECK-NEXT: slli a5, a3, 1 -; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: remu a3, a2, a1 +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB19_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a3) -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m2, ta, mu ; CHECK-NEXT: vsra.vi v8, v8, 2 -; CHECK-NEXT: vs2r.v v8, (a3) -; CHECK-NEXT: add a4, a4, a7 -; CHECK-NEXT: add a3, a3, a5 +; CHECK-NEXT: vs2r.v v8, (a6) +; CHECK-NEXT: add a4, a4, a1 +; CHECK-NEXT: add a6, a6, a5 ; CHECK-NEXT: bne a4, a2, .LBB19_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB19_7 +; CHECK-NEXT: beqz a3, .LBB19_7 ; CHECK-NEXT: .LBB19_5: # %for.body.preheader ; CHECK-NEXT: addi a1, a2, -1024 ; CHECK-NEXT: slli a2, a2, 2 @@ -1517,30 +1517,30 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB26_2 +; CHECK-NEXT: bgeu a6, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, 
a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB26_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB26_7 +; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1610,30 +1610,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB27_2 +; CHECK-NEXT: bgeu a6, a3, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, 
a1, .LBB27_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB27_7 +; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1703,30 +1703,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB28_2 +; CHECK-NEXT: bgeu a6, a3, .LBB28_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB28_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB28_7 +; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1796,30 +1796,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB29_2 +; CHECK-NEXT: bgeu a6, a3, .LBB29_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB29_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB29_7 +; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1889,30 +1889,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB30_2 +; CHECK-NEXT: bgeu a6, a3, .LBB30_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB30_3: # 
%vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB30_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB30_7 +; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -1982,30 +1982,30 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli a3, a7, 2 -; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a6, 1024 ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a4, a3, .LBB31_2 +; CHECK-NEXT: bgeu a6, a3, .LBB31_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a4, a3 -; CHECK-NEXT: sub a1, a4, a6 -; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: remu a4, a6, a3 +; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl1re32.v v8, (a4) -; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vl1re32.v v8, (a6) +; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu ; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: vs1r.v v8, (a4) +; CHECK-NEXT: vs1r.v v8, (a6) ; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a4, a4, a7 +; CHECK-NEXT: add a6, a6, a2 ; CHECK-NEXT: bne a5, a1, .LBB31_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB31_7 
+; CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader ; CHECK-NEXT: addi a2, a1, -1024 ; CHECK-NEXT: slli a1, a1, 2 @@ -2159,36 +2159,36 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli t1, a7, 2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: li t0, 1024 ; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, t1, .LBB34_2 +; CHECK-NEXT: bgeu t0, a4, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: remu a6, t0, t1 -; CHECK-NEXT: sub t0, t0, a6 +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: remu a5, t0, a4 +; CHECK-NEXT: sub a2, t0, a5 ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a2, a0, a5 -; CHECK-NEXT: vl1re32.v v8, (a2) -; CHECK-NEXT: add a4, a1, a5 -; CHECK-NEXT: vl1re32.v v9, (a4) -; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu +; CHECK-NEXT: add t0, a0, a6 +; CHECK-NEXT: vl1re32.v v8, (t0) +; CHECK-NEXT: add t1, a1, a6 +; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, ft0, v8 -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: add a3, a3, t1 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: bne a3, t0, .LBB34_3 +; CHECK-NEXT: vs1r.v v9, (t0) +; CHECK-NEXT: add a7, a7, a4 +; CHECK-NEXT: add a6, a6, a3 +; CHECK-NEXT: bne a7, a2, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB34_7 +; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: addi a3, t0, -1024 -; CHECK-NEXT: slli a2, t0, 2 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 
2 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB34_6: # %for.body @@ -2263,36 +2263,36 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a7, vlenb -; CHECK-NEXT: srli t1, a7, 2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: li t0, 1024 ; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, t1, .LBB35_2 +; CHECK-NEXT: bgeu t0, a4, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: remu a6, t0, t1 -; CHECK-NEXT: sub t0, t0, a6 +; CHECK-NEXT: li a6, 0 +; CHECK-NEXT: li a7, 0 +; CHECK-NEXT: remu a5, t0, a4 +; CHECK-NEXT: sub a2, t0, a5 ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add a2, a0, a5 -; CHECK-NEXT: vl1re32.v v8, (a2) -; CHECK-NEXT: add a4, a1, a5 -; CHECK-NEXT: vl1re32.v v9, (a4) -; CHECK-NEXT: vsetvli a4, zero, e32, m1, ta, mu +; CHECK-NEXT: add t0, a0, a6 +; CHECK-NEXT: vl1re32.v v8, (t0) +; CHECK-NEXT: add t1, a1, a6 +; CHECK-NEXT: vl1re32.v v9, (t1) +; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu ; CHECK-NEXT: vfmacc.vf v9, ft0, v8 -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: add a3, a3, t1 -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: bne a3, t0, .LBB35_3 +; CHECK-NEXT: vs1r.v v9, (t0) +; CHECK-NEXT: add a7, a7, a4 +; CHECK-NEXT: add a6, a6, a3 +; CHECK-NEXT: bne a7, a2, .LBB35_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB35_7 +; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: addi a3, t0, -1024 -; CHECK-NEXT: slli a2, t0, 2 +; CHECK-NEXT: addi a3, a2, -1024 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: add a1, a1, 
a2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB35_6: # %for.body @@ -2593,33 +2593,33 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_udiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_udiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB42_2 +; CHECK-NEXT: bgeu a3, a2, .LBB42_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB42_5 ; CHECK-NEXT: .LBB42_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB42_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdivu.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB42_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB42_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB42_7 +; CHECK-NEXT: beqz a4, .LBB42_7 ; CHECK-NEXT: .LBB42_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB42_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2686,33 +2686,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_sdiv_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: 
sink_splat_sdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB43_2 +; CHECK-NEXT: bgeu a3, a2, .LBB43_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB43_5 ; CHECK-NEXT: .LBB43_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB43_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vdiv.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB43_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB43_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB43_7 +; CHECK-NEXT: beqz a4, .LBB43_7 ; CHECK-NEXT: .LBB43_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB43_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2779,33 +2779,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_urem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_urem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB44_2 +; 
CHECK-NEXT: bgeu a3, a2, .LBB44_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB44_5 ; CHECK-NEXT: .LBB44_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB44_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vremu.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB44_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB44_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB44_7 +; CHECK-NEXT: beqz a4, .LBB44_7 ; CHECK-NEXT: .LBB44_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB44_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2872,33 +2872,33 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_srem_scalable(i32* nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_srem_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a7, a2, 1 +; CHECK-NEXT: csrr a6, vlenb +; CHECK-NEXT: srli a2, a6, 1 ; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a7, .LBB45_2 +; CHECK-NEXT: bgeu a3, a2, .LBB45_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li t0, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB45_5 ; CHECK-NEXT: .LBB45_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a6, a3, a7 -; CHECK-NEXT: sub t0, 
a3, a6 -; CHECK-NEXT: slli a4, a2, 1 -; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 +; CHECK-NEXT: slli a6, a6, 1 +; CHECK-NEXT: mv a7, a0 ; CHECK-NEXT: .LBB45_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vl2re32.v v8, (a2) -; CHECK-NEXT: vsetvli a3, zero, e32, m2, ta, mu +; CHECK-NEXT: vl2re32.v v8, (a7) +; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, mu ; CHECK-NEXT: vrem.vx v8, v8, a1 -; CHECK-NEXT: vs2r.v v8, (a2) -; CHECK-NEXT: add a5, a5, a7 -; CHECK-NEXT: add a2, a2, a4 -; CHECK-NEXT: bne a5, t0, .LBB45_3 +; CHECK-NEXT: vs2r.v v8, (a7) +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a7, a6 +; CHECK-NEXT: bne a5, a3, .LBB45_3 ; CHECK-NEXT: # %bb.4: # %middle.block -; CHECK-NEXT: beqz a6, .LBB45_7 +; CHECK-NEXT: beqz a4, .LBB45_7 ; CHECK-NEXT: .LBB45_5: # %for.body.preheader -; CHECK-NEXT: addi a2, t0, -1024 -; CHECK-NEXT: slli a3, t0, 2 +; CHECK-NEXT: addi a2, a3, -1024 +; CHECK-NEXT: slli a3, a3, 2 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB45_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index f5cb93cc7da40..e909ffe6a025a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -495,48 +495,48 @@ define @vpload_nxv17f64(* %ptr, %val, % ; RV64-LABEL: vpreduce_umax_nxv32i32: ; RV64: # %bb.0: ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: srli a6, a3, 2 +; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: slli a4, a0, 32 ; RV64-NEXT: slli a0, a3, 1 ; RV64-NEXT: srli a3, a4, 32 @@ -1195,8 +1195,8 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, % ; RV64-NEXT: mv a4, a0 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: li a5, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, mu -; RV64-NEXT: vslidedown.vx v24, v0, a6 +; RV64-NEXT: vsetvli a6, zero, e8, mf2, ta, mu +; RV64-NEXT: vslidedown.vx v24, v0, a2 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu ; 
RV64-NEXT: vmv.s.x v25, a3 ; RV64-NEXT: vsetvli zero, a4, e32, m8, tu, mu diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll index ff40c03c10b0f..c9da3934b010d 100644 --- a/llvm/test/CodeGen/RISCV/sadd_sat.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll @@ -160,12 +160,12 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbbZbt-NEXT: add a0, a4, a0 ; RV32IZbbZbt-NEXT: srai a4, a0, 31 ; RV32IZbbZbt-NEXT: lui a5, 524288 -; RV32IZbbZbt-NEXT: xor a6, a4, a5 -; RV32IZbbZbt-NEXT: xor a5, a1, a0 +; RV32IZbbZbt-NEXT: xor a5, a4, a5 +; RV32IZbbZbt-NEXT: xor a6, a1, a0 ; RV32IZbbZbt-NEXT: xor a1, a1, a3 -; RV32IZbbZbt-NEXT: andn a1, a5, a1 +; RV32IZbbZbt-NEXT: andn a1, a6, a1 ; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a0 +; RV32IZbbZbt-NEXT: cmov a1, a3, a5, a0 ; RV32IZbbZbt-NEXT: cmov a0, a3, a4, a2 ; RV32IZbbZbt-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll index b266b22bd8012..47c904108126e 100644 --- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll @@ -168,13 +168,13 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbbZbt-NEXT: add a0, a2, a0 ; RV32IZbbZbt-NEXT: srai a2, a0, 31 ; RV32IZbbZbt-NEXT: lui a4, 524288 -; RV32IZbbZbt-NEXT: xor a6, a2, a4 -; RV32IZbbZbt-NEXT: xor a4, a1, a0 +; RV32IZbbZbt-NEXT: xor a4, a2, a4 +; RV32IZbbZbt-NEXT: xor a6, a1, a0 ; RV32IZbbZbt-NEXT: xor a1, a1, a5 -; RV32IZbbZbt-NEXT: andn a1, a4, a1 -; RV32IZbbZbt-NEXT: slti a4, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a4, a6, a0 -; RV32IZbbZbt-NEXT: cmov a0, a4, a2, a3 +; RV32IZbbZbt-NEXT: andn a1, a6, a1 +; RV32IZbbZbt-NEXT: slti a5, a1, 0 +; RV32IZbbZbt-NEXT: cmov a1, a5, a4, a0 +; RV32IZbbZbt-NEXT: cmov a0, a5, a2, a3 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func64: diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll index 
495555c67e877..891a08a0ed495 100644 --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll @@ -65,20 +65,20 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: .LBB1_2: # %entry ; RV32I-NEXT: beqz a1, .LBB1_5 ; RV32I-NEXT: # %bb.3: # %entry -; RV32I-NEXT: addi a7, a4, 4 +; RV32I-NEXT: addi a5, a4, 4 ; RV32I-NEXT: bnez a1, .LBB1_6 ; RV32I-NEXT: .LBB1_4: -; RV32I-NEXT: addi a5, a3, 8 +; RV32I-NEXT: addi a6, a3, 8 ; RV32I-NEXT: j .LBB1_7 ; RV32I-NEXT: .LBB1_5: -; RV32I-NEXT: addi a7, a3, 4 +; RV32I-NEXT: addi a5, a3, 4 ; RV32I-NEXT: beqz a1, .LBB1_4 ; RV32I-NEXT: .LBB1_6: # %entry -; RV32I-NEXT: addi a5, a4, 8 +; RV32I-NEXT: addi a6, a4, 8 ; RV32I-NEXT: .LBB1_7: # %entry -; RV32I-NEXT: lw a6, 0(a2) -; RV32I-NEXT: lw a7, 0(a7) -; RV32I-NEXT: lw a2, 0(a5) +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a5, 0(a5) +; RV32I-NEXT: lw a6, 0(a6) ; RV32I-NEXT: beqz a1, .LBB1_9 ; RV32I-NEXT: # %bb.8: # %entry ; RV32I-NEXT: addi a1, a4, 12 @@ -88,25 +88,25 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: .LBB1_10: # %entry ; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: sw a6, 0(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: ret ; ; RV32IBT-LABEL: cmovcc128: ; RV32IBT: # %bb.0: # %entry -; RV32IBT-NEXT: addi a6, a3, 12 -; RV32IBT-NEXT: addi a7, a4, 12 -; RV32IBT-NEXT: addi t0, a3, 8 -; RV32IBT-NEXT: addi t1, a4, 8 -; RV32IBT-NEXT: addi t2, a3, 4 -; RV32IBT-NEXT: addi a5, a4, 4 +; RV32IBT-NEXT: addi a5, a3, 12 +; RV32IBT-NEXT: addi a6, a4, 12 +; RV32IBT-NEXT: addi a7, a3, 8 +; RV32IBT-NEXT: addi t0, a4, 8 +; RV32IBT-NEXT: addi t1, a3, 4 +; RV32IBT-NEXT: addi t2, a4, 4 ; RV32IBT-NEXT: xori a1, a1, 123 ; RV32IBT-NEXT: or a1, a1, a2 ; RV32IBT-NEXT: cmov a2, a1, a4, a3 -; RV32IBT-NEXT: cmov a3, a1, a5, t2 -; RV32IBT-NEXT: cmov 
a4, a1, t1, t0 -; RV32IBT-NEXT: cmov a1, a1, a7, a6 +; RV32IBT-NEXT: cmov a3, a1, t2, t1 +; RV32IBT-NEXT: cmov a4, a1, t0, a7 +; RV32IBT-NEXT: cmov a1, a1, a6, a5 ; RV32IBT-NEXT: lw a1, 0(a1) ; RV32IBT-NEXT: lw a4, 0(a4) ; RV32IBT-NEXT: lw a3, 0(a3) @@ -192,20 +192,20 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: .LBB3_2: # %entry ; RV32I-NEXT: bnez a1, .LBB3_5 ; RV32I-NEXT: # %bb.3: # %entry -; RV32I-NEXT: addi a7, a3, 4 +; RV32I-NEXT: addi a5, a3, 4 ; RV32I-NEXT: beqz a1, .LBB3_6 ; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: addi a5, a2, 8 +; RV32I-NEXT: addi a6, a2, 8 ; RV32I-NEXT: j .LBB3_7 ; RV32I-NEXT: .LBB3_5: -; RV32I-NEXT: addi a7, a2, 4 +; RV32I-NEXT: addi a5, a2, 4 ; RV32I-NEXT: bnez a1, .LBB3_4 ; RV32I-NEXT: .LBB3_6: # %entry -; RV32I-NEXT: addi a5, a3, 8 +; RV32I-NEXT: addi a6, a3, 8 ; RV32I-NEXT: .LBB3_7: # %entry -; RV32I-NEXT: lw a6, 0(a4) -; RV32I-NEXT: lw a7, 0(a7) -; RV32I-NEXT: lw a4, 0(a5) +; RV32I-NEXT: lw a4, 0(a4) +; RV32I-NEXT: lw a5, 0(a5) +; RV32I-NEXT: lw a6, 0(a6) ; RV32I-NEXT: bnez a1, .LBB3_9 ; RV32I-NEXT: # %bb.8: # %entry ; RV32I-NEXT: addi a1, a3, 12 @@ -215,26 +215,26 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; RV32I-NEXT: .LBB3_10: # %entry ; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: sw a6, 0(a0) +; RV32I-NEXT: sw a6, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: ret ; ; RV32IBT-LABEL: cmov128: ; RV32IBT: # %bb.0: # %entry -; RV32IBT-NEXT: addi a6, a3, 12 -; RV32IBT-NEXT: addi a7, a2, 12 -; RV32IBT-NEXT: addi t0, a3, 8 -; RV32IBT-NEXT: addi t1, a2, 8 -; RV32IBT-NEXT: addi a4, a3, 4 -; RV32IBT-NEXT: addi a5, a2, 4 +; RV32IBT-NEXT: addi a4, a3, 12 +; RV32IBT-NEXT: addi a5, a2, 12 +; RV32IBT-NEXT: addi a6, a3, 8 +; RV32IBT-NEXT: addi a7, a2, 8 +; RV32IBT-NEXT: addi t0, a3, 4 +; RV32IBT-NEXT: addi t1, a2, 4 ; RV32IBT-NEXT: andi a1, a1, 1 ; RV32IBT-NEXT: cmov a2, a1, a2, a3 -; 
RV32IBT-NEXT: cmov a3, a1, a5, a4 -; RV32IBT-NEXT: cmov a4, a1, t1, t0 -; RV32IBT-NEXT: cmov a1, a1, a7, a6 +; RV32IBT-NEXT: cmov a3, a1, t1, t0 +; RV32IBT-NEXT: cmov a6, a1, a7, a6 +; RV32IBT-NEXT: cmov a1, a1, a5, a4 ; RV32IBT-NEXT: lw a1, 0(a1) -; RV32IBT-NEXT: lw a4, 0(a4) +; RV32IBT-NEXT: lw a4, 0(a6) ; RV32IBT-NEXT: lw a3, 0(a3) ; RV32IBT-NEXT: lw a2, 0(a2) ; RV32IBT-NEXT: sw a1, 12(a0) @@ -476,17 +476,17 @@ entry: define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV32I-LABEL: cmovdiffcc: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a0, a0, 1 -; RV32I-NEXT: andi a1, a1, 1 -; RV32I-NEXT: beqz a0, .LBB7_3 +; RV32I-NEXT: andi a6, a0, 1 +; RV32I-NEXT: andi a0, a1, 1 +; RV32I-NEXT: beqz a6, .LBB7_3 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: beqz a1, .LBB7_4 +; RV32I-NEXT: beqz a0, .LBB7_4 ; RV32I-NEXT: .LBB7_2: # %entry ; RV32I-NEXT: add a0, a2, a4 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB7_3: # %entry ; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bnez a1, .LBB7_2 +; RV32I-NEXT: bnez a0, .LBB7_2 ; RV32I-NEXT: .LBB7_4: # %entry ; RV32I-NEXT: mv a4, a5 ; RV32I-NEXT: add a0, a2, a4 @@ -503,17 +503,17 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; ; RV64I-LABEL: cmovdiffcc: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a0, a0, 1 -; RV64I-NEXT: andi a1, a1, 1 -; RV64I-NEXT: beqz a0, .LBB7_3 +; RV64I-NEXT: andi a6, a0, 1 +; RV64I-NEXT: andi a0, a1, 1 +; RV64I-NEXT: beqz a6, .LBB7_3 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: beqz a1, .LBB7_4 +; RV64I-NEXT: beqz a0, .LBB7_4 ; RV64I-NEXT: .LBB7_2: # %entry ; RV64I-NEXT: addw a0, a2, a4 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB7_3: # %entry ; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bnez a1, .LBB7_2 +; RV64I-NEXT: bnez a0, .LBB7_2 ; RV64I-NEXT: .LBB7_4: # %entry ; RV64I-NEXT: mv a4, a5 ; RV64I-NEXT: addw a0, a2, a4 diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index 868ab46acedc0..c580d26695e0b 100644 --- 
a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -179,19 +179,19 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sraw a0, a0, a1 ; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: addiw s2, a1, 1365 +; RV64I-NEXT: addiw s0, a1, 1365 ; RV64I-NEXT: lui a1, 209715 ; RV64I-NEXT: addiw s1, a1, 819 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw s3, a1, -241 +; RV64I-NEXT: addiw s2, a1, -241 ; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s0, a1, 257 +; RV64I-NEXT: addiw s3, a1, 257 ; RV64I-NEXT: .LBB4_1: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call bar@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: subw a0, a1, a0 ; RV64I-NEXT: and a2, a0, s1 ; RV64I-NEXT: srli a0, a0, 2 @@ -199,8 +199,8 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: srli a2, a0, 4 ; RV64I-NEXT: add a0, a0, a2 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: mulw a0, a0, s0 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: mulw a0, a0, s3 ; RV64I-NEXT: srliw a0, a0, 24 ; RV64I-NEXT: bnez a1, .LBB4_1 ; RV64I-NEXT: # %bb.2: # %bb7 diff --git a/llvm/test/CodeGen/RISCV/shadowcallstack.ll b/llvm/test/CodeGen/RISCV/shadowcallstack.ll index 188bcead5d0d4..6b25b0d910c79 100644 --- a/llvm/test/CodeGen/RISCV/shadowcallstack.ll +++ b/llvm/test/CodeGen/RISCV/shadowcallstack.ll @@ -82,14 +82,14 @@ define i32 @f4() shadowcallstack { ; RV32-NEXT: .cfi_offset s1, -12 ; RV32-NEXT: .cfi_offset s3, -16 ; RV32-NEXT: call bar@plt -; RV32-NEXT: mv s3, a0 +; RV32-NEXT: mv s0, a0 ; RV32-NEXT: call bar@plt ; RV32-NEXT: mv s1, a0 ; RV32-NEXT: call bar@plt -; RV32-NEXT: mv s0, a0 +; RV32-NEXT: mv s3, a0 ; RV32-NEXT: call bar@plt -; RV32-NEXT: add a1, s3, s1 -; RV32-NEXT: add a0, s0, a0 +; RV32-NEXT: add a1, s0, s1 +; RV32-NEXT: add a0, s3, 
a0 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -115,14 +115,14 @@ define i32 @f4() shadowcallstack { ; RV64-NEXT: .cfi_offset s1, -24 ; RV64-NEXT: .cfi_offset s3, -32 ; RV64-NEXT: call bar@plt -; RV64-NEXT: mv s3, a0 +; RV64-NEXT: mv s0, a0 ; RV64-NEXT: call bar@plt ; RV64-NEXT: mv s1, a0 ; RV64-NEXT: call bar@plt -; RV64-NEXT: mv s0, a0 +; RV64-NEXT: mv s3, a0 ; RV64-NEXT: call bar@plt -; RV64-NEXT: addw a1, s3, s1 -; RV64-NEXT: addw a0, s0, a0 +; RV64-NEXT: addw a1, s0, s1 +; RV64-NEXT: addw a0, s3, a0 ; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index cdee24f11a7b5..1353f2cd8c638 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -148,120 +148,116 @@ define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind { define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t4, 12(a1) -; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: li a6, 32 ; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t5, a3, a2 ; RV32I-NEXT: li t2, 31 -; RV32I-NEXT: bltz t5, .LBB6_2 +; RV32I-NEXT: bltz t1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a3, t0, t5 +; RV32I-NEXT: sll t6, a5, t1 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a3, t4, t1 -; RV32I-NEXT: sub a4, t2, t1 -; RV32I-NEXT: srli a5, t0, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: sll a6, a4, t0 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: srli t3, a5, 1 +; RV32I-NEXT: srl a7, 
t3, a7 +; RV32I-NEXT: or t6, a6, a7 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw a5, 4(a1) -; RV32I-NEXT: addi t6, a2, -32 -; RV32I-NEXT: bgez t6, .LBB6_5 +; RV32I-NEXT: lw t5, 4(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB6_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a5, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: srl a7, t5, a2 +; RV32I-NEXT: or t6, t6, a7 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: addi a4, a2, -96 +; RV32I-NEXT: addi t4, a2, -96 ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz a4, .LBB6_7 +; RV32I-NEXT: bltz t4, .LBB6_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB6_8 +; RV32I-NEXT: bgeu a2, a3, .LBB6_8 ; RV32I-NEXT: j .LBB6_9 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a7, t4, t3 -; RV32I-NEXT: bltu a2, a6, .LBB6_9 +; RV32I-NEXT: srl a7, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB6_9 ; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv t6, a7 ; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: beqz a2, .LBB6_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: lw s0, 0(a1) +; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sub t2, t2, a2 -; RV32I-NEXT: bltz t6, .LBB6_13 +; RV32I-NEXT: bltz a6, .LBB6_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl a5, a5, t6 -; RV32I-NEXT: bltz t5, .LBB6_14 +; RV32I-NEXT: srl t5, t5, a6 +; RV32I-NEXT: bltz t1, .LBB6_14 ; RV32I-NEXT: j .LBB6_15 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl a3, s0, a2 -; RV32I-NEXT: slli a5, a5, 1 -; RV32I-NEXT: sll a5, a5, t2 -; RV32I-NEXT: or a5, a3, a5 -; RV32I-NEXT: bgez t5, .LBB6_15 +; RV32I-NEXT: srl t6, a1, a2 +; RV32I-NEXT: slli t5, t5, 1 +; RV32I-NEXT: sll t5, t5, t2 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: bgez t1, .LBB6_15 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: sll a3, t0, t1 -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: sll t0, a5, t0 +; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: slli a3, t4, 1 -; 
RV32I-NEXT: bltz a4, .LBB6_17 +; RV32I-NEXT: slli t0, a4, 1 +; RV32I-NEXT: bltz t4, .LBB6_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: srl a4, t4, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB6_18 +; RV32I-NEXT: srl t1, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB6_18 ; RV32I-NEXT: j .LBB6_19 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sll a4, a3, a4 -; RV32I-NEXT: srl a1, t0, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB6_19 +; RV32I-NEXT: li t1, 95 +; RV32I-NEXT: sub t1, t1, a2 +; RV32I-NEXT: sll t1, t0, t1 +; RV32I-NEXT: srl t3, a5, t3 +; RV32I-NEXT: or t1, t3, t1 +; RV32I-NEXT: bltu a2, a3, .LBB6_19 ; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t5, t1 ; RV32I-NEXT: .LBB6_19: ; RV32I-NEXT: bnez a2, .LBB6_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz t6, .LBB6_23 +; RV32I-NEXT: bltz a6, .LBB6_23 ; RV32I-NEXT: .LBB6_21: -; RV32I-NEXT: srl a3, t4, t6 -; RV32I-NEXT: bgeu a2, a6, .LBB6_24 +; RV32I-NEXT: srl a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB6_24 ; RV32I-NEXT: j .LBB6_25 ; RV32I-NEXT: .LBB6_22: -; RV32I-NEXT: mv s0, a5 -; RV32I-NEXT: bgez t6, .LBB6_21 +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bgez a6, .LBB6_21 ; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: srl a1, t0, a2 -; RV32I-NEXT: sll a3, a3, t2 -; RV32I-NEXT: or a3, a1, a3 -; RV32I-NEXT: bltu a2, a6, .LBB6_25 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: sll t0, t0, t2 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: bltu a2, a3, .LBB6_25 ; RV32I-NEXT: .LBB6_24: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: .LBB6_25: -; RV32I-NEXT: bltz t6, .LBB6_27 +; RV32I-NEXT: bltz a6, .LBB6_27 ; RV32I-NEXT: # %bb.26: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB6_28 +; RV32I-NEXT: bgeu a2, a3, .LBB6_28 ; RV32I-NEXT: j .LBB6_29 ; RV32I-NEXT: .LBB6_27: -; RV32I-NEXT: srl a4, t4, a2 -; RV32I-NEXT: bltu a2, a6, .LBB6_29 +; RV32I-NEXT: srl a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB6_29 ; RV32I-NEXT: .LBB6_28: ; RV32I-NEXT: li a4, 0 ; 
RV32I-NEXT: .LBB6_29: ; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a3, 8(a0) -; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a7, 4(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -290,120 +286,118 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t2, 8(a1) -; RV32I-NEXT: lw t5, 12(a1) -; RV32I-NEXT: li a6, 64 -; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t6, a3, a2 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t1, a3, a2 +; RV32I-NEXT: li a6, 32 +; RV32I-NEXT: sub t2, a6, a2 ; RV32I-NEXT: li t4, 31 -; RV32I-NEXT: bltz t6, .LBB7_2 +; RV32I-NEXT: bltz t2, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll s0, t2, t6 +; RV32I-NEXT: sll s0, a5, t2 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a3, t5, t1 -; RV32I-NEXT: sub a4, t4, t1 -; RV32I-NEXT: srli a5, t2, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or s0, a3, a4 +; RV32I-NEXT: sll a6, a4, t1 +; RV32I-NEXT: sub a7, t4, t1 +; RV32I-NEXT: srli t0, a5, 1 +; RV32I-NEXT: srl a7, t0, a7 +; RV32I-NEXT: or s0, a6, a7 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: lw a5, 4(a1) -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: bgez a3, .LBB7_5 +; RV32I-NEXT: lw t6, 4(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB7_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a4, a5, a2 -; RV32I-NEXT: or s0, s0, a4 +; RV32I-NEXT: srl a7, t6, a2 +; RV32I-NEXT: or s0, s0, a7 ; RV32I-NEXT: .LBB7_5: ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi a4, a2, -96 -; RV32I-NEXT: srai a7, t5, 31 -; RV32I-NEXT: bltz a4, .LBB7_7 +; RV32I-NEXT: addi t5, a2, -96 +; RV32I-NEXT: srai a7, a4, 31 +; RV32I-NEXT: bltz t5, 
.LBB7_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: bgeu a2, a6, .LBB7_8 +; RV32I-NEXT: bgeu a2, a3, .LBB7_8 ; RV32I-NEXT: j .LBB7_9 ; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: sra t0, t5, t3 -; RV32I-NEXT: bltu a2, a6, .LBB7_9 +; RV32I-NEXT: sra t0, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB7_9 ; RV32I-NEXT: .LBB7_8: ; RV32I-NEXT: mv s0, t0 ; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: mv t0, t6 ; RV32I-NEXT: beqz a2, .LBB7_11 ; RV32I-NEXT: # %bb.10: ; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: .LBB7_11: -; RV32I-NEXT: lw s1, 0(a1) +; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: sub t4, t4, a2 -; RV32I-NEXT: bltz a3, .LBB7_13 +; RV32I-NEXT: bltz a6, .LBB7_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl a5, a5, a3 -; RV32I-NEXT: bltz t6, .LBB7_14 +; RV32I-NEXT: srl t6, t6, a6 +; RV32I-NEXT: bltz t2, .LBB7_14 ; RV32I-NEXT: j .LBB7_15 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl s0, s1, a2 -; RV32I-NEXT: slli a5, a5, 1 -; RV32I-NEXT: sll a5, a5, t4 -; RV32I-NEXT: or a5, s0, a5 -; RV32I-NEXT: bgez t6, .LBB7_15 +; RV32I-NEXT: srl s0, a1, a2 +; RV32I-NEXT: slli t6, t6, 1 +; RV32I-NEXT: sll t6, t6, t4 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: bgez t2, .LBB7_15 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: sll s0, t2, t1 -; RV32I-NEXT: or a5, a5, s0 +; RV32I-NEXT: sll t1, a5, t1 +; RV32I-NEXT: or t6, t6, t1 ; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: slli s0, t5, 1 -; RV32I-NEXT: bltz a4, .LBB7_17 +; RV32I-NEXT: slli t1, a4, 1 +; RV32I-NEXT: bltz t5, .LBB7_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sra a4, t5, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB7_18 +; RV32I-NEXT: sra t2, a4, t5 +; RV32I-NEXT: bgeu a2, a3, .LBB7_18 ; RV32I-NEXT: j .LBB7_19 ; RV32I-NEXT: .LBB7_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: sll a4, s0, a4 -; RV32I-NEXT: srl a1, t2, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB7_19 +; RV32I-NEXT: li t2, 95 +; RV32I-NEXT: sub t2, t2, a2 +; RV32I-NEXT: sll t2, t1, t2 +; RV32I-NEXT: srl t3, a5, t3 +; RV32I-NEXT: or 
t2, t3, t2 +; RV32I-NEXT: bltu a2, a3, .LBB7_19 ; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t6, t2 ; RV32I-NEXT: .LBB7_19: ; RV32I-NEXT: bnez a2, .LBB7_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a3, .LBB7_23 +; RV32I-NEXT: bltz a6, .LBB7_23 ; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: sra a4, t5, a3 -; RV32I-NEXT: bgeu a2, a6, .LBB7_24 +; RV32I-NEXT: sra a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB7_24 ; RV32I-NEXT: j .LBB7_25 ; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: bgez a3, .LBB7_21 +; RV32I-NEXT: mv a1, t6 +; RV32I-NEXT: bgez a6, .LBB7_21 ; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: srl a1, t2, a2 -; RV32I-NEXT: sll a4, s0, t4 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB7_25 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: sll t1, t1, t4 +; RV32I-NEXT: or a5, a5, t1 +; RV32I-NEXT: bltu a2, a3, .LBB7_25 ; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: bltz a3, .LBB7_27 +; RV32I-NEXT: bltz a6, .LBB7_27 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: bgeu a2, a6, .LBB7_28 +; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: bgeu a2, a3, .LBB7_28 ; RV32I-NEXT: j .LBB7_29 ; RV32I-NEXT: .LBB7_27: -; RV32I-NEXT: sra a3, t5, a2 -; RV32I-NEXT: bltu a2, a6, .LBB7_29 +; RV32I-NEXT: sra a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB7_29 ; RV32I-NEXT: .LBB7_28: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv a4, a7 ; RV32I-NEXT: .LBB7_29: -; RV32I-NEXT: sw a3, 12(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw s1, 0(a0) +; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a5, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw t0, 4(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -431,120 +425,116 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, 
sp, -16 -; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: lw t4, 0(a1) -; RV32I-NEXT: li a6, 64 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: li a3, 64 +; RV32I-NEXT: sub t0, a3, a2 +; RV32I-NEXT: li a6, 32 ; RV32I-NEXT: sub t1, a6, a2 -; RV32I-NEXT: li a3, 32 -; RV32I-NEXT: sub t5, a3, a2 ; RV32I-NEXT: li t2, 31 -; RV32I-NEXT: bltz t5, .LBB8_2 +; RV32I-NEXT: bltz t1, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a3, t0, t5 +; RV32I-NEXT: srl t6, a5, t1 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a3, t4, t1 -; RV32I-NEXT: sub a4, t2, t1 -; RV32I-NEXT: slli a5, t0, 1 -; RV32I-NEXT: sll a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: srl a6, a4, t0 +; RV32I-NEXT: sub a7, t2, t0 +; RV32I-NEXT: slli t3, a5, 1 +; RV32I-NEXT: sll a7, t3, a7 +; RV32I-NEXT: or t6, a6, a7 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: addi t6, a2, -32 -; RV32I-NEXT: bgez t6, .LBB8_5 +; RV32I-NEXT: lw t5, 8(a1) +; RV32I-NEXT: addi a6, a2, -32 +; RV32I-NEXT: bgez a6, .LBB8_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a4, a5, a2 -; RV32I-NEXT: or a3, a3, a4 +; RV32I-NEXT: sll a7, t5, a2 +; RV32I-NEXT: or t6, t6, a7 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: addi a4, a2, -96 +; RV32I-NEXT: addi t4, a2, -96 ; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz a4, .LBB8_7 +; RV32I-NEXT: bltz t4, .LBB8_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB8_8 +; RV32I-NEXT: bgeu a2, a3, .LBB8_8 ; RV32I-NEXT: j .LBB8_9 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: sll a7, t4, t3 -; RV32I-NEXT: bltu a2, a6, .LBB8_9 +; RV32I-NEXT: sll a7, a4, t3 +; RV32I-NEXT: bltu a2, a3, .LBB8_9 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv t6, a7 ; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: beqz a2, .LBB8_11 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, a3 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: 
.LBB8_11: -; RV32I-NEXT: lw s0, 12(a1) +; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: sub t2, t2, a2 -; RV32I-NEXT: bltz t6, .LBB8_13 +; RV32I-NEXT: bltz a6, .LBB8_13 ; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sll a5, a5, t6 -; RV32I-NEXT: bltz t5, .LBB8_14 +; RV32I-NEXT: sll t5, t5, a6 +; RV32I-NEXT: bltz t1, .LBB8_14 ; RV32I-NEXT: j .LBB8_15 ; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll a3, s0, a2 -; RV32I-NEXT: srli a5, a5, 1 -; RV32I-NEXT: srl a5, a5, t2 -; RV32I-NEXT: or a5, a3, a5 -; RV32I-NEXT: bgez t5, .LBB8_15 +; RV32I-NEXT: sll t6, a1, a2 +; RV32I-NEXT: srli t5, t5, 1 +; RV32I-NEXT: srl t5, t5, t2 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: bgez t1, .LBB8_15 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: srl a3, t0, t1 -; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: srl t0, a5, t0 +; RV32I-NEXT: or t5, t5, t0 ; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: srli a3, t4, 1 -; RV32I-NEXT: bltz a4, .LBB8_17 +; RV32I-NEXT: srli t0, a4, 1 +; RV32I-NEXT: bltz t4, .LBB8_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll a4, t4, a4 -; RV32I-NEXT: bgeu a2, a6, .LBB8_18 +; RV32I-NEXT: sll t1, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB8_18 ; RV32I-NEXT: j .LBB8_19 ; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: li a4, 95 -; RV32I-NEXT: sub a4, a4, a2 -; RV32I-NEXT: srl a4, a3, a4 -; RV32I-NEXT: sll a1, t0, t3 -; RV32I-NEXT: or a4, a1, a4 -; RV32I-NEXT: bltu a2, a6, .LBB8_19 +; RV32I-NEXT: li t1, 95 +; RV32I-NEXT: sub t1, t1, a2 +; RV32I-NEXT: srl t1, t0, t1 +; RV32I-NEXT: sll t3, a5, t3 +; RV32I-NEXT: or t1, t3, t1 +; RV32I-NEXT: bltu a2, a3, .LBB8_19 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: mv t5, t1 ; RV32I-NEXT: .LBB8_19: ; RV32I-NEXT: bnez a2, .LBB8_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz t6, .LBB8_23 +; RV32I-NEXT: bltz a6, .LBB8_23 ; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: sll a3, t4, t6 -; RV32I-NEXT: bgeu a2, a6, .LBB8_24 +; RV32I-NEXT: sll a5, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB8_24 ; RV32I-NEXT: j .LBB8_25 ; RV32I-NEXT: .LBB8_22: -; RV32I-NEXT: mv s0, a5 -; RV32I-NEXT: bgez 
t6, .LBB8_21 +; RV32I-NEXT: mv a1, t5 +; RV32I-NEXT: bgez a6, .LBB8_21 ; RV32I-NEXT: .LBB8_23: -; RV32I-NEXT: sll a1, t0, a2 -; RV32I-NEXT: srl a3, a3, t2 -; RV32I-NEXT: or a3, a1, a3 -; RV32I-NEXT: bltu a2, a6, .LBB8_25 +; RV32I-NEXT: sll a5, a5, a2 +; RV32I-NEXT: srl t0, t0, t2 +; RV32I-NEXT: or a5, a5, t0 +; RV32I-NEXT: bltu a2, a3, .LBB8_25 ; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: bltz t6, .LBB8_27 +; RV32I-NEXT: bltz a6, .LBB8_27 ; RV32I-NEXT: # %bb.26: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a6, .LBB8_28 +; RV32I-NEXT: bgeu a2, a3, .LBB8_28 ; RV32I-NEXT: j .LBB8_29 ; RV32I-NEXT: .LBB8_27: -; RV32I-NEXT: sll a4, t4, a2 -; RV32I-NEXT: bltu a2, a6, .LBB8_29 +; RV32I-NEXT: sll a4, a4, a2 +; RV32I-NEXT: bltu a2, a3, .LBB8_29 ; RV32I-NEXT: .LBB8_28: ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: .LBB8_29: ; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw s0, 12(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: sw a7, 8(a0) -; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: @@ -606,69 +596,69 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: lw t2, 8(a1) -; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw t2, 0(a1) ; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: andi a1, a2, 64 -; RV32I-NEXT: mv a5, a7 -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: beqz a1, .LBB10_2 +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: andi t1, a2, 64 +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: mv a4, t2 +; RV32I-NEXT: beqz t1, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a5, t1 -; RV32I-NEXT: mv a6, t2 +; RV32I-NEXT: mv t0, a1 +; RV32I-NEXT: mv a4, a3 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: andi 
a4, a2, 32 -; RV32I-NEXT: mv t0, a6 -; RV32I-NEXT: bnez a4, .LBB10_13 +; RV32I-NEXT: andi a6, a2, 32 +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: bnez a6, .LBB10_13 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: bnez a1, .LBB10_14 +; RV32I-NEXT: bnez t1, .LBB10_14 ; RV32I-NEXT: .LBB10_4: -; RV32I-NEXT: beqz a4, .LBB10_6 +; RV32I-NEXT: beqz a6, .LBB10_6 ; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: mv a5, t2 +; RV32I-NEXT: mv t0, a3 ; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: slli t3, a5, 1 -; RV32I-NEXT: not a3, a2 -; RV32I-NEXT: beqz a1, .LBB10_8 +; RV32I-NEXT: slli t3, t0, 1 +; RV32I-NEXT: not t2, a2 +; RV32I-NEXT: beqz t1, .LBB10_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: mv a1, a7 ; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: srl a7, t0, a2 -; RV32I-NEXT: sll a1, t3, a3 -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: beqz a4, .LBB10_10 +; RV32I-NEXT: srl a7, a5, a2 +; RV32I-NEXT: sll t1, t3, t2 +; RV32I-NEXT: srl t0, t0, a2 +; RV32I-NEXT: beqz a6, .LBB10_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv t2, t1 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB10_10: -; RV32I-NEXT: or a7, a1, a7 -; RV32I-NEXT: slli a1, t2, 1 -; RV32I-NEXT: sll a1, a1, a3 -; RV32I-NEXT: or a5, a1, a5 -; RV32I-NEXT: srl a1, t2, a2 -; RV32I-NEXT: beqz a4, .LBB10_12 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: slli t1, a3, 1 +; RV32I-NEXT: sll t1, t1, t2 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: srl a3, a3, a2 +; RV32I-NEXT: beqz a6, .LBB10_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: slli a4, t1, 1 -; RV32I-NEXT: sll a4, a4, a3 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: srl a2, t1, a2 -; RV32I-NEXT: slli a4, t0, 1 -; RV32I-NEXT: sll a3, a4, a3 -; RV32I-NEXT: or a2, a3, a2 -; RV32I-NEXT: sw a2, 12(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: slli a4, a1, 1 +; RV32I-NEXT: sll a4, a4, t2 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slli a2, a5, 1 +; RV32I-NEXT: sll a2, a2, t2 +; 
RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw t0, 4(a0) ; RV32I-NEXT: sw a7, 0(a0) ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: beqz a1, .LBB10_4 +; RV32I-NEXT: mv a5, t0 +; RV32I-NEXT: beqz t1, .LBB10_4 ; RV32I-NEXT: .LBB10_14: -; RV32I-NEXT: mv t2, a3 -; RV32I-NEXT: bnez a4, .LBB10_5 +; RV32I-NEXT: mv a3, t2 +; RV32I-NEXT: bnez a6, .LBB10_5 ; RV32I-NEXT: j .LBB10_6 ; ; RV64I-LABEL: fshr128_minsize: diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 6bae641408887..2b7f579c9b2ce 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -306,13 +306,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV32-NEXT: lbu a1, 12(s0) ; RV32-NEXT: lw a2, 8(s0) ; RV32-NEXT: andi a3, a0, 1 -; RV32-NEXT: neg s2, a3 +; RV32-NEXT: neg s1, a3 ; RV32-NEXT: slli a3, a1, 30 ; RV32-NEXT: srli a4, a2, 2 -; RV32-NEXT: or s3, a4, a3 +; RV32-NEXT: or s2, a4, a3 ; RV32-NEXT: srli a1, a1, 2 ; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: neg s1, a1 +; RV32-NEXT: neg s3, a1 ; RV32-NEXT: slli a1, a2, 31 ; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: or a0, a0, a1 @@ -327,17 +327,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV32-NEXT: mv s6, a1 ; RV32-NEXT: li a2, -5 ; RV32-NEXT: li a3, -1 -; RV32-NEXT: mv a0, s3 -; RV32-NEXT: mv a1, s1 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: mv a1, s3 ; RV32-NEXT: call __moddi3@plt -; RV32-NEXT: mv s1, a0 +; RV32-NEXT: mv s2, a0 ; RV32-NEXT: mv s3, a1 ; RV32-NEXT: li a2, 6 ; RV32-NEXT: mv a0, s4 -; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a1, s1 ; RV32-NEXT: li a3, 0 ; RV32-NEXT: call __moddi3@plt -; RV32-NEXT: xori a2, s1, 2 +; RV32-NEXT: xori a2, s2, 2 ; RV32-NEXT: or a2, a2, s3 ; RV32-NEXT: snez a2, a2 ; RV32-NEXT: xori a3, s5, 1 @@ -460,13 +460,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV32M-NEXT: 
lbu a1, 12(s0) ; RV32M-NEXT: lw a2, 8(s0) ; RV32M-NEXT: andi a3, a0, 1 -; RV32M-NEXT: neg s2, a3 +; RV32M-NEXT: neg s1, a3 ; RV32M-NEXT: slli a3, a1, 30 ; RV32M-NEXT: srli a4, a2, 2 -; RV32M-NEXT: or s3, a4, a3 +; RV32M-NEXT: or s2, a4, a3 ; RV32M-NEXT: srli a1, a1, 2 ; RV32M-NEXT: andi a1, a1, 1 -; RV32M-NEXT: neg s1, a1 +; RV32M-NEXT: neg s3, a1 ; RV32M-NEXT: slli a1, a2, 31 ; RV32M-NEXT: srli a0, a0, 1 ; RV32M-NEXT: or a0, a0, a1 @@ -481,17 +481,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind { ; RV32M-NEXT: mv s6, a1 ; RV32M-NEXT: li a2, -5 ; RV32M-NEXT: li a3, -1 -; RV32M-NEXT: mv a0, s3 -; RV32M-NEXT: mv a1, s1 +; RV32M-NEXT: mv a0, s2 +; RV32M-NEXT: mv a1, s3 ; RV32M-NEXT: call __moddi3@plt -; RV32M-NEXT: mv s1, a0 +; RV32M-NEXT: mv s2, a0 ; RV32M-NEXT: mv s3, a1 ; RV32M-NEXT: li a2, 6 ; RV32M-NEXT: mv a0, s4 -; RV32M-NEXT: mv a1, s2 +; RV32M-NEXT: mv a1, s1 ; RV32M-NEXT: li a3, 0 ; RV32M-NEXT: call __moddi3@plt -; RV32M-NEXT: xori a2, s1, 2 +; RV32M-NEXT: xori a2, s2, 2 ; RV32M-NEXT: or a2, a2, s3 ; RV32M-NEXT: snez a2, a2 ; RV32M-NEXT: xori a3, s5, 1 diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index 0295f955292b1..a45cb88adb5f8 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -18,44 +18,42 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: lh s3, 8(a1) -; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s0, 12(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, -124 -; RV32I-NEXT: mv 
a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 98 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, -1003 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_srem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a2, 12(a1) ; RV32IM-NEXT: lh a3, 8(a1) ; RV32IM-NEXT: lh a4, 0(a1) ; RV32IM-NEXT: lh a1, 4(a1) @@ -63,88 +61,86 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32IM-NEXT: addi a5, a5, 389 ; RV32IM-NEXT: mulh a5, a4, a5 ; RV32IM-NEXT: add a5, a5, a4 -; RV32IM-NEXT: srli a2, a5, 31 +; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 -; RV32IM-NEXT: lui a4, 507375 -; RV32IM-NEXT: addi a4, a4, 1981 -; RV32IM-NEXT: mulh a4, a1, a4 -; RV32IM-NEXT: sub a4, a4, a1 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, -124 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 -; RV32IM-NEXT: lui a4, 342392 -; RV32IM-NEXT: addi a4, a4, 669 -; RV32IM-NEXT: mulh a4, a3, a4 
-; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srli a4, a4, 5 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, 98 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: lui a4, 780943 -; RV32IM-NEXT: addi a4, a4, 1809 -; RV32IM-NEXT: mulh a4, a6, a4 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srli a4, a4, 8 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: li a5, -1003 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a4, a6, a4 -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: lui a5, 507375 +; RV32IM-NEXT: addi a5, a5, 1981 +; RV32IM-NEXT: mulh a5, a1, a5 +; RV32IM-NEXT: sub a5, a5, a1 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, -124 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: lui a5, 342392 +; RV32IM-NEXT: addi a5, a5, 669 +; RV32IM-NEXT: mulh a5, a3, a5 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 5 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 98 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: lui a5, 780943 +; RV32IM-NEXT: addi a5, a5, 1809 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 8 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, -1003 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_1: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: 
sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: lh s3, 16(a1) -; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s0, 24(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, -124 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 98 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, -1003 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 
16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_srem_vec_1: @@ -152,45 +148,45 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lh a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) -; RV64IM-NEXT: lh a7, 16(a1) +; RV64IM-NEXT: lh a4, 24(a1) +; RV64IM-NEXT: lh a5, 16(a1) ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6) +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a1, a4 +; RV64IM-NEXT: mulh a3, a1, a6 ; RV64IM-NEXT: sub a3, a3, a1 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) -; RV64IM-NEXT: li a5, -124 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6) +; RV64IM-NEXT: li a7, -124 +; RV64IM-NEXT: mulw a3, a3, a7 ; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a7, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a5, a6 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 5 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) -; RV64IM-NEXT: li a5, 98 -; RV64IM-NEXT: mulw a3, a3, a5 -; RV64IM-NEXT: subw a3, a7, a3 -; 
RV64IM-NEXT: mulh a4, a6, a4 -; RV64IM-NEXT: srli a5, a4, 63 -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: addw a4, a4, a5 -; RV64IM-NEXT: li a5, -1003 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) +; RV64IM-NEXT: li a7, 98 +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: mulh a5, a4, a6 +; RV64IM-NEXT: srli a6, a5, 63 +; RV64IM-NEXT: srli a5, a5, 7 +; RV64IM-NEXT: addw a5, a5, a6 +; RV64IM-NEXT: li a6, -1003 +; RV64IM-NEXT: mulw a5, a5, a6 +; RV64IM-NEXT: subw a4, a4, a5 ; RV64IM-NEXT: sh a4, 6(a0) ; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) @@ -210,126 +206,122 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: lh s3, 8(a1) -; RV32I-NEXT: lh s0, 4(a1) +; RV32I-NEXT: lh s0, 12(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; 
RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_srem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 12(a1) +; RV32IM-NEXT: lh a2, 12(a1) ; RV32IM-NEXT: lh a3, 8(a1) ; RV32IM-NEXT: lh a4, 0(a1) ; RV32IM-NEXT: lh a1, 4(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a2, a4, a5 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a7, a2, 31 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: add a2, a2, a7 +; RV32IM-NEXT: mulh a6, a4, a5 +; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: srli a7, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub t0, a4, a2 -; RV32IM-NEXT: mulh a4, a1, a5 -; RV32IM-NEXT: add a4, a4, a1 -; RV32IM-NEXT: srli a2, a4, 31 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub a1, a1, a2 -; RV32IM-NEXT: mulh a2, a3, a5 -; RV32IM-NEXT: add a2, a2, a3 -; RV32IM-NEXT: srli a4, a2, 31 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: mul a2, a2, a7 -; RV32IM-NEXT: sub a2, a3, a2 -; RV32IM-NEXT: mulh a3, a6, a5 -; RV32IM-NEXT: add a3, a3, a6 -; RV32IM-NEXT: srli a4, a3, 31 -; RV32IM-NEXT: srli a3, a3, 6 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: mul a3, a3, a7 -; RV32IM-NEXT: sub a3, a6, a3 -; RV32IM-NEXT: sh a3, 6(a0) -; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulh a6, a1, a5 +; RV32IM-NEXT: add a6, a6, a1 +; RV32IM-NEXT: srli t0, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, t0 +; 
RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a1, a1, a6 +; RV32IM-NEXT: mulh a6, a3, a5 +; RV32IM-NEXT: add a6, a6, a3 +; RV32IM-NEXT: srli t0, a6, 31 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: add a6, a6, t0 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: srli a6, a5, 31 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: mul a5, a5, a7 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_srem_vec_2: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: lh s3, 16(a1) -; RV64I-NEXT: lh s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s0, 24(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; 
RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_srem_vec_2: @@ -337,42 +329,42 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lh a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a4, 24(a1) ; RV64IM-NEXT: lh a5, 16(a1) ; RV64IM-NEXT: lh a1, 8(a1) -; RV64IM-NEXT: mulh a4, a2, a3 -; RV64IM-NEXT: add a4, a4, a2 -; RV64IM-NEXT: srli a7, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a4, a4, a7 +; RV64IM-NEXT: mulh a6, a2, a3 +; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: srli a7, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, a7 ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: mulw a4, a4, a7 -; RV64IM-NEXT: subw t0, a2, a4 -; RV64IM-NEXT: mulh a4, a1, a3 -; RV64IM-NEXT: add a4, a4, a1 -; 
RV64IM-NEXT: srli a2, a4, 63 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: addw a2, a4, a2 -; RV64IM-NEXT: mulw a2, a2, a7 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: mulh a2, a5, a3 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a4, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: addw a2, a2, a4 -; RV64IM-NEXT: mulw a2, a2, a7 -; RV64IM-NEXT: subw a2, a5, a2 -; RV64IM-NEXT: mulh a3, a6, a3 -; RV64IM-NEXT: add a3, a3, a6 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: mulh a6, a1, a3 +; RV64IM-NEXT: add a6, a6, a1 +; RV64IM-NEXT: srli t0, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, t0 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a1, a1, a6 +; RV64IM-NEXT: mulh a6, a5, a3 +; RV64IM-NEXT: add a6, a6, a5 +; RV64IM-NEXT: srli t0, a6, 63 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, t0 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a5, a5, a6 +; RV64IM-NEXT: mulh a3, a4, a3 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a4 +; RV64IM-NEXT: addw a3, a3, a6 ; RV64IM-NEXT: mulw a3, a3, a7 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a5, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -394,47 +386,46 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 0(a1) -; RV32I-NEXT: lh s3, 4(a1) -; RV32I-NEXT: lh s4, 8(a1) -; RV32I-NEXT: lh s1, 12(a1) +; RV32I-NEXT: lh s1, 0(a1) +; RV32I-NEXT: lh s2, 4(a1) +; RV32I-NEXT: lh s3, 8(a1) +; RV32I-NEXT: lh s4, 
12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: mv s8, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __divsi3@plt -; RV32I-NEXT: mv s9, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __divsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __divsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __divsi3@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __divsi3@plt ; RV32I-NEXT: add a0, s8, a0 -; RV32I-NEXT: add a1, s7, s1 -; RV32I-NEXT: add a2, s6, s4 -; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: add a1, s7, s2 +; RV32I-NEXT: add a2, s6, s3 +; RV32I-NEXT: add a3, s5, s4 ; RV32I-NEXT: sh a3, 6(s0) ; RV32I-NEXT: sh a2, 4(s0) ; RV32I-NEXT: sh a1, 2(s0) @@ -449,127 +440,124 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: combine_srem_sdiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a6, 0(a1) +; RV32IM-NEXT: lh a2, 0(a1) ; RV32IM-NEXT: lh a3, 4(a1) ; RV32IM-NEXT: lh a4, 12(a1) ; RV32IM-NEXT: lh a1, 8(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a2, a4, a5 
-; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a7, a2, 31 -; RV32IM-NEXT: srai a2, a2, 6 -; RV32IM-NEXT: add t0, a2, a7 +; RV32IM-NEXT: mulh a6, a4, a5 +; RV32IM-NEXT: add a6, a6, a4 +; RV32IM-NEXT: srli a7, a6, 31 +; RV32IM-NEXT: srai a6, a6, 6 +; RV32IM-NEXT: add a6, a6, a7 ; RV32IM-NEXT: li a7, 95 -; RV32IM-NEXT: mul a2, t0, a7 -; RV32IM-NEXT: sub t1, a4, a2 -; RV32IM-NEXT: mulh a4, a1, a5 -; RV32IM-NEXT: add a4, a4, a1 -; RV32IM-NEXT: srli a2, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a2, a4, a2 -; RV32IM-NEXT: mul a4, a2, a7 -; RV32IM-NEXT: sub t2, a1, a4 -; RV32IM-NEXT: mulh a4, a3, a5 -; RV32IM-NEXT: add a4, a4, a3 -; RV32IM-NEXT: srli a1, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: mul a4, a1, a7 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: mulh a4, a6, a5 +; RV32IM-NEXT: mul t0, a6, a7 +; RV32IM-NEXT: sub a4, a4, t0 +; RV32IM-NEXT: mulh t0, a1, a5 +; RV32IM-NEXT: add t0, t0, a1 +; RV32IM-NEXT: srli t1, t0, 31 +; RV32IM-NEXT: srai t0, t0, 6 +; RV32IM-NEXT: add t0, t0, t1 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulh t1, a3, a5 +; RV32IM-NEXT: add t1, t1, a3 +; RV32IM-NEXT: srli t2, t1, 31 +; RV32IM-NEXT: srai t1, t1, 6 +; RV32IM-NEXT: add t1, t1, t2 +; RV32IM-NEXT: mul t2, t1, a7 +; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulh a5, a2, a5 +; RV32IM-NEXT: add a5, a5, a2 +; RV32IM-NEXT: srli t2, a5, 31 +; RV32IM-NEXT: srai a5, a5, 6 +; RV32IM-NEXT: add a5, a5, t2 +; RV32IM-NEXT: mul a7, a5, a7 +; RV32IM-NEXT: sub a2, a2, a7 +; RV32IM-NEXT: add a2, a2, a5 +; RV32IM-NEXT: add a3, a3, t1 +; RV32IM-NEXT: add a1, a1, t0 ; RV32IM-NEXT: add a4, a4, a6 -; RV32IM-NEXT: srli a5, a4, 31 -; RV32IM-NEXT: srai a4, a4, 6 -; RV32IM-NEXT: add a4, a4, a5 -; RV32IM-NEXT: mul a5, a4, a7 -; RV32IM-NEXT: sub a5, a6, a5 -; RV32IM-NEXT: add a4, a5, a4 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: add a2, t2, a2 -; RV32IM-NEXT: add a3, t1, t0 -; RV32IM-NEXT: sh a3, 6(a0) -; 
RV32IM-NEXT: sh a2, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a4, 0(a0) +; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_srem_sdiv: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 0(a1) -; RV64I-NEXT: lh s3, 8(a1) -; RV64I-NEXT: lh s4, 16(a1) -; RV64I-NEXT: lh s1, 24(a1) +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lh s1, 0(a1) +; RV64I-NEXT: lh s2, 8(a1) +; RV64I-NEXT: lh s3, 16(a1) +; RV64I-NEXT: lh s4, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s5, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s6, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; 
RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s7, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: mv s8, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __divdi3@plt -; RV64I-NEXT: mv s9, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __divdi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __divdi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __divdi3@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __divdi3@plt ; RV64I-NEXT: addw a0, s8, a0 -; RV64I-NEXT: addw a1, s7, s1 -; RV64I-NEXT: addw a2, s6, s4 -; RV64I-NEXT: addw a3, s5, s9 +; RV64I-NEXT: addw a1, s7, s2 +; RV64I-NEXT: addw a2, s6, s3 +; RV64I-NEXT: addw a3, s5, s4 ; RV64I-NEXT: sh a3, 6(s0) ; RV64I-NEXT: sh a2, 4(s0) ; RV64I-NEXT: sh a1, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) -; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: combine_srem_sdiv: @@ -577,45 +565,45 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lh a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) -; RV64IM-NEXT: lh a6, 0(a1) +; RV64IM-NEXT: lh a4, 0(a1) ; RV64IM-NEXT: lh a5, 8(a1) ; RV64IM-NEXT: lh a1, 16(a1) -; RV64IM-NEXT: mulh a4, a2, a3 -; RV64IM-NEXT: add a4, a4, a2 -; RV64IM-NEXT: srli a7, a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw t0, a4, a7 +; RV64IM-NEXT: mulh a6, a2, a3 +; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: srli a7, a6, 63 +; RV64IM-NEXT: srai a6, a6, 6 +; RV64IM-NEXT: addw a6, a6, a7 ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: mulw a4, t0, a7 -; RV64IM-NEXT: subw t1, a2, a4 -; RV64IM-NEXT: mulh a4, a1, a3 -; RV64IM-NEXT: add a4, a4, a1 -; RV64IM-NEXT: srli a2, a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a2, a4, a2 -; RV64IM-NEXT: mulw a4, a2, a7 -; RV64IM-NEXT: subw t2, a1, a4 -; RV64IM-NEXT: mulh a4, a5, a3 -; RV64IM-NEXT: add a4, a4, a5 -; RV64IM-NEXT: srli a1, a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 -; RV64IM-NEXT: addw a1, a4, a1 -; RV64IM-NEXT: mulw a4, a1, a7 -; RV64IM-NEXT: subw a4, a5, a4 -; RV64IM-NEXT: mulh a3, a6, a3 -; RV64IM-NEXT: add a3, a3, a6 -; RV64IM-NEXT: srli a5, a3, 63 +; RV64IM-NEXT: mulw t0, a6, a7 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulh t0, a1, a3 +; RV64IM-NEXT: add t0, t0, a1 +; RV64IM-NEXT: srli t1, t0, 63 +; RV64IM-NEXT: srai t0, t0, 6 +; RV64IM-NEXT: addw t0, t0, t1 +; RV64IM-NEXT: mulw t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulh t1, a5, a3 +; RV64IM-NEXT: add t1, t1, a5 +; RV64IM-NEXT: srli t2, t1, 63 +; RV64IM-NEXT: srai t1, t1, 6 +; RV64IM-NEXT: addw t1, t1, t2 +; RV64IM-NEXT: mulw t2, t1, a7 +; RV64IM-NEXT: subw a5, a5, 
t2 +; RV64IM-NEXT: mulh a3, a4, a3 +; RV64IM-NEXT: add a3, a3, a4 +; RV64IM-NEXT: srli t2, a3, 63 ; RV64IM-NEXT: srai a3, a3, 6 -; RV64IM-NEXT: addw a3, a3, a5 -; RV64IM-NEXT: mulw a5, a3, a7 -; RV64IM-NEXT: subw a5, a6, a5 -; RV64IM-NEXT: addw a3, a5, a3 -; RV64IM-NEXT: addw a1, a4, a1 -; RV64IM-NEXT: addw a2, t2, a2 -; RV64IM-NEXT: addw a4, t1, t0 -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: addw a3, a3, t2 +; RV64IM-NEXT: mulw a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 +; RV64IM-NEXT: addw a3, a4, a3 +; RV64IM-NEXT: addw a4, a5, t1 +; RV64IM-NEXT: addw a1, a1, t0 +; RV64IM-NEXT: addw a2, a2, a6 +; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, @@ -642,21 +630,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: srli a4, a2, 26 ; RV32I-NEXT: add a4, a2, a4 ; RV32I-NEXT: andi a4, a4, -64 -; RV32I-NEXT: sub s2, a2, a4 +; RV32I-NEXT: sub s1, a2, a4 ; RV32I-NEXT: srli a2, a1, 27 ; RV32I-NEXT: add a2, a1, a2 ; RV32I-NEXT: andi a2, a2, -32 -; RV32I-NEXT: sub s3, a1, a2 +; RV32I-NEXT: sub s2, a1, a2 ; RV32I-NEXT: srli a1, a3, 29 ; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: andi a1, a1, -8 -; RV32I-NEXT: sub s1, a3, a1 +; RV32I-NEXT: sub s3, a3, a1 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh s2, 0(s0) +; RV32I-NEXT: sh s3, 4(s0) +; RV32I-NEXT: sh s2, 2(s0) +; RV32I-NEXT: sh s1, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -677,9 +665,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV32IM-NEXT: add a5, a5, a4 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 -; RV32IM-NEXT: add a6, a5, a6 -; RV32IM-NEXT: li 
a5, 95 -; RV32IM-NEXT: mul a5, a6, a5 +; RV32IM-NEXT: add a5, a5, a6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 ; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: srli a5, a1, 26 ; RV32IM-NEXT: add a5, a1, a5 @@ -715,21 +703,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: srli a4, a2, 58 ; RV64I-NEXT: add a4, a2, a4 ; RV64I-NEXT: andi a4, a4, -64 -; RV64I-NEXT: subw s2, a2, a4 +; RV64I-NEXT: subw s1, a2, a4 ; RV64I-NEXT: srli a2, a1, 59 ; RV64I-NEXT: add a2, a1, a2 ; RV64I-NEXT: andi a2, a2, -32 -; RV64I-NEXT: subw s3, a1, a2 +; RV64I-NEXT: subw s2, a1, a2 ; RV64I-NEXT: srli a1, a3, 61 ; RV64I-NEXT: add a1, a3, a1 ; RV64I-NEXT: andi a1, a1, -8 -; RV64I-NEXT: subw s1, a3, a1 +; RV64I-NEXT: subw s3, a3, a1 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh s2, 0(s0) +; RV64I-NEXT: sh s3, 4(s0) +; RV64I-NEXT: sh s2, 2(s0) +; RV64I-NEXT: sh s1, 0(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -750,9 +738,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; RV64IM-NEXT: add a3, a3, a2 ; RV64IM-NEXT: srli a6, a3, 63 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: addw a6, a3, a6 -; RV64IM-NEXT: li a3, 95 -; RV64IM-NEXT: mulw a3, a6, a3 +; RV64IM-NEXT: addw a3, a3, a6 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 58 ; RV64IM-NEXT: add a3, a1, a3 @@ -785,10 +773,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s0, 12(a1) ; RV32I-NEXT: lh s1, 8(a1) ; RV32I-NEXT: lh a2, 4(a1) -; RV32I-NEXT: mv s0, a0 +; 
RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3@plt @@ -799,12 +787,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -862,10 +850,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s0, 24(a1) ; RV64I-NEXT: lh s1, 16(a1) ; RV64I-NEXT: lh a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt @@ -876,12 +864,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -895,7 +883,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lh a2, 16(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI4_0) ; RV64IM-NEXT: 
ld a3, %lo(.LCPI4_0)(a3) -; RV64IM-NEXT: lh a6, 24(a1) +; RV64IM-NEXT: lh a4, 24(a1) ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 @@ -904,26 +892,26 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: addw a3, a3, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) -; RV64IM-NEXT: li a4, 23 -; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 8 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI4_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI4_2)(a4) -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a1, a1, a3 -; RV64IM-NEXT: mulh a3, a6, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 11 -; RV64IM-NEXT: addw a3, a3, a4 -; RV64IM-NEXT: lui a4, 1 -; RV64IM-NEXT: addiw a4, a4, 1327 -; RV64IM-NEXT: mulw a3, a3, a4 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: addw a3, a3, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh zero, 0(a0) ; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a1, 2(a0) @@ -945,7 +933,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lh a2, 4(a1) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lh s2, 12(a1) +; RV32I-NEXT: lh s1, 12(a1) ; RV32I-NEXT: lh a0, 8(a1) ; RV32I-NEXT: srli a1, a2, 17 ; RV32I-NEXT: add a1, a2, a1 @@ -954,13 +942,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; 
RV32I-NEXT: sub s3, a2, a1 ; RV32I-NEXT: li a1, 23 ; RV32I-NEXT: call __modsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3@plt ; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh s2, 4(s0) ; RV32I-NEXT: sh zero, 0(s0) ; RV32I-NEXT: sh s3, 2(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1017,7 +1005,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lh a2, 8(a1) ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lh s2, 24(a1) +; RV64I-NEXT: lh s1, 24(a1) ; RV64I-NEXT: lh a0, 16(a1) ; RV64I-NEXT: srli a1, a2, 49 ; RV64I-NEXT: add a1, a2, a1 @@ -1026,13 +1014,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64I-NEXT: subw s3, a2, a1 ; RV64I-NEXT: li a1, 23 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3@plt ; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh s2, 4(s0) ; RV64I-NEXT: sh zero, 0(s0) ; RV64I-NEXT: sh s3, 2(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -1054,13 +1042,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 4 ; RV64IM-NEXT: addw a3, a3, a5 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) -; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5) -; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI5_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI5_1)(a6) +; RV64IM-NEXT: mulw a3, a3, a5 ; RV64IM-NEXT: lh a1, 8(a1) ; RV64IM-NEXT: subw a2, a2, a3 -; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: mulh a3, a4, a6 ; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srli a3, a3, 11 ; 
RV64IM-NEXT: addw a3, a3, a5 @@ -1097,16 +1085,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) -; RV32I-NEXT: lw s4, 16(a1) -; RV32I-NEXT: lw s5, 20(a1) -; RV32I-NEXT: lw s6, 8(a1) -; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw s0, 24(a1) +; RV32I-NEXT: lw s1, 28(a1) +; RV32I-NEXT: lw s2, 16(a1) +; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s4, 8(a1) +; RV32I-NEXT: lw s5, 12(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: li a3, 0 @@ -1114,33 +1101,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s6 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __moddi3@plt -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv s9, a1 -; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3@plt -; RV32I-NEXT: sw a1, 28(s0) -; RV32I-NEXT: sw a0, 24(s0) -; RV32I-NEXT: sw s1, 20(s0) -; RV32I-NEXT: sw s4, 16(s0) -; RV32I-NEXT: sw s9, 12(s0) -; RV32I-NEXT: sw s6, 8(s0) -; RV32I-NEXT: sw s8, 4(s0) -; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __moddi3@plt +; RV32I-NEXT: sw 
a1, 28(s6) +; RV32I-NEXT: sw a0, 24(s6) +; RV32I-NEXT: sw s3, 20(s6) +; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s5, 12(s6) +; RV32I-NEXT: sw s4, 8(s6) +; RV32I-NEXT: sw s8, 4(s6) +; RV32I-NEXT: sw s7, 0(s6) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1151,7 +1138,6 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; @@ -1168,16 +1154,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) -; RV32IM-NEXT: lw s4, 16(a1) -; RV32IM-NEXT: lw s5, 20(a1) -; RV32IM-NEXT: lw s6, 8(a1) -; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw s0, 24(a1) +; RV32IM-NEXT: lw s1, 28(a1) +; RV32IM-NEXT: lw s2, 16(a1) +; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s4, 8(a1) +; RV32IM-NEXT: lw s5, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) -; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: li a3, 0 @@ -1185,33 +1170,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s6 -; RV32IM-NEXT: mv a1, s1 -; RV32IM-NEXT: li a3, 0 -; RV32IM-NEXT: call __moddi3@plt -; RV32IM-NEXT: mv s6, a0 -; RV32IM-NEXT: mv s9, a1 -; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s4 ; RV32IM-NEXT: mv a1, s5 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3@plt ; 
RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s1, a1 -; RV32IM-NEXT: lui a0, 1 -; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s2 ; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3@plt -; RV32IM-NEXT: sw a1, 28(s0) -; RV32IM-NEXT: sw a0, 24(s0) -; RV32IM-NEXT: sw s1, 20(s0) -; RV32IM-NEXT: sw s4, 16(s0) -; RV32IM-NEXT: sw s9, 12(s0) -; RV32IM-NEXT: sw s6, 8(s0) -; RV32IM-NEXT: sw s8, 4(s0) -; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: mv s2, a0 +; RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: li a3, 0 +; RV32IM-NEXT: call __moddi3@plt +; RV32IM-NEXT: sw a1, 28(s6) +; RV32IM-NEXT: sw a0, 24(s6) +; RV32IM-NEXT: sw s3, 20(s6) +; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s5, 12(s6) +; RV32IM-NEXT: sw s4, 8(s6) +; RV32IM-NEXT: sw s8, 4(s6) +; RV32IM-NEXT: sw s7, 0(s6) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1222,7 +1207,6 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 48 ; RV32IM-NEXT: ret ; @@ -1234,10 +1218,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld s0, 24(a1) ; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3@plt @@ -1248,12 +1232,12 @@ define <4 x i64> 
@dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __moddi3@plt -; RV64I-NEXT: sd a0, 24(s0) -; RV64I-NEXT: sd s1, 16(s0) -; RV64I-NEXT: sd s3, 8(s0) -; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd s1, 16(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1267,7 +1251,7 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: ld a2, 16(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI6_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3) -; RV64IM-NEXT: ld a6, 24(a1) +; RV64IM-NEXT: ld a4, 24(a1) ; RV64IM-NEXT: ld a1, 8(a1) ; RV64IM-NEXT: mulh a3, a2, a3 ; RV64IM-NEXT: add a3, a3, a2 @@ -1276,26 +1260,26 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: add a3, a3, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) -; RV64IM-NEXT: li a4, 23 -; RV64IM-NEXT: mul a3, a3, a4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mul a3, a3, a6 ; RV64IM-NEXT: sub a2, a2, a3 ; RV64IM-NEXT: mulh a3, a1, a5 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srai a3, a3, 8 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: lui a4, %hi(.LCPI6_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI6_2)(a4) -; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) +; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mul a3, a3, a6 ; RV64IM-NEXT: sub a1, a1, a3 -; RV64IM-NEXT: mulh a3, a6, a4 -; RV64IM-NEXT: srli a4, a3, 63 +; RV64IM-NEXT: mulh a3, a4, a5 +; RV64IM-NEXT: srli a5, a3, 63 ; RV64IM-NEXT: srai a3, a3, 11 -; RV64IM-NEXT: add a3, a3, a4 -; RV64IM-NEXT: lui a4, 1 -; RV64IM-NEXT: addiw 
a4, a4, 1327 -; RV64IM-NEXT: mul a3, a3, a4 -; RV64IM-NEXT: sub a3, a6, a3 +; RV64IM-NEXT: add a3, a3, a5 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a3, a4, a3 ; RV64IM-NEXT: sd zero, 0(a0) ; RV64IM-NEXT: sd a3, 24(a0) ; RV64IM-NEXT: sd a1, 8(a0) diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll index 7fe062176ec4f..b250773b30978 100644 --- a/llvm/test/CodeGen/RISCV/ssub_sat.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll @@ -156,16 +156,16 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; RV32IZbbZbt-NEXT: sltu a4, a0, a2 ; RV32IZbbZbt-NEXT: sub a5, a1, a3 ; RV32IZbbZbt-NEXT: sub a4, a5, a4 -; RV32IZbbZbt-NEXT: srai a6, a4, 31 -; RV32IZbbZbt-NEXT: lui a5, 524288 -; RV32IZbbZbt-NEXT: xor a7, a6, a5 -; RV32IZbbZbt-NEXT: xor a5, a1, a4 +; RV32IZbbZbt-NEXT: srai a5, a4, 31 +; RV32IZbbZbt-NEXT: lui a6, 524288 +; RV32IZbbZbt-NEXT: xor a6, a5, a6 +; RV32IZbbZbt-NEXT: xor a7, a1, a4 ; RV32IZbbZbt-NEXT: xor a1, a1, a3 -; RV32IZbbZbt-NEXT: and a1, a1, a5 +; RV32IZbbZbt-NEXT: and a1, a1, a7 ; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a4 +; RV32IZbbZbt-NEXT: cmov a1, a3, a6, a4 ; RV32IZbbZbt-NEXT: sub a0, a0, a2 -; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0 +; RV32IZbbZbt-NEXT: cmov a0, a3, a5, a0 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func2: diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll index 662eacc27b6aa..a7c366ce1679f 100644 --- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll @@ -164,16 +164,16 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; RV32IZbbZbt-NEXT: sltu a2, a0, a4 ; RV32IZbbZbt-NEXT: sub a3, a1, a5 ; RV32IZbbZbt-NEXT: sub a2, a3, a2 -; RV32IZbbZbt-NEXT: srai a6, a2, 31 -; RV32IZbbZbt-NEXT: lui a3, 524288 -; RV32IZbbZbt-NEXT: xor a7, a6, a3 -; RV32IZbbZbt-NEXT: xor a3, a1, a2 +; RV32IZbbZbt-NEXT: srai a3, a2, 31 +; 
RV32IZbbZbt-NEXT: lui a6, 524288 +; RV32IZbbZbt-NEXT: xor a6, a3, a6 +; RV32IZbbZbt-NEXT: xor a7, a1, a2 ; RV32IZbbZbt-NEXT: xor a1, a1, a5 -; RV32IZbbZbt-NEXT: and a1, a1, a3 -; RV32IZbbZbt-NEXT: slti a3, a1, 0 -; RV32IZbbZbt-NEXT: cmov a1, a3, a7, a2 +; RV32IZbbZbt-NEXT: and a1, a1, a7 +; RV32IZbbZbt-NEXT: slti a5, a1, 0 +; RV32IZbbZbt-NEXT: cmov a1, a5, a6, a2 ; RV32IZbbZbt-NEXT: sub a0, a0, a4 -; RV32IZbbZbt-NEXT: cmov a0, a3, a6, a0 +; RV32IZbbZbt-NEXT: cmov a0, a5, a3, a0 ; RV32IZbbZbt-NEXT: ret ; ; RV64IZbbZbt-LABEL: func64: diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll index 218490933333e..323ee554b67e4 100644 --- a/llvm/test/CodeGen/RISCV/stack-store-check.ll +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -32,12 +32,12 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: lw s6, %lo(U)(a0) ; CHECK-NEXT: lw s7, %lo(U+4)(a0) ; CHECK-NEXT: lw s8, %lo(U+8)(a0) -; CHECK-NEXT: lw s2, %lo(U+12)(a0) +; CHECK-NEXT: lw s0, %lo(U+12)(a0) ; CHECK-NEXT: sw zero, 612(sp) ; CHECK-NEXT: sw zero, 608(sp) ; CHECK-NEXT: sw zero, 604(sp) ; CHECK-NEXT: sw zero, 600(sp) -; CHECK-NEXT: sw s2, 596(sp) +; CHECK-NEXT: sw s0, 596(sp) ; CHECK-NEXT: sw s8, 592(sp) ; CHECK-NEXT: sw s7, 588(sp) ; CHECK-NEXT: addi a0, sp, 616 @@ -45,21 +45,21 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a2, sp, 584 ; CHECK-NEXT: sw s6, 584(sp) ; CHECK-NEXT: call __subtf3@plt -; CHECK-NEXT: lw s4, 616(sp) -; CHECK-NEXT: lw s5, 620(sp) +; CHECK-NEXT: lw s9, 616(sp) +; CHECK-NEXT: lw s2, 620(sp) ; CHECK-NEXT: lw s3, 624(sp) -; CHECK-NEXT: lw s11, 628(sp) -; CHECK-NEXT: sw s2, 548(sp) +; CHECK-NEXT: lw s4, 628(sp) +; CHECK-NEXT: sw s0, 548(sp) ; CHECK-NEXT: sw s8, 544(sp) ; CHECK-NEXT: sw s7, 540(sp) ; CHECK-NEXT: sw s6, 536(sp) -; CHECK-NEXT: sw s11, 564(sp) +; CHECK-NEXT: sw s4, 564(sp) ; CHECK-NEXT: sw s3, 560(sp) -; CHECK-NEXT: sw s5, 556(sp) +; CHECK-NEXT: sw s2, 556(sp) ; CHECK-NEXT: addi a0, sp, 
568 ; CHECK-NEXT: addi a1, sp, 552 ; CHECK-NEXT: addi a2, sp, 536 -; CHECK-NEXT: sw s4, 552(sp) +; CHECK-NEXT: sw s9, 552(sp) ; CHECK-NEXT: call __subtf3@plt ; CHECK-NEXT: lw a0, 568(sp) ; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill @@ -73,7 +73,7 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw zero, 496(sp) ; CHECK-NEXT: sw zero, 492(sp) ; CHECK-NEXT: sw zero, 488(sp) -; CHECK-NEXT: sw s2, 516(sp) +; CHECK-NEXT: sw s0, 516(sp) ; CHECK-NEXT: sw s8, 512(sp) ; CHECK-NEXT: sw s7, 508(sp) ; CHECK-NEXT: addi a0, sp, 520 @@ -81,10 +81,10 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a2, sp, 488 ; CHECK-NEXT: sw s6, 504(sp) ; CHECK-NEXT: call __addtf3@plt -; CHECK-NEXT: lw s9, 520(sp) +; CHECK-NEXT: lw s11, 520(sp) ; CHECK-NEXT: lw s10, 524(sp) -; CHECK-NEXT: lw s0, 528(sp) -; CHECK-NEXT: sw s0, 20(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s5, 528(sp) +; CHECK-NEXT: sw s5, 20(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw s1, 532(sp) ; CHECK-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(Y1) @@ -100,13 +100,13 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a3, 304(sp) ; CHECK-NEXT: sw a2, 300(sp) ; CHECK-NEXT: sw a1, 296(sp) -; CHECK-NEXT: sw s11, 324(sp) +; CHECK-NEXT: sw s4, 324(sp) ; CHECK-NEXT: sw s3, 320(sp) -; CHECK-NEXT: sw s5, 316(sp) +; CHECK-NEXT: sw s2, 316(sp) ; CHECK-NEXT: addi a0, sp, 328 ; CHECK-NEXT: addi a1, sp, 312 ; CHECK-NEXT: addi a2, sp, 296 -; CHECK-NEXT: sw s4, 312(sp) +; CHECK-NEXT: sw s9, 312(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw a0, 328(sp) ; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill @@ -114,18 +114,18 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a0, 336(sp) ; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw s4, 340(sp) -; CHECK-NEXT: sw s2, 468(sp) +; CHECK-NEXT: lw s9, 340(sp) +; CHECK-NEXT: sw s0, 468(sp) ; CHECK-NEXT: sw s8, 464(sp) ; 
CHECK-NEXT: sw s7, 460(sp) ; CHECK-NEXT: sw s6, 456(sp) ; CHECK-NEXT: sw s1, 452(sp) -; CHECK-NEXT: sw s0, 448(sp) +; CHECK-NEXT: sw s5, 448(sp) ; CHECK-NEXT: sw s10, 444(sp) ; CHECK-NEXT: addi a0, sp, 472 ; CHECK-NEXT: addi a1, sp, 456 ; CHECK-NEXT: addi a2, sp, 440 -; CHECK-NEXT: sw s9, 440(sp) +; CHECK-NEXT: sw s11, 440(sp) ; CHECK-NEXT: call __addtf3@plt ; CHECK-NEXT: lw a3, 472(sp) ; CHECK-NEXT: lw a0, 476(sp) @@ -152,31 +152,31 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a2, %lo(X+8)(a4) ; CHECK-NEXT: sw a3, %lo(X+4)(a4) ; CHECK-NEXT: sw a0, %lo(X)(a4) -; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s8, 212(sp) -; CHECK-NEXT: lw s7, 8(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s7, 208(sp) -; CHECK-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s11, 204(sp) +; CHECK-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s4, 212(sp) +; CHECK-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s3, 208(sp) +; CHECK-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s2, 204(sp) ; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 200(sp) ; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 228(sp) -; CHECK-NEXT: lw s3, 24(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s3, 224(sp) -; CHECK-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s2, 220(sp) +; CHECK-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s1, 224(sp) +; CHECK-NEXT: lw s0, 32(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s0, 220(sp) ; CHECK-NEXT: addi a0, sp, 232 ; CHECK-NEXT: addi a1, sp, 216 ; CHECK-NEXT: addi a2, sp, 200 -; CHECK-NEXT: lw s1, 40(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s1, 216(sp) +; CHECK-NEXT: lw s8, 40(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s8, 216(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw s5, 232(sp) ; CHECK-NEXT: lw a0, 236(sp) ; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw s6, 240(sp) -; 
CHECK-NEXT: lw s0, 244(sp) +; CHECK-NEXT: lw s7, 244(sp) ; CHECK-NEXT: sw zero, 356(sp) ; CHECK-NEXT: sw zero, 352(sp) ; CHECK-NEXT: sw zero, 348(sp) @@ -189,7 +189,7 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: addi a0, sp, 376 ; CHECK-NEXT: addi a1, sp, 360 ; CHECK-NEXT: addi a2, sp, 344 -; CHECK-NEXT: sw s9, 360(sp) +; CHECK-NEXT: sw s11, 360(sp) ; CHECK-NEXT: call __multf3@plt ; CHECK-NEXT: lw a0, 376(sp) ; CHECK-NEXT: lw a1, 388(sp) @@ -202,10 +202,10 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a0, %lo(S)(a4) ; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 260(sp) -; CHECK-NEXT: sw s3, 256(sp) -; CHECK-NEXT: sw s2, 252(sp) -; CHECK-NEXT: sw s1, 248(sp) -; CHECK-NEXT: sw s4, 276(sp) +; CHECK-NEXT: sw s1, 256(sp) +; CHECK-NEXT: sw s0, 252(sp) +; CHECK-NEXT: sw s8, 248(sp) +; CHECK-NEXT: sw s9, 276(sp) ; CHECK-NEXT: lw a0, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 272(sp) ; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload @@ -229,7 +229,7 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw zero, 160(sp) ; CHECK-NEXT: sw zero, 156(sp) ; CHECK-NEXT: sw zero, 152(sp) -; CHECK-NEXT: sw s0, 180(sp) +; CHECK-NEXT: sw s7, 180(sp) ; CHECK-NEXT: sw s6, 176(sp) ; CHECK-NEXT: lw a0, 0(sp) # 4-byte Folded Reload ; CHECK-NEXT: sw a0, 172(sp) @@ -251,9 +251,9 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw zero, 112(sp) ; CHECK-NEXT: sw zero, 108(sp) ; CHECK-NEXT: sw zero, 104(sp) -; CHECK-NEXT: sw s8, 132(sp) -; CHECK-NEXT: sw s7, 128(sp) -; CHECK-NEXT: sw s11, 124(sp) +; CHECK-NEXT: sw s4, 132(sp) +; CHECK-NEXT: sw s3, 128(sp) +; CHECK-NEXT: sw s2, 124(sp) ; CHECK-NEXT: addi a0, sp, 136 ; CHECK-NEXT: addi a1, sp, 120 ; CHECK-NEXT: addi a2, sp, 104 diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index c358b6172d468..200a8731cbd64 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ 
b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -45,12 +45,12 @@ define void @caller_indirect_tail(i32 %a) nounwind { ; CHECK-NOT: tail callee_indirect2 ; CHECK: lui a0, %hi(callee_indirect2) -; CHECK-NEXT: addi a5, a0, %lo(callee_indirect2) -; CHECK-NEXT: jr a5 +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2) +; CHECK-NEXT: jr t1 ; CHECK: lui a0, %hi(callee_indirect1) -; CHECK-NEXT: addi a5, a0, %lo(callee_indirect1) -; CHECK-NEXT: jr a5 +; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1) +; CHECK-NEXT: jr t1 entry: %tobool = icmp eq i32 %a, 0 %callee = select i1 %tobool, void ()* @callee_indirect1, void ()* @callee_indirect2 diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll index 09f74a175802d..8e2c3f350df81 100644 --- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -10,103 +10,99 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 { ; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill -; RISCV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RISCV32-NEXT: sw s6, 4(sp) # 4-byte Folded Spill -; RISCV32-NEXT: lw a6, 12(a1) +; RISCV32-NEXT: lw a3, 12(a1) ; RISCV32-NEXT: lw a7, 12(a2) -; RISCV32-NEXT: lw t3, 8(a1) +; RISCV32-NEXT: lw a6, 8(a1) ; RISCV32-NEXT: lw a4, 0(a2) ; RISCV32-NEXT: lw a5, 0(a1) -; RISCV32-NEXT: lw a3, 4(a1) -; RISCV32-NEXT: lw s2, 8(a2) +; RISCV32-NEXT: lw t3, 4(a1) +; RISCV32-NEXT: lw t0, 8(a2) ; RISCV32-NEXT: lw a2, 4(a2) ; RISCV32-NEXT: mulhu a1, a5, a4 -; RISCV32-NEXT: mul s1, a3, a4 -; RISCV32-NEXT: add a1, s1, a1 -; RISCV32-NEXT: sltu s1, a1, s1 -; RISCV32-NEXT: mulhu s0, a3, a4 -; RISCV32-NEXT: add t4, s0, s1 -; RISCV32-NEXT: mul s0, a5, a2 -; RISCV32-NEXT: add t0, s0, a1 -; RISCV32-NEXT: sltu a1, t0, s0 -; RISCV32-NEXT: mulhu s0, a5, a2 -; RISCV32-NEXT: add a1, s0, a1 -; 
RISCV32-NEXT: add a1, t4, a1 -; RISCV32-NEXT: mul s0, a3, a2 -; RISCV32-NEXT: add s1, s0, a1 -; RISCV32-NEXT: mul t1, s2, a5 -; RISCV32-NEXT: mul s3, t3, a4 +; RISCV32-NEXT: mul t1, t3, a4 +; RISCV32-NEXT: add a1, t1, a1 +; RISCV32-NEXT: sltu t1, a1, t1 +; RISCV32-NEXT: mulhu t2, t3, a4 +; RISCV32-NEXT: add t4, t2, t1 +; RISCV32-NEXT: mul t1, a5, a2 +; RISCV32-NEXT: add a1, t1, a1 +; RISCV32-NEXT: sltu t1, a1, t1 +; RISCV32-NEXT: mulhu t2, a5, a2 +; RISCV32-NEXT: add t1, t2, t1 +; RISCV32-NEXT: add t5, t4, t1 +; RISCV32-NEXT: mul t6, t3, a2 +; RISCV32-NEXT: add s0, t6, t5 +; RISCV32-NEXT: mul t1, t0, a5 +; RISCV32-NEXT: mul s3, a6, a4 ; RISCV32-NEXT: add s4, s3, t1 -; RISCV32-NEXT: add t1, s1, s4 -; RISCV32-NEXT: sltu t2, t1, s1 -; RISCV32-NEXT: sltu s1, s1, s0 -; RISCV32-NEXT: sltu a1, a1, t4 -; RISCV32-NEXT: mulhu s0, a3, a2 -; RISCV32-NEXT: add a1, s0, a1 -; RISCV32-NEXT: add s0, a1, s1 -; RISCV32-NEXT: mul a1, a3, s2 -; RISCV32-NEXT: mul s1, a7, a5 -; RISCV32-NEXT: add a1, s1, a1 -; RISCV32-NEXT: mulhu s5, s2, a5 -; RISCV32-NEXT: add s6, s5, a1 -; RISCV32-NEXT: mul s1, a2, t3 -; RISCV32-NEXT: mul a1, a6, a4 -; RISCV32-NEXT: add a1, a1, s1 -; RISCV32-NEXT: mulhu t5, t3, a4 -; RISCV32-NEXT: add t6, t5, a1 -; RISCV32-NEXT: add a1, t6, s6 -; RISCV32-NEXT: sltu s1, s4, s3 -; RISCV32-NEXT: add a1, a1, s1 -; RISCV32-NEXT: add a1, s0, a1 -; RISCV32-NEXT: add t4, a1, t2 +; RISCV32-NEXT: add t1, s0, s4 +; RISCV32-NEXT: sltu t2, t1, s0 +; RISCV32-NEXT: sltu t6, s0, t6 +; RISCV32-NEXT: sltu t4, t5, t4 +; RISCV32-NEXT: mulhu t5, t3, a2 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: add s0, t4, t6 +; RISCV32-NEXT: mul t4, t3, t0 +; RISCV32-NEXT: mul t5, a7, a5 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: mulhu s1, t0, a5 +; RISCV32-NEXT: add s2, s1, t4 +; RISCV32-NEXT: mul t4, a2, a6 +; RISCV32-NEXT: mul t5, a3, a4 +; RISCV32-NEXT: add t4, t5, t4 +; RISCV32-NEXT: mulhu t5, a6, a4 +; RISCV32-NEXT: add t6, t5, t4 +; RISCV32-NEXT: add t4, t6, s2 +; RISCV32-NEXT: sltu s3, 
s4, s3 +; RISCV32-NEXT: add t4, t4, s3 +; RISCV32-NEXT: add t4, s0, t4 +; RISCV32-NEXT: add t4, t4, t2 ; RISCV32-NEXT: beq t4, s0, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start ; RISCV32-NEXT: sltu t2, t4, s0 ; RISCV32-NEXT: .LBB0_2: # %start -; RISCV32-NEXT: sltu a1, s6, s5 +; RISCV32-NEXT: sltu s0, s2, s1 +; RISCV32-NEXT: snez s1, t3 +; RISCV32-NEXT: snez s2, a7 +; RISCV32-NEXT: and s1, s2, s1 +; RISCV32-NEXT: mulhu s2, a7, a5 +; RISCV32-NEXT: snez s2, s2 +; RISCV32-NEXT: or s1, s1, s2 +; RISCV32-NEXT: mulhu t3, t3, t0 +; RISCV32-NEXT: snez t3, t3 +; RISCV32-NEXT: or t3, s1, t3 +; RISCV32-NEXT: or t3, t3, s0 +; RISCV32-NEXT: sltu t5, t6, t5 +; RISCV32-NEXT: snez t6, a2 ; RISCV32-NEXT: snez s0, a3 -; RISCV32-NEXT: snez s1, a7 -; RISCV32-NEXT: and s0, s1, s0 -; RISCV32-NEXT: mulhu s1, a7, a5 -; RISCV32-NEXT: snez s1, s1 -; RISCV32-NEXT: or s0, s0, s1 -; RISCV32-NEXT: mulhu a3, a3, s2 -; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or a3, s0, a3 -; RISCV32-NEXT: or a1, a3, a1 -; RISCV32-NEXT: sltu a3, t6, t5 -; RISCV32-NEXT: snez s1, a2 -; RISCV32-NEXT: snez s0, a6 -; RISCV32-NEXT: and s1, s0, s1 -; RISCV32-NEXT: mulhu s0, a6, a4 +; RISCV32-NEXT: and t6, s0, t6 +; RISCV32-NEXT: mulhu s0, a3, a4 ; RISCV32-NEXT: snez s0, s0 -; RISCV32-NEXT: or s1, s1, s0 -; RISCV32-NEXT: mulhu a2, a2, t3 +; RISCV32-NEXT: or t6, t6, s0 +; RISCV32-NEXT: mulhu a2, a2, a6 ; RISCV32-NEXT: snez a2, a2 -; RISCV32-NEXT: or a2, s1, a2 -; RISCV32-NEXT: or a2, a2, a3 -; RISCV32-NEXT: or a3, s2, a7 +; RISCV32-NEXT: or a2, t6, a2 +; RISCV32-NEXT: or a2, a2, t5 +; RISCV32-NEXT: or a7, t0, a7 +; RISCV32-NEXT: snez a7, a7 +; RISCV32-NEXT: or a3, a6, a3 ; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or s1, t3, a6 -; RISCV32-NEXT: snez s1, s1 -; RISCV32-NEXT: and a3, s1, a3 +; RISCV32-NEXT: and a3, a3, a7 ; RISCV32-NEXT: or a2, a3, a2 -; RISCV32-NEXT: or a1, a2, a1 -; RISCV32-NEXT: or a1, a1, t2 -; RISCV32-NEXT: mul a2, a5, a4 -; RISCV32-NEXT: andi a1, a1, 1 -; RISCV32-NEXT: sw a2, 0(a0) -; RISCV32-NEXT: sw 
t0, 4(a0) +; RISCV32-NEXT: or a2, a2, t3 +; RISCV32-NEXT: or a2, a2, t2 +; RISCV32-NEXT: mul a3, a5, a4 +; RISCV32-NEXT: andi a2, a2, 1 +; RISCV32-NEXT: sw a3, 0(a0) +; RISCV32-NEXT: sw a1, 4(a0) ; RISCV32-NEXT: sw t1, 8(a0) ; RISCV32-NEXT: sw t4, 12(a0) -; RISCV32-NEXT: sb a1, 16(a0) +; RISCV32-NEXT: sb a2, 16(a0) ; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RISCV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RISCV32-NEXT: addi sp, sp, 32 ; RISCV32-NEXT: ret start: diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index 9acc1ad7e0347..e0de325fd5359 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -394,8 +394,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind { ; RV64-NEXT: lwu a1, 0(s0) ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli s2, a0, 11 -; RV64-NEXT: srli s1, a0, 22 +; RV64-NEXT: srli s1, a0, 11 +; RV64-NEXT: srli s2, a0, 22 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 683 ; RV64-NEXT: call __muldi3@plt @@ -407,14 +407,14 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind { ; RV64-NEXT: li a1, 341 ; RV64-NEXT: sltu s3, a1, a0 ; RV64-NEXT: li a1, 819 -; RV64-NEXT: mv a0, s1 +; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __muldi3@plt ; RV64-NEXT: addiw a0, a0, -1638 ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: li a1, 1 -; RV64-NEXT: sltu s1, a1, a0 +; RV64-NEXT: sltu s2, a1, a0 ; RV64-NEXT: li a1, 1463 -; RV64-NEXT: mv a0, s2 +; RV64-NEXT: mv a0, s1 ; RV64-NEXT: call __muldi3@plt ; RV64-NEXT: addiw a0, a0, -1463 ; RV64-NEXT: andi a0, a0, 2047 @@ -426,7 +426,7 @@ define void @test_urem_vec(<3 x i11>* %X) 
nounwind { ; RV64-NEXT: andi a0, a0, 2047 ; RV64-NEXT: slli a0, a0, 11 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a1, s1, 22 +; RV64-NEXT: slli a1, s2, 22 ; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: sw a0, 0(s0) ; RV64-NEXT: slli a0, a0, 31 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index c028c7d387dcd..b804e53b6a71d 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -19,127 +19,123 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: lhu s3, 8(a1) -; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s0, 12(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 124 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 98 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 1003 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 
12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_urem_vec_1: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 12(a1) +; RV32IM-NEXT: lhu a2, 12(a1) ; RV32IM-NEXT: lhu a3, 8(a1) ; RV32IM-NEXT: lhu a4, 0(a1) ; RV32IM-NEXT: lhu a1, 4(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 ; RV32IM-NEXT: mulhu a5, a4, a5 -; RV32IM-NEXT: sub a2, a4, a5 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 -; RV32IM-NEXT: srli a4, a1, 2 -; RV32IM-NEXT: lui a5, 135300 -; RV32IM-NEXT: addi a5, a5, 529 -; RV32IM-NEXT: mulhu a4, a4, a5 -; RV32IM-NEXT: srli a4, a4, 2 -; RV32IM-NEXT: li a5, 124 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a1, a1, a4 -; RV32IM-NEXT: lui a4, 342392 -; RV32IM-NEXT: addi a4, a4, 669 -; RV32IM-NEXT: mulhu a4, a3, a4 -; RV32IM-NEXT: srli a4, a4, 5 -; RV32IM-NEXT: li a5, 98 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a3, a3, a4 -; RV32IM-NEXT: lui a4, 267633 -; RV32IM-NEXT: addi a4, a4, -1809 -; RV32IM-NEXT: mulhu a4, a6, a4 -; RV32IM-NEXT: srli a4, a4, 8 -; RV32IM-NEXT: li a5, 1003 -; RV32IM-NEXT: mul a4, a4, a5 -; RV32IM-NEXT: sub a4, a6, a4 -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sub a6, a4, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 +; RV32IM-NEXT: srli a5, a1, 2 +; RV32IM-NEXT: lui a6, 135300 +; RV32IM-NEXT: addi a6, a6, 529 +; RV32IM-NEXT: mulhu a5, a5, a6 +; RV32IM-NEXT: srli a5, a5, 2 +; RV32IM-NEXT: li a6, 124 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: lui a5, 342392 +; RV32IM-NEXT: addi a5, a5, 669 +; RV32IM-NEXT: mulhu a5, a3, a5 +; RV32IM-NEXT: srli a5, a5, 5 +; 
RV32IM-NEXT: li a6, 98 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a3, a3, a5 +; RV32IM-NEXT: lui a5, 267633 +; RV32IM-NEXT: addi a5, a5, -1809 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: srli a5, a5, 8 +; RV32IM-NEXT: li a6, 1003 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) ; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_1: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: lhu s3, 16(a1) -; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s0, 24(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 124 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 98 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 1003 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; 
RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: fold_urem_vec_1: @@ -147,44 +143,44 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lhu a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI0_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3) -; RV64IM-NEXT: lhu a6, 24(a1) +; RV64IM-NEXT: lhu a4, 24(a1) ; RV64IM-NEXT: lhu a5, 16(a1) ; RV64IM-NEXT: lhu a1, 8(a1) ; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: sub a4, a2, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_1) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_1)(a4) -; RV64IM-NEXT: mulw a3, a3, a7 -; RV64IM-NEXT: subw t0, a2, a3 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: lui a7, %hi(.LCPI0_1) +; RV64IM-NEXT: ld a7, %lo(.LCPI0_1)(a7) +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 2 -; RV64IM-NEXT: mulhu a3, a3, a4 
+; RV64IM-NEXT: mulhu a3, a3, a7 ; RV64IM-NEXT: srli a3, a3, 3 -; RV64IM-NEXT: li a7, 124 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_2) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_2)(a4) -; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: li a6, 124 +; RV64IM-NEXT: lui a7, %hi(.LCPI0_2) +; RV64IM-NEXT: ld a7, %lo(.LCPI0_2)(a7) +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a1, a1, a3 ; RV64IM-NEXT: srli a3, a5, 1 -; RV64IM-NEXT: mulhu a3, a3, a4 +; RV64IM-NEXT: mulhu a3, a3, a7 ; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: lui a4, %hi(.LCPI0_3) -; RV64IM-NEXT: ld a4, %lo(.LCPI0_3)(a4) -; RV64IM-NEXT: li a2, 98 -; RV64IM-NEXT: mulw a2, a3, a2 -; RV64IM-NEXT: subw a2, a5, a2 -; RV64IM-NEXT: mulhu a3, a6, a4 -; RV64IM-NEXT: srli a3, a3, 7 -; RV64IM-NEXT: li a4, 1003 -; RV64IM-NEXT: mulw a3, a3, a4 -; RV64IM-NEXT: subw a3, a6, a3 -; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: lui a6, %hi(.LCPI0_3) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6) +; RV64IM-NEXT: li a7, 98 +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a5, a3 +; RV64IM-NEXT: mulhu a5, a4, a6 +; RV64IM-NEXT: srli a5, a5, 7 +; RV64IM-NEXT: li a6, 1003 +; RV64IM-NEXT: mulw a5, a5, a6 +; RV64IM-NEXT: subw a4, a4, a5 +; RV64IM-NEXT: sh a4, 6(a0) +; RV64IM-NEXT: sh a3, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -200,126 +196,122 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: lhu s3, 8(a1) -; RV32I-NEXT: lhu s0, 4(a1) +; RV32I-NEXT: lhu s0, 12(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; 
RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s5, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 6(s1) -; RV32I-NEXT: sh s0, 4(s1) -; RV32I-NEXT: sh s5, 2(s1) -; RV32I-NEXT: sh s4, 0(s1) +; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: sh s1, 4(s3) +; RV32I-NEXT: sh s2, 2(s3) +; RV32I-NEXT: sh s4, 0(s3) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: fold_urem_vec_2: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 12(a1) -; RV32IM-NEXT: lhu a7, 8(a1) +; RV32IM-NEXT: lhu a2, 12(a1) +; RV32IM-NEXT: lhu a3, 8(a1) ; RV32IM-NEXT: lhu a4, 0(a1) ; RV32IM-NEXT: lhu a1, 4(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: sub a3, a4, a2 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a3, 95 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub t0, a4, a2 -; RV32IM-NEXT: mulhu a4, a1, a5 -; RV32IM-NEXT: sub a2, a1, a4 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a4 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub a1, a1, a2 -; RV32IM-NEXT: mulhu a2, a7, a5 -; RV32IM-NEXT: sub a4, a7, a2 -; RV32IM-NEXT: srli a4, a4, 1 -; RV32IM-NEXT: add a2, a4, a2 -; 
RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a2, a2, a3 -; RV32IM-NEXT: sub a2, a7, a2 -; RV32IM-NEXT: mulhu a4, a6, a5 -; RV32IM-NEXT: sub a5, a6, a4 -; RV32IM-NEXT: srli a5, a5, 1 -; RV32IM-NEXT: add a4, a5, a4 -; RV32IM-NEXT: srli a4, a4, 6 -; RV32IM-NEXT: mul a3, a4, a3 -; RV32IM-NEXT: sub a3, a6, a3 -; RV32IM-NEXT: sh a3, 6(a0) -; RV32IM-NEXT: sh a2, 4(a0) +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: sub a7, a4, a6 +; RV32IM-NEXT: srli a7, a7, 1 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: li a7, 95 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a4, a4, a6 +; RV32IM-NEXT: mulhu a6, a1, a5 +; RV32IM-NEXT: sub t0, a1, a6 +; RV32IM-NEXT: srli t0, t0, 1 +; RV32IM-NEXT: add a6, t0, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a1, a1, a6 +; RV32IM-NEXT: mulhu a6, a3, a5 +; RV32IM-NEXT: sub t0, a3, a6 +; RV32IM-NEXT: srli t0, t0, 1 +; RV32IM-NEXT: add a6, t0, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: mul a6, a6, a7 +; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: sub a6, a2, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: mul a5, a5, a7 +; RV32IM-NEXT: sub a2, a2, a5 +; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a3, 4(a0) ; RV32IM-NEXT: sh a1, 2(a0) -; RV32IM-NEXT: sh t0, 0(a0) +; RV32IM-NEXT: sh a4, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: fold_urem_vec_2: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: lhu s3, 16(a1) -; RV64I-NEXT: lhu s0, 8(a1) +; RV64I-NEXT: addi sp, sp, -48 +; 
RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s0, 24(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s5, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s1) -; RV64I-NEXT: sh s0, 4(s1) -; RV64I-NEXT: sh s5, 2(s1) -; RV64I-NEXT: sh s4, 0(s1) -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: sh s1, 4(s3) +; RV64I-NEXT: sh s2, 2(s3) +; RV64I-NEXT: sh s4, 0(s3) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: 
fold_urem_vec_2: @@ -327,42 +319,42 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lhu a2, 0(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI1_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3) -; RV64IM-NEXT: lhu a6, 24(a1) -; RV64IM-NEXT: lhu a7, 16(a1) +; RV64IM-NEXT: lhu a4, 24(a1) +; RV64IM-NEXT: lhu a5, 16(a1) ; RV64IM-NEXT: lhu a1, 8(a1) -; RV64IM-NEXT: mulhu a4, a2, a3 -; RV64IM-NEXT: sub a5, a2, a4 -; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli a4, a4, 6 -; RV64IM-NEXT: li a5, 95 -; RV64IM-NEXT: mulw a4, a4, a5 -; RV64IM-NEXT: subw t0, a2, a4 -; RV64IM-NEXT: mulhu a4, a1, a3 -; RV64IM-NEXT: sub a2, a1, a4 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a4 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: mulhu a2, a7, a3 -; RV64IM-NEXT: sub a4, a7, a2 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a2, a4, a2 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a2, a2, a5 -; RV64IM-NEXT: subw a2, a7, a2 -; RV64IM-NEXT: mulhu a3, a6, a3 -; RV64IM-NEXT: sub a4, a6, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: mulhu a6, a2, a3 +; RV64IM-NEXT: sub a7, a2, a6 +; RV64IM-NEXT: srli a7, a7, 1 +; RV64IM-NEXT: add a6, a7, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: mulhu a6, a1, a3 +; RV64IM-NEXT: sub t0, a1, a6 +; RV64IM-NEXT: srli t0, t0, 1 +; RV64IM-NEXT: add a6, t0, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a1, a1, a6 +; RV64IM-NEXT: mulhu a6, a5, a3 +; RV64IM-NEXT: sub t0, a5, a6 +; RV64IM-NEXT: srli t0, t0, 1 +; RV64IM-NEXT: add a6, t0, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: mulw a6, a6, a7 +; RV64IM-NEXT: subw a5, a5, a6 +; RV64IM-NEXT: mulhu a3, a4, a3 +; RV64IM-NEXT: sub a6, a4, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; 
RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: mulw a3, a3, a5 -; RV64IM-NEXT: subw a3, a6, a3 +; RV64IM-NEXT: mulw a3, a3, a7 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh a3, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) +; RV64IM-NEXT: sh a5, 4(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh t0, 0(a0) +; RV64IM-NEXT: sh a2, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -384,47 +376,46 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 0(a1) -; RV32I-NEXT: lhu s3, 4(a1) -; RV32I-NEXT: lhu s4, 8(a1) -; RV32I-NEXT: lhu s1, 12(a1) +; RV32I-NEXT: lhu s1, 0(a1) +; RV32I-NEXT: lhu s2, 4(a1) +; RV32I-NEXT: lhu s3, 8(a1) +; RV32I-NEXT: lhu s4, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s5, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3@plt ; RV32I-NEXT: mv s8, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __udivsi3@plt -; RV32I-NEXT: mv s9, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __udivsi3@plt ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __udivsi3@plt -; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __udivsi3@plt +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __udivsi3@plt ; 
RV32I-NEXT: add a0, s8, a0 -; RV32I-NEXT: add a1, s7, s1 -; RV32I-NEXT: add a2, s6, s4 -; RV32I-NEXT: add a3, s5, s9 +; RV32I-NEXT: add a1, s7, s2 +; RV32I-NEXT: add a2, s6, s3 +; RV32I-NEXT: add a3, s5, s4 ; RV32I-NEXT: sh a3, 6(s0) ; RV32I-NEXT: sh a2, 4(s0) ; RV32I-NEXT: sh a1, 2(s0) @@ -439,127 +430,124 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: combine_urem_udiv: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 0(a1) -; RV32IM-NEXT: lhu a7, 4(a1) +; RV32IM-NEXT: lhu a2, 0(a1) +; RV32IM-NEXT: lhu a3, 4(a1) ; RV32IM-NEXT: lhu a4, 12(a1) ; RV32IM-NEXT: lhu a1, 8(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 -; RV32IM-NEXT: mulhu a2, a4, a5 -; RV32IM-NEXT: sub a3, a4, a2 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a2, a3, a2 -; RV32IM-NEXT: srli t3, a2, 6 -; RV32IM-NEXT: li t0, 95 -; RV32IM-NEXT: mul a3, t3, t0 -; RV32IM-NEXT: sub t1, a4, a3 -; RV32IM-NEXT: mulhu a4, a1, a5 -; RV32IM-NEXT: sub a3, a1, a4 -; RV32IM-NEXT: srli a3, a3, 1 -; RV32IM-NEXT: add a3, a3, a4 -; RV32IM-NEXT: srli a3, a3, 6 -; RV32IM-NEXT: mul a4, a3, t0 -; RV32IM-NEXT: sub t2, a1, a4 -; RV32IM-NEXT: mulhu a4, a7, a5 -; RV32IM-NEXT: sub a1, a7, a4 -; RV32IM-NEXT: srli a1, a1, 1 -; RV32IM-NEXT: add a1, a1, a4 -; RV32IM-NEXT: srli a1, a1, 6 -; RV32IM-NEXT: mul a4, a1, t0 -; RV32IM-NEXT: sub a4, a7, a4 -; RV32IM-NEXT: mulhu a5, a6, a5 -; RV32IM-NEXT: sub a2, a6, a5 -; RV32IM-NEXT: srli a2, a2, 1 +; RV32IM-NEXT: mulhu a6, a4, a5 +; RV32IM-NEXT: sub a7, a4, a6 +; RV32IM-NEXT: srli a7, a7, 1 +; RV32IM-NEXT: add a6, a7, a6 +; RV32IM-NEXT: srli a6, a6, 6 +; RV32IM-NEXT: li a7, 95 +; RV32IM-NEXT: mul t0, a6, a7 +; RV32IM-NEXT: sub a4, a4, t0 +; RV32IM-NEXT: mulhu t0, a1, a5 +; RV32IM-NEXT: sub 
t1, a1, t0 +; RV32IM-NEXT: srli t1, t1, 1 +; RV32IM-NEXT: add t0, t1, t0 +; RV32IM-NEXT: srli t0, t0, 6 +; RV32IM-NEXT: mul t1, t0, a7 +; RV32IM-NEXT: sub a1, a1, t1 +; RV32IM-NEXT: mulhu t1, a3, a5 +; RV32IM-NEXT: sub t2, a3, t1 +; RV32IM-NEXT: srli t2, t2, 1 +; RV32IM-NEXT: add t1, t2, t1 +; RV32IM-NEXT: srli t1, t1, 6 +; RV32IM-NEXT: mul t2, t1, a7 +; RV32IM-NEXT: sub a3, a3, t2 +; RV32IM-NEXT: mulhu a5, a2, a5 +; RV32IM-NEXT: sub t2, a2, a5 +; RV32IM-NEXT: srli t2, t2, 1 +; RV32IM-NEXT: add a5, t2, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: mul a7, a5, a7 +; RV32IM-NEXT: sub a2, a2, a7 ; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: mul a5, a2, t0 -; RV32IM-NEXT: sub a5, a6, a5 -; RV32IM-NEXT: add a2, a5, a2 -; RV32IM-NEXT: add a1, a4, a1 -; RV32IM-NEXT: add a3, t2, a3 -; RV32IM-NEXT: add a4, t1, t3 +; RV32IM-NEXT: add a3, a3, t1 +; RV32IM-NEXT: add a1, a1, t0 +; RV32IM-NEXT: add a4, a4, a6 ; RV32IM-NEXT: sh a4, 6(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a1, 2(a0) +; RV32IM-NEXT: sh a1, 4(a0) +; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a2, 0(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: combine_urem_udiv: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -96 -; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s9, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 0(a1) -; RV64I-NEXT: lhu s3, 8(a1) -; RV64I-NEXT: lhu s4, 16(a1) -; RV64I-NEXT: lhu s1, 24(a1) +; RV64I-NEXT: addi sp, sp, -80 +; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd 
s0, 64(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 56(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 48(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: lhu s1, 0(a1) +; RV64I-NEXT: lhu s2, 8(a1) +; RV64I-NEXT: lhu s3, 16(a1) +; RV64I-NEXT: lhu s4, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s5, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s4 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s6, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s7, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3@plt ; RV64I-NEXT: mv s8, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __udivdi3@plt -; RV64I-NEXT: mv s9, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __udivdi3@plt ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __udivdi3@plt -; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __udivdi3@plt +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __udivdi3@plt ; RV64I-NEXT: addw a0, s8, a0 -; RV64I-NEXT: addw a1, s7, s1 -; RV64I-NEXT: addw a2, s6, s4 -; RV64I-NEXT: addw a3, s5, s9 +; RV64I-NEXT: addw a1, s7, s2 +; RV64I-NEXT: addw a2, s6, s3 +; RV64I-NEXT: addw a3, s5, s4 ; RV64I-NEXT: sh a3, 6(s0) ; RV64I-NEXT: sh a2, 4(s0) ; RV64I-NEXT: sh a1, 2(s0) ; RV64I-NEXT: sh a0, 0(s0) -; RV64I-NEXT: ld ra, 88(sp) # 
8-byte Folded Reload -; RV64I-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s9, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 96 +; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 48(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s6, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s7, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s8, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: combine_urem_udiv: @@ -567,45 +555,45 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lhu a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI2_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI2_0)(a3) -; RV64IM-NEXT: lhu a6, 0(a1) -; RV64IM-NEXT: lhu a7, 8(a1) +; RV64IM-NEXT: lhu a4, 0(a1) +; RV64IM-NEXT: lhu a5, 8(a1) ; RV64IM-NEXT: lhu a1, 16(a1) -; RV64IM-NEXT: mulhu a4, a2, a3 -; RV64IM-NEXT: sub a5, a2, a4 -; RV64IM-NEXT: srli a5, a5, 1 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: srli t3, a4, 6 -; RV64IM-NEXT: li t0, 95 -; RV64IM-NEXT: mulw a5, t3, t0 -; RV64IM-NEXT: subw t1, a2, a5 -; RV64IM-NEXT: mulhu a5, a1, a3 -; RV64IM-NEXT: sub a2, a1, a5 -; RV64IM-NEXT: srli a2, a2, 1 -; RV64IM-NEXT: add a2, a2, a5 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: mulw a5, a2, t0 -; RV64IM-NEXT: subw t2, a1, a5 -; RV64IM-NEXT: 
mulhu a5, a7, a3 -; RV64IM-NEXT: sub a1, a7, a5 -; RV64IM-NEXT: srli a1, a1, 1 -; RV64IM-NEXT: add a1, a1, a5 -; RV64IM-NEXT: srli a1, a1, 6 -; RV64IM-NEXT: mulw a5, a1, t0 -; RV64IM-NEXT: subw a5, a7, a5 -; RV64IM-NEXT: mulhu a3, a6, a3 -; RV64IM-NEXT: sub a4, a6, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: mulhu a6, a2, a3 +; RV64IM-NEXT: sub a7, a2, a6 +; RV64IM-NEXT: srli a7, a7, 1 +; RV64IM-NEXT: add a6, a7, a6 +; RV64IM-NEXT: srli a6, a6, 6 +; RV64IM-NEXT: li a7, 95 +; RV64IM-NEXT: mulw t0, a6, a7 +; RV64IM-NEXT: subw a2, a2, t0 +; RV64IM-NEXT: mulhu t0, a1, a3 +; RV64IM-NEXT: sub t1, a1, t0 +; RV64IM-NEXT: srli t1, t1, 1 +; RV64IM-NEXT: add t0, t1, t0 +; RV64IM-NEXT: srli t0, t0, 6 +; RV64IM-NEXT: mulw t1, t0, a7 +; RV64IM-NEXT: subw a1, a1, t1 +; RV64IM-NEXT: mulhu t1, a5, a3 +; RV64IM-NEXT: sub t2, a5, t1 +; RV64IM-NEXT: srli t2, t2, 1 +; RV64IM-NEXT: add t1, t2, t1 +; RV64IM-NEXT: srli t1, t1, 6 +; RV64IM-NEXT: mulw t2, t1, a7 +; RV64IM-NEXT: subw a5, a5, t2 +; RV64IM-NEXT: mulhu a3, a4, a3 +; RV64IM-NEXT: sub t2, a4, a3 +; RV64IM-NEXT: srli t2, t2, 1 +; RV64IM-NEXT: add a3, t2, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: mulw a4, a3, t0 -; RV64IM-NEXT: subw a4, a6, a4 +; RV64IM-NEXT: mulw a7, a3, a7 +; RV64IM-NEXT: subw a4, a4, a7 ; RV64IM-NEXT: addw a3, a4, a3 -; RV64IM-NEXT: addw a1, a5, a1 -; RV64IM-NEXT: addw a2, t2, a2 -; RV64IM-NEXT: addw a4, t1, t3 -; RV64IM-NEXT: sh a4, 6(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a1, 2(a0) +; RV64IM-NEXT: addw a4, a5, t1 +; RV64IM-NEXT: addw a1, a1, t0 +; RV64IM-NEXT: addw a2, a2, a6 +; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a1, 4(a0) +; RV64IM-NEXT: sh a4, 2(a0) ; RV64IM-NEXT: sh a3, 0(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, @@ -624,17 +612,17 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 
4-byte Folded Spill -; RV32I-NEXT: lhu s2, 8(a1) -; RV32I-NEXT: lhu s3, 4(a1) -; RV32I-NEXT: lhu s1, 0(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 4(a1) +; RV32I-NEXT: lhu s3, 0(a1) ; RV32I-NEXT: lhu a2, 12(a1) ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: andi a1, s1, 63 -; RV32I-NEXT: andi a2, s3, 31 -; RV32I-NEXT: andi a3, s2, 7 +; RV32I-NEXT: andi a1, s3, 63 +; RV32I-NEXT: andi a2, s2, 31 +; RV32I-NEXT: andi a3, s1, 7 ; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: sh a3, 4(s0) ; RV32I-NEXT: sh a2, 2(s0) @@ -649,27 +637,27 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_urem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lhu a6, 8(a1) +; RV32IM-NEXT: lhu a2, 8(a1) ; RV32IM-NEXT: lhu a3, 4(a1) ; RV32IM-NEXT: lhu a4, 12(a1) ; RV32IM-NEXT: lhu a1, 0(a1) ; RV32IM-NEXT: lui a5, 364242 ; RV32IM-NEXT: addi a5, a5, 777 ; RV32IM-NEXT: mulhu a5, a4, a5 -; RV32IM-NEXT: sub a2, a4, a5 -; RV32IM-NEXT: srli a2, a2, 1 -; RV32IM-NEXT: add a2, a2, a5 -; RV32IM-NEXT: srli a2, a2, 6 -; RV32IM-NEXT: li a5, 95 -; RV32IM-NEXT: mul a2, a2, a5 -; RV32IM-NEXT: sub a2, a4, a2 +; RV32IM-NEXT: sub a6, a4, a5 +; RV32IM-NEXT: srli a6, a6, 1 +; RV32IM-NEXT: add a5, a6, a5 +; RV32IM-NEXT: srli a5, a5, 6 +; RV32IM-NEXT: li a6, 95 +; RV32IM-NEXT: mul a5, a5, a6 +; RV32IM-NEXT: sub a4, a4, a5 ; RV32IM-NEXT: andi a1, a1, 63 ; RV32IM-NEXT: andi a3, a3, 31 -; RV32IM-NEXT: andi a4, a6, 7 -; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: andi a2, a2, 7 +; RV32IM-NEXT: sh a2, 4(a0) ; RV32IM-NEXT: sh a3, 2(a0) ; RV32IM-NEXT: sh a1, 0(a0) -; RV32IM-NEXT: sh a2, 6(a0) +; RV32IM-NEXT: sh a4, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_urem_power_of_two: @@ -680,17 +668,17 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) 
# 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 16(a1) -; RV64I-NEXT: lhu s3, 8(a1) -; RV64I-NEXT: lhu s1, 0(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 8(a1) +; RV64I-NEXT: lhu s3, 0(a1) ; RV64I-NEXT: lhu a2, 24(a1) ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: andi a1, s1, 63 -; RV64I-NEXT: andi a2, s3, 31 -; RV64I-NEXT: andi a3, s2, 7 +; RV64I-NEXT: andi a1, s3, 63 +; RV64I-NEXT: andi a2, s2, 31 +; RV64I-NEXT: andi a3, s1, 7 ; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: sh a3, 4(s0) ; RV64I-NEXT: sh a2, 2(s0) @@ -708,20 +696,20 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lhu a2, 24(a1) ; RV64IM-NEXT: lui a3, %hi(.LCPI3_0) ; RV64IM-NEXT: ld a3, %lo(.LCPI3_0)(a3) -; RV64IM-NEXT: lhu a6, 16(a1) +; RV64IM-NEXT: lhu a4, 16(a1) ; RV64IM-NEXT: lhu a5, 8(a1) ; RV64IM-NEXT: lhu a1, 0(a1) ; RV64IM-NEXT: mulhu a3, a2, a3 -; RV64IM-NEXT: sub a4, a2, a3 -; RV64IM-NEXT: srli a4, a4, 1 -; RV64IM-NEXT: add a3, a4, a3 +; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: srli a6, a6, 1 +; RV64IM-NEXT: add a3, a6, a3 ; RV64IM-NEXT: srli a3, a3, 6 -; RV64IM-NEXT: li a4, 95 -; RV64IM-NEXT: mulw a3, a3, a4 +; RV64IM-NEXT: li a6, 95 +; RV64IM-NEXT: mulw a3, a3, a6 ; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: andi a1, a1, 63 ; RV64IM-NEXT: andi a3, a5, 31 -; RV64IM-NEXT: andi a4, a6, 7 +; RV64IM-NEXT: andi a4, a4, 7 ; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a3, 2(a0) ; RV64IM-NEXT: sh a1, 0(a0) @@ -741,10 +729,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu s2, 12(a1) +; RV32I-NEXT: lhu s0, 12(a1) ; RV32I-NEXT: lhu s1, 8(a1) ; RV32I-NEXT: lhu a2, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: li a1, 654 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call 
__umodsi3@plt @@ -755,12 +743,12 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __umodsi3@plt -; RV32I-NEXT: sh a0, 6(s0) -; RV32I-NEXT: sh s1, 4(s0) -; RV32I-NEXT: sh s3, 2(s0) -; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh s1, 4(s2) +; RV32I-NEXT: sh s3, 2(s2) +; RV32I-NEXT: sh zero, 0(s2) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -811,10 +799,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu s2, 24(a1) +; RV64I-NEXT: lhu s0, 24(a1) ; RV64I-NEXT: lhu s1, 16(a1) ; RV64I-NEXT: lhu a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt @@ -825,12 +813,12 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sh a0, 6(s0) -; RV64I-NEXT: sh s1, 4(s0) -; RV64I-NEXT: sh s3, 2(s0) -; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh s1, 4(s2) +; RV64I-NEXT: sh s3, 2(s2) +; RV64I-NEXT: sh zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -851,29 +839,29 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: srli a5, a5, 1 ; RV64IM-NEXT: add a3, a5, a3 ; RV64IM-NEXT: srli a3, a3, 4 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI4_1) -; 
RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5) -; RV64IM-NEXT: mulw a3, a3, a6 -; RV64IM-NEXT: subw a6, a2, a3 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI4_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI4_1)(a6) +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 1 -; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: mulhu a3, a3, a6 ; RV64IM-NEXT: srli a3, a3, 7 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) -; RV64IM-NEXT: li a2, 654 -; RV64IM-NEXT: mulw a2, a3, a2 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: mulhu a2, a4, a5 -; RV64IM-NEXT: srli a2, a2, 12 -; RV64IM-NEXT: lui a3, 1 -; RV64IM-NEXT: addiw a3, a3, 1327 -; RV64IM-NEXT: mulw a2, a2, a3 -; RV64IM-NEXT: subw a2, a4, a2 +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mulw a3, a3, a6 +; RV64IM-NEXT: subw a1, a1, a3 +; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: srli a3, a3, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mulw a3, a3, a5 +; RV64IM-NEXT: subw a3, a4, a3 ; RV64IM-NEXT: sh zero, 0(a0) -; RV64IM-NEXT: sh a2, 6(a0) +; RV64IM-NEXT: sh a3, 6(a0) ; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a6, 4(a0) +; RV64IM-NEXT: sh a2, 4(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -903,16 +891,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s2, 24(a1) -; RV32I-NEXT: lw s3, 28(a1) -; RV32I-NEXT: lw s4, 16(a1) -; RV32I-NEXT: lw s5, 20(a1) -; RV32I-NEXT: lw s6, 8(a1) -; RV32I-NEXT: lw s1, 12(a1) +; RV32I-NEXT: lw s0, 24(a1) +; RV32I-NEXT: lw s1, 28(a1) +; RV32I-NEXT: lw s2, 16(a1) +; RV32I-NEXT: lw s3, 20(a1) +; RV32I-NEXT: lw s4, 8(a1) +; RV32I-NEXT: lw s5, 12(a1) ; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s6, 
a0 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: li a3, 0 @@ -920,33 +907,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: mv s7, a0 ; RV32I-NEXT: mv s8, a1 ; RV32I-NEXT: li a2, 654 -; RV32I-NEXT: mv a0, s6 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: call __umoddi3@plt -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: mv s9, a1 -; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s5 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3@plt ; RV32I-NEXT: mv s4, a0 -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, 1 -; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: li a2, 23 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3@plt -; RV32I-NEXT: sw a1, 28(s0) -; RV32I-NEXT: sw a0, 24(s0) -; RV32I-NEXT: sw s1, 20(s0) -; RV32I-NEXT: sw s4, 16(s0) -; RV32I-NEXT: sw s9, 12(s0) -; RV32I-NEXT: sw s6, 8(s0) -; RV32I-NEXT: sw s8, 4(s0) -; RV32I-NEXT: sw s7, 0(s0) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s3, a1 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a2, a0, 1327 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __umoddi3@plt +; RV32I-NEXT: sw a1, 28(s6) +; RV32I-NEXT: sw a0, 24(s6) +; RV32I-NEXT: sw s3, 20(s6) +; RV32I-NEXT: sw s2, 16(s6) +; RV32I-NEXT: sw s5, 12(s6) +; RV32I-NEXT: sw s4, 8(s6) +; RV32I-NEXT: sw s8, 4(s6) +; RV32I-NEXT: sw s7, 0(s6) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -957,7 +944,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; @@ -974,16 +960,15 @@ define <4 x i64> 
@dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s9, 4(sp) # 4-byte Folded Spill -; RV32IM-NEXT: lw s2, 24(a1) -; RV32IM-NEXT: lw s3, 28(a1) -; RV32IM-NEXT: lw s4, 16(a1) -; RV32IM-NEXT: lw s5, 20(a1) -; RV32IM-NEXT: lw s6, 8(a1) -; RV32IM-NEXT: lw s1, 12(a1) +; RV32IM-NEXT: lw s0, 24(a1) +; RV32IM-NEXT: lw s1, 28(a1) +; RV32IM-NEXT: lw s2, 16(a1) +; RV32IM-NEXT: lw s3, 20(a1) +; RV32IM-NEXT: lw s4, 8(a1) +; RV32IM-NEXT: lw s5, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) ; RV32IM-NEXT: lw a1, 4(a1) -; RV32IM-NEXT: mv s0, a0 +; RV32IM-NEXT: mv s6, a0 ; RV32IM-NEXT: li a2, 1 ; RV32IM-NEXT: mv a0, a3 ; RV32IM-NEXT: li a3, 0 @@ -991,33 +976,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: mv s7, a0 ; RV32IM-NEXT: mv s8, a1 ; RV32IM-NEXT: li a2, 654 -; RV32IM-NEXT: mv a0, s6 -; RV32IM-NEXT: mv a1, s1 -; RV32IM-NEXT: li a3, 0 -; RV32IM-NEXT: call __umoddi3@plt -; RV32IM-NEXT: mv s6, a0 -; RV32IM-NEXT: mv s9, a1 -; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s4 ; RV32IM-NEXT: mv a1, s5 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3@plt ; RV32IM-NEXT: mv s4, a0 -; RV32IM-NEXT: mv s1, a1 -; RV32IM-NEXT: lui a0, 1 -; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv s5, a1 +; RV32IM-NEXT: li a2, 23 ; RV32IM-NEXT: mv a0, s2 ; RV32IM-NEXT: mv a1, s3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3@plt -; RV32IM-NEXT: sw a1, 28(s0) -; RV32IM-NEXT: sw a0, 24(s0) -; RV32IM-NEXT: sw s1, 20(s0) -; RV32IM-NEXT: sw s4, 16(s0) -; RV32IM-NEXT: sw s9, 12(s0) -; RV32IM-NEXT: sw s6, 8(s0) -; RV32IM-NEXT: sw s8, 4(s0) -; RV32IM-NEXT: sw s7, 0(s0) +; RV32IM-NEXT: mv s2, a0 +; RV32IM-NEXT: mv s3, a1 +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a2, a0, 1327 +; RV32IM-NEXT: mv a0, s0 +; RV32IM-NEXT: mv a1, s1 +; RV32IM-NEXT: li a3, 0 +; RV32IM-NEXT: call __umoddi3@plt +; RV32IM-NEXT: sw a1, 
28(s6) +; RV32IM-NEXT: sw a0, 24(s6) +; RV32IM-NEXT: sw s3, 20(s6) +; RV32IM-NEXT: sw s2, 16(s6) +; RV32IM-NEXT: sw s5, 12(s6) +; RV32IM-NEXT: sw s4, 8(s6) +; RV32IM-NEXT: sw s8, 4(s6) +; RV32IM-NEXT: sw s7, 0(s6) ; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1028,7 +1013,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload ; RV32IM-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw s9, 4(sp) # 4-byte Folded Reload ; RV32IM-NEXT: addi sp, sp, 48 ; RV32IM-NEXT: ret ; @@ -1040,10 +1024,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld s2, 24(a1) +; RV64I-NEXT: ld s0, 24(a1) ; RV64I-NEXT: ld s1, 16(a1) ; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: li a1, 654 ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3@plt @@ -1054,12 +1038,12 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __umoddi3@plt -; RV64I-NEXT: sd a0, 24(s0) -; RV64I-NEXT: sd s1, 16(s0) -; RV64I-NEXT: sd s3, 8(s0) -; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd s1, 16(s2) +; RV64I-NEXT: sd s3, 8(s2) +; RV64I-NEXT: sd zero, 0(s2) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1080,29 +1064,29 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: srli a5, a5, 1 ; RV64IM-NEXT: add a3, a5, a3 ; RV64IM-NEXT: srli a3, 
a3, 4 -; RV64IM-NEXT: li a6, 23 -; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) -; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) -; RV64IM-NEXT: mul a3, a3, a6 -; RV64IM-NEXT: sub a6, a2, a3 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: lui a6, %hi(.LCPI6_1) +; RV64IM-NEXT: ld a6, %lo(.LCPI6_1)(a6) +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a2, a2, a3 ; RV64IM-NEXT: srli a3, a1, 1 -; RV64IM-NEXT: mulhu a3, a3, a5 +; RV64IM-NEXT: mulhu a3, a3, a6 ; RV64IM-NEXT: srli a3, a3, 7 ; RV64IM-NEXT: lui a5, %hi(.LCPI6_2) ; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5) -; RV64IM-NEXT: li a2, 654 -; RV64IM-NEXT: mul a2, a3, a2 -; RV64IM-NEXT: sub a1, a1, a2 -; RV64IM-NEXT: mulhu a2, a4, a5 -; RV64IM-NEXT: srli a2, a2, 12 -; RV64IM-NEXT: lui a3, 1 -; RV64IM-NEXT: addiw a3, a3, 1327 -; RV64IM-NEXT: mul a2, a2, a3 -; RV64IM-NEXT: sub a2, a4, a2 +; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: mul a3, a3, a6 +; RV64IM-NEXT: sub a1, a1, a3 +; RV64IM-NEXT: mulhu a3, a4, a5 +; RV64IM-NEXT: srli a3, a3, 12 +; RV64IM-NEXT: lui a5, 1 +; RV64IM-NEXT: addiw a5, a5, 1327 +; RV64IM-NEXT: mul a3, a3, a5 +; RV64IM-NEXT: sub a3, a4, a3 ; RV64IM-NEXT: sd zero, 0(a0) -; RV64IM-NEXT: sd a2, 24(a0) +; RV64IM-NEXT: sd a3, 24(a0) ; RV64IM-NEXT: sd a1, 8(a0) -; RV64IM-NEXT: sd a6, 16(a0) +; RV64IM-NEXT: sd a2, 16(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll index 929c154f39e89..1b81beb113ecd 100644 --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -16,18 +16,18 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) { ; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: and a2, a0, a2 ; RV32-NEXT: slli a3, a2, 16 -; RV32-NEXT: srai a6, a3, 24 +; RV32-NEXT: srai a3, a3, 24 ; RV32-NEXT: slli a4, a0, 24 -; RV32-NEXT: srai a3, a4, 24 +; RV32-NEXT: srai a6, a4, 24 ; RV32-NEXT: slli a4, a0, 8 ; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bgtz a3, .LBB0_2 +; RV32-NEXT: 
bgtz a6, .LBB0_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a5, 0 ; RV32-NEXT: .LBB0_2: ; RV32-NEXT: srai a4, a4, 24 ; RV32-NEXT: andi a5, a5, 255 -; RV32-NEXT: bgtz a6, .LBB0_4 +; RV32-NEXT: bgtz a3, .LBB0_4 ; RV32-NEXT: # %bb.3: ; RV32-NEXT: li a2, 0 ; RV32-NEXT: j .LBB0_5 @@ -54,18 +54,18 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) { ; RV64-NEXT: addiw a2, a2, -256 ; RV64-NEXT: and a2, a0, a2 ; RV64-NEXT: slli a3, a2, 48 -; RV64-NEXT: srai a6, a3, 56 +; RV64-NEXT: srai a3, a3, 56 ; RV64-NEXT: slli a4, a0, 56 -; RV64-NEXT: srai a3, a4, 56 +; RV64-NEXT: srai a6, a4, 56 ; RV64-NEXT: slli a4, a0, 40 ; RV64-NEXT: mv a5, a0 -; RV64-NEXT: bgtz a3, .LBB0_2 +; RV64-NEXT: bgtz a6, .LBB0_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a5, 0 ; RV64-NEXT: .LBB0_2: ; RV64-NEXT: srai a4, a4, 56 ; RV64-NEXT: andi a5, a5, 255 -; RV64-NEXT: bgtz a6, .LBB0_4 +; RV64-NEXT: bgtz a3, .LBB0_4 ; RV64-NEXT: # %bb.3: ; RV64-NEXT: li a2, 0 ; RV64-NEXT: j .LBB0_5 diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index a3732e1104eb8..7342cb56827c2 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -567,9 +567,9 @@ entry: define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32-LABEL: ssubo.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: sltu a6, a0, a2 -; RV32-NEXT: sub a5, a1, a3 -; RV32-NEXT: sub a5, a5, a6 +; RV32-NEXT: sltu a5, a0, a2 +; RV32-NEXT: sub a6, a1, a3 +; RV32-NEXT: sub a5, a6, a5 ; RV32-NEXT: xor a6, a1, a5 ; RV32-NEXT: xor a1, a1, a3 ; RV32-NEXT: and a1, a1, a6 @@ -591,9 +591,9 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) { ; ; RV32ZBA-LABEL: ssubo.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: sltu a6, a0, a2 -; RV32ZBA-NEXT: sub a5, a1, a3 -; RV32ZBA-NEXT: sub a5, a5, a6 +; RV32ZBA-NEXT: sltu a5, a0, a2 +; RV32ZBA-NEXT: sub a6, a1, a3 +; RV32ZBA-NEXT: sub a5, a6, a5 ; RV32ZBA-NEXT: xor a6, a1, a5 ; RV32ZBA-NEXT: xor a1, a1, a3 ; RV32ZBA-NEXT: and a1, a1, a6 @@ -905,64 
+905,58 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 ; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 -; RV32-NEXT: .cfi_offset s3, -16 -; RV32-NEXT: mulhu a6, a0, a2 -; RV32-NEXT: mul a5, a1, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu a7, a6, a5 -; RV32-NEXT: mulhu a5, a1, a2 -; RV32-NEXT: add a7, a5, a7 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu t0, a6, a5 -; RV32-NEXT: mulhu a5, a0, a3 -; RV32-NEXT: add a5, a5, t0 -; RV32-NEXT: add t0, a7, a5 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a5, t1, t0 +; RV32-NEXT: mulhu a5, a0, a2 +; RV32-NEXT: mul a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: sltu a6, a5, a6 +; RV32-NEXT: mulhu a7, a1, a2 +; RV32-NEXT: add a6, a7, a6 +; RV32-NEXT: mul a7, a0, a3 +; RV32-NEXT: add a5, a7, a5 +; RV32-NEXT: sltu a7, a5, a7 +; RV32-NEXT: mulhu t0, a0, a3 +; RV32-NEXT: add a7, t0, a7 +; RV32-NEXT: add a7, a6, a7 +; RV32-NEXT: mul t0, a1, a3 +; RV32-NEXT: add t1, t0, a7 ; RV32-NEXT: srai t2, a1, 31 ; RV32-NEXT: mul t3, a2, t2 ; RV32-NEXT: srai t4, a3, 31 ; RV32-NEXT: mul t5, t4, a0 ; RV32-NEXT: add t6, t5, t3 -; RV32-NEXT: add s3, a5, t6 -; RV32-NEXT: sltu s2, s3, a5 -; RV32-NEXT: sltu a5, a5, t1 -; RV32-NEXT: sltu s1, t0, a7 -; RV32-NEXT: mulhu s0, a1, a3 -; RV32-NEXT: add s1, s0, s1 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: mulhu s1, a2, t2 -; RV32-NEXT: add s1, s1, t3 +; RV32-NEXT: add s0, t1, t6 +; RV32-NEXT: sltu s1, s0, t1 +; RV32-NEXT: sltu t0, t1, t0 +; RV32-NEXT: sltu a6, a7, a6 +; RV32-NEXT: mulhu a7, a1, a3 +; RV32-NEXT: add a6, a7, a6 +; RV32-NEXT: add a6, a6, t0 +; RV32-NEXT: mulhu a7, a2, t2 +; RV32-NEXT: add a7, a7, t3 ; RV32-NEXT: mul a3, a3, t2 -; RV32-NEXT: add a3, s1, a3 +; 
RV32-NEXT: add a3, a7, a3 ; RV32-NEXT: mul a1, t4, a1 -; RV32-NEXT: mulhu s1, t4, a0 -; RV32-NEXT: add a1, s1, a1 +; RV32-NEXT: mulhu a7, t4, a0 +; RV32-NEXT: add a1, a7, a1 ; RV32-NEXT: add a1, a1, t5 ; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: sltu a3, t6, t5 ; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a1, a5, a1 -; RV32-NEXT: add a1, a1, s2 -; RV32-NEXT: srai a3, a6, 31 +; RV32-NEXT: add a1, a6, a1 +; RV32-NEXT: add a1, a1, s1 +; RV32-NEXT: srai a3, a5, 31 ; RV32-NEXT: xor a1, a1, a3 -; RV32-NEXT: xor a3, s3, a3 +; RV32-NEXT: xor a3, s0, a3 ; RV32-NEXT: or a1, a3, a1 ; RV32-NEXT: snez a1, a1 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: sw a0, 0(a4) -; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: sw a5, 4(a4) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -982,64 +976,58 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s3, 0(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 ; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 -; RV32ZBA-NEXT: .cfi_offset s3, -16 -; RV32ZBA-NEXT: mulhu a6, a0, a2 -; RV32ZBA-NEXT: mul a5, a1, a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu a7, a6, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 -; RV32ZBA-NEXT: add a7, a5, a7 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu t0, a6, a5 -; RV32ZBA-NEXT: mulhu a5, a0, a3 -; RV32ZBA-NEXT: add a5, a5, t0 -; RV32ZBA-NEXT: add t0, a7, a5 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a5, t1, t0 +; RV32ZBA-NEXT: mulhu a5, a0, a2 +; RV32ZBA-NEXT: mul a6, a1, a2 +; RV32ZBA-NEXT: add a5, 
a6, a5 +; RV32ZBA-NEXT: sltu a6, a5, a6 +; RV32ZBA-NEXT: mulhu a7, a1, a2 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: mul a7, a0, a3 +; RV32ZBA-NEXT: add a5, a7, a5 +; RV32ZBA-NEXT: sltu a7, a5, a7 +; RV32ZBA-NEXT: mulhu t0, a0, a3 +; RV32ZBA-NEXT: add a7, t0, a7 +; RV32ZBA-NEXT: add a7, a6, a7 +; RV32ZBA-NEXT: mul t0, a1, a3 +; RV32ZBA-NEXT: add t1, t0, a7 ; RV32ZBA-NEXT: srai t2, a1, 31 ; RV32ZBA-NEXT: mul t3, a2, t2 ; RV32ZBA-NEXT: srai t4, a3, 31 ; RV32ZBA-NEXT: mul t5, t4, a0 ; RV32ZBA-NEXT: add t6, t5, t3 -; RV32ZBA-NEXT: add s3, a5, t6 -; RV32ZBA-NEXT: sltu s2, s3, a5 -; RV32ZBA-NEXT: sltu a5, a5, t1 -; RV32ZBA-NEXT: sltu s1, t0, a7 -; RV32ZBA-NEXT: mulhu s0, a1, a3 -; RV32ZBA-NEXT: add s1, s0, s1 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: mulhu s1, a2, t2 -; RV32ZBA-NEXT: add s1, s1, t3 +; RV32ZBA-NEXT: add s0, t1, t6 +; RV32ZBA-NEXT: sltu s1, s0, t1 +; RV32ZBA-NEXT: sltu t0, t1, t0 +; RV32ZBA-NEXT: sltu a6, a7, a6 +; RV32ZBA-NEXT: mulhu a7, a1, a3 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: add a6, a6, t0 +; RV32ZBA-NEXT: mulhu a7, a2, t2 +; RV32ZBA-NEXT: add a7, a7, t3 ; RV32ZBA-NEXT: mul a3, a3, t2 -; RV32ZBA-NEXT: add a3, s1, a3 +; RV32ZBA-NEXT: add a3, a7, a3 ; RV32ZBA-NEXT: mul a1, t4, a1 -; RV32ZBA-NEXT: mulhu s1, t4, a0 -; RV32ZBA-NEXT: add a1, s1, a1 +; RV32ZBA-NEXT: mulhu a7, t4, a0 +; RV32ZBA-NEXT: add a1, a7, a1 ; RV32ZBA-NEXT: add a1, a1, t5 ; RV32ZBA-NEXT: add a1, a1, a3 ; RV32ZBA-NEXT: sltu a3, t6, t5 ; RV32ZBA-NEXT: add a1, a1, a3 -; RV32ZBA-NEXT: add a1, a5, a1 -; RV32ZBA-NEXT: add a1, a1, s2 -; RV32ZBA-NEXT: srai a3, a6, 31 +; RV32ZBA-NEXT: add a1, a6, a1 +; RV32ZBA-NEXT: add a1, a1, s1 +; RV32ZBA-NEXT: srai a3, a5, 31 ; RV32ZBA-NEXT: xor a1, a1, a3 -; RV32ZBA-NEXT: xor a3, s3, a3 +; RV32ZBA-NEXT: xor a3, s0, a3 ; RV32ZBA-NEXT: or a1, a3, a1 ; RV32ZBA-NEXT: snez a1, a1 ; RV32ZBA-NEXT: mul a0, a0, a2 ; RV32ZBA-NEXT: sw a0, 0(a4) -; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) ; RV32ZBA-NEXT: mv a0, a1 ; 
RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s3, 0(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -1063,27 +1051,27 @@ entry: define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { ; RV32-LABEL: smulo2.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: li a7, 13 -; RV32-NEXT: mulhu a4, a0, a7 -; RV32-NEXT: mul a5, a1, a7 -; RV32-NEXT: add t0, a5, a4 -; RV32-NEXT: sltu a6, t0, a5 -; RV32-NEXT: mulhu a5, a1, a7 -; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mulhu a4, a0, a3 +; RV32-NEXT: mul a5, a1, a3 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 ; RV32-NEXT: srai a1, a1, 31 -; RV32-NEXT: mul a3, a1, a7 -; RV32-NEXT: add a3, a5, a3 -; RV32-NEXT: srai a4, t0, 31 -; RV32-NEXT: xor a6, a3, a4 -; RV32-NEXT: sltu a3, a3, a5 -; RV32-NEXT: mulh a1, a1, a7 -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: xor a1, a1, a4 -; RV32-NEXT: or a1, a6, a1 +; RV32-NEXT: mul a6, a1, a3 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: srai a7, a4, 31 +; RV32-NEXT: xor t0, a6, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulh a1, a1, a3 +; RV32-NEXT: add a1, a1, a5 +; RV32-NEXT: xor a1, a1, a7 +; RV32-NEXT: or a1, t0, a1 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: mul a0, a0, a7 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: sw a0, 0(a2) -; RV32-NEXT: sw t0, 4(a2) +; RV32-NEXT: sw a4, 4(a2) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; @@ -1100,27 +1088,27 @@ define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { ; ; RV32ZBA-LABEL: smulo2.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: li a7, 13 -; RV32ZBA-NEXT: mulhu a4, a0, a7 -; RV32ZBA-NEXT: mul a5, a1, a7 -; RV32ZBA-NEXT: add t0, a5, a4 -; RV32ZBA-NEXT: sltu a6, t0, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a7 -; RV32ZBA-NEXT: add a5, a5, a6 +; RV32ZBA-NEXT: li a3, 13 +; RV32ZBA-NEXT: mulhu a4, a0, a3 +; 
RV32ZBA-NEXT: mul a5, a1, a3 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 ; RV32ZBA-NEXT: srai a1, a1, 31 -; RV32ZBA-NEXT: mul a3, a1, a7 -; RV32ZBA-NEXT: add a3, a5, a3 -; RV32ZBA-NEXT: srai a4, t0, 31 -; RV32ZBA-NEXT: xor a6, a3, a4 -; RV32ZBA-NEXT: sltu a3, a3, a5 -; RV32ZBA-NEXT: mulh a1, a1, a7 -; RV32ZBA-NEXT: add a1, a1, a3 -; RV32ZBA-NEXT: xor a1, a1, a4 -; RV32ZBA-NEXT: or a1, a6, a1 +; RV32ZBA-NEXT: mul a6, a1, a3 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: srai a7, a4, 31 +; RV32ZBA-NEXT: xor t0, a6, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulh a1, a1, a3 +; RV32ZBA-NEXT: add a1, a1, a5 +; RV32ZBA-NEXT: xor a1, a1, a7 +; RV32ZBA-NEXT: or a1, t0, a1 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: mul a0, a0, a7 +; RV32ZBA-NEXT: mul a0, a0, a3 ; RV32ZBA-NEXT: sw a0, 0(a2) -; RV32ZBA-NEXT: sw t0, 4(a2) +; RV32ZBA-NEXT: sw a4, 4(a2) ; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: ret ; @@ -1289,25 +1277,25 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, i32* %2) { define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) { ; RV32-LABEL: umulo.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: mul a6, a3, a0 -; RV32-NEXT: mul a5, a1, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mulhu a5, a0, a2 -; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: sltu a7, a6, a5 -; RV32-NEXT: snez t0, a3 -; RV32-NEXT: snez a5, a1 -; RV32-NEXT: and a5, a5, t0 +; RV32-NEXT: mul a5, a3, a0 +; RV32-NEXT: mul a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mulhu a6, a0, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: sltu a6, a5, a6 +; RV32-NEXT: snez a7, a3 +; RV32-NEXT: snez t0, a1 +; RV32-NEXT: and a7, t0, a7 ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a5, a1 +; RV32-NEXT: or a1, a7, a1 ; RV32-NEXT: mulhu a3, a3, a0 ; RV32-NEXT: snez a3, a3 ; RV32-NEXT: or a1, a1, a3 -; RV32-NEXT: or a1, a1, a7 +; RV32-NEXT: or a1, a1, a6 ; RV32-NEXT: mul a0, a0, 
a2 ; RV32-NEXT: sw a0, 0(a4) -; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: sw a5, 4(a4) ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; @@ -1322,25 +1310,25 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) { ; ; RV32ZBA-LABEL: umulo.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: mul a6, a3, a0 -; RV32ZBA-NEXT: mul a5, a1, a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mulhu a5, a0, a2 -; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: sltu a7, a6, a5 -; RV32ZBA-NEXT: snez t0, a3 -; RV32ZBA-NEXT: snez a5, a1 -; RV32ZBA-NEXT: and a5, a5, t0 +; RV32ZBA-NEXT: mul a5, a3, a0 +; RV32ZBA-NEXT: mul a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a0, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: sltu a6, a5, a6 +; RV32ZBA-NEXT: snez a7, a3 +; RV32ZBA-NEXT: snez t0, a1 +; RV32ZBA-NEXT: and a7, t0, a7 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a5, a1 +; RV32ZBA-NEXT: or a1, a7, a1 ; RV32ZBA-NEXT: mulhu a3, a3, a0 ; RV32ZBA-NEXT: snez a3, a3 ; RV32ZBA-NEXT: or a1, a1, a3 -; RV32ZBA-NEXT: or a1, a1, a7 +; RV32ZBA-NEXT: or a1, a1, a6 ; RV32ZBA-NEXT: mul a0, a0, a2 ; RV32ZBA-NEXT: sw a0, 0(a4) -; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: sw a5, 4(a4) ; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: ret ; @@ -2340,62 +2328,56 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; 
RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu s1, a2, a5 -; RV32-NEXT: add s1, s1, t2 -; RV32-NEXT: mul a5, a3, a5 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: mul s1, t3, a1 -; RV32-NEXT: mulhu s0, t3, a0 -; RV32-NEXT: add s1, s0, s1 -; RV32-NEXT: add s1, s1, t4 -; RV32-NEXT: add a5, s1, a5 -; RV32-NEXT: sltu s1, t5, t4 -; RV32-NEXT: add a5, a5, s1 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add a4, a4, s2 -; RV32-NEXT: srai a5, a7, 31 -; RV32-NEXT: xor a4, a4, a5 -; RV32-NEXT: xor a5, t6, a5 -; RV32-NEXT: or a4, a5, a4 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a6, a2, t1 +; RV32-NEXT: add a6, a6, t2 +; RV32-NEXT: mul a7, a3, t1 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: mul a7, t3, a1 +; RV32-NEXT: mulhu t0, t3, a0 +; RV32-NEXT: add a7, t0, a7 +; RV32-NEXT: add a7, a7, t4 +; RV32-NEXT: add a6, a7, a6 +; RV32-NEXT: sltu a7, t5, t4 +; RV32-NEXT: add a6, a6, a7 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: add a5, a5, s0 +; RV32-NEXT: srai a4, a4, 31 +; RV32-NEXT: xor a5, a5, a4 +; RV32-NEXT: xor a4, t6, a4 +; RV32-NEXT: or a4, a4, a5 ; 
RV32-NEXT: bnez a4, .LBB44_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB44_2: # %entry ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2415,62 +2397,56 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; RV32ZBA-NEXT: sltu a4, a4, t1 -; RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu 
s1, a2, a5 -; RV32ZBA-NEXT: add s1, s1, t2 -; RV32ZBA-NEXT: mul a5, a3, a5 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: mul s1, t3, a1 -; RV32ZBA-NEXT: mulhu s0, t3, a0 -; RV32ZBA-NEXT: add s1, s0, s1 -; RV32ZBA-NEXT: add s1, s1, t4 -; RV32ZBA-NEXT: add a5, s1, a5 -; RV32ZBA-NEXT: sltu s1, t5, t4 -; RV32ZBA-NEXT: add a5, a5, s1 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add a4, a4, s2 -; RV32ZBA-NEXT: srai a5, a7, 31 -; RV32ZBA-NEXT: xor a4, a4, a5 -; RV32ZBA-NEXT: xor a5, t6, a5 -; RV32ZBA-NEXT: or a4, a5, a4 +; RV32ZBA-NEXT: add t6, t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a6, a2, t1 +; RV32ZBA-NEXT: add a6, a6, t2 +; RV32ZBA-NEXT: mul a7, a3, t1 +; RV32ZBA-NEXT: add a6, a6, a7 +; RV32ZBA-NEXT: mul a7, t3, a1 +; RV32ZBA-NEXT: mulhu t0, t3, a0 +; RV32ZBA-NEXT: add a7, t0, a7 +; RV32ZBA-NEXT: add a7, a7, t4 +; RV32ZBA-NEXT: add a6, a7, a6 +; RV32ZBA-NEXT: sltu a7, t5, t4 +; RV32ZBA-NEXT: add a6, a6, a7 +; RV32ZBA-NEXT: add a5, a5, a6 +; RV32ZBA-NEXT: add a5, a5, s0 +; RV32ZBA-NEXT: srai a4, a4, 31 +; RV32ZBA-NEXT: xor a5, a5, a4 +; RV32ZBA-NEXT: xor a4, t6, a4 +; RV32ZBA-NEXT: or a4, a4, a5 ; RV32ZBA-NEXT: bnez a4, .LBB44_2 ; RV32ZBA-NEXT: # %bb.1: # %entry ; RV32ZBA-NEXT: mv a0, a2 ; RV32ZBA-NEXT: mv a1, a3 ; RV32ZBA-NEXT: .LBB44_2: # %entry ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -2497,40 +2473,36 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: 
.cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a2, a2, t1 ; RV32-NEXT: add a2, a2, t2 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: mul a3, a3, t1 ; RV32-NEXT: add a2, a2, a3 ; RV32-NEXT: mul a1, t3, a1 ; RV32-NEXT: mulhu a0, t3, a0 @@ -2539,16 +2511,14 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: sltu a1, t5, t4 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, a4, a0 -; RV32-NEXT: add a0, a0, s2 -; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: srai a1, a4, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t6, a1 ; 
RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2566,40 +2536,36 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; RV32ZBA-NEXT: sltu a4, a4, t1 -; RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add t6, t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: 
sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a2, a2, t1 ; RV32ZBA-NEXT: add a2, a2, t2 -; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: mul a3, a3, t1 ; RV32ZBA-NEXT: add a2, a2, a3 ; RV32ZBA-NEXT: mul a1, t3, a1 ; RV32ZBA-NEXT: mulhu a0, t3, a0 @@ -2608,16 +2574,14 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: add a0, a0, a2 ; RV32ZBA-NEXT: sltu a1, t5, t4 ; RV32ZBA-NEXT: add a0, a0, a1 -; RV32ZBA-NEXT: add a0, a4, a0 -; RV32ZBA-NEXT: add a0, a0, s2 -; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: add a0, a5, a0 +; RV32ZBA-NEXT: add a0, a0, s0 +; RV32ZBA-NEXT: srai a1, a4, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t6, a1 ; RV32ZBA-NEXT: or a0, a1, a0 ; RV32ZBA-NEXT: seqz a0, a0 ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -2730,17 +2694,17 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 -; RV32-NEXT: snez a5, a5 -; RV32-NEXT: or a4, a4, a5 -; RV32-NEXT: mulhu a5, a3, a0 -; RV32-NEXT: snez a5, a5 -; RV32-NEXT: or a4, a4, a5 -; RV32-NEXT: or a4, a4, a6 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: snez a6, a6 +; RV32-NEXT: or a5, a5, a6 +; RV32-NEXT: mulhu a6, a3, a0 +; RV32-NEXT: snez a6, a6 +; RV32-NEXT: or a5, a5, a6 +; RV32-NEXT: or a4, a5, a4 ; RV32-NEXT: bnez a4, .LBB48_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: mv a0, a2 @@ -2764,17 +2728,17 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: add a4, a5, a4 
; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 -; RV32ZBA-NEXT: snez a5, a5 -; RV32ZBA-NEXT: or a4, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a3, a0 -; RV32ZBA-NEXT: snez a5, a5 -; RV32ZBA-NEXT: or a4, a4, a5 -; RV32ZBA-NEXT: or a4, a4, a6 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: snez a6, a6 +; RV32ZBA-NEXT: or a5, a5, a6 +; RV32ZBA-NEXT: mulhu a6, a3, a0 +; RV32ZBA-NEXT: snez a6, a6 +; RV32ZBA-NEXT: or a5, a5, a6 +; RV32ZBA-NEXT: or a4, a5, a4 ; RV32ZBA-NEXT: bnez a4, .LBB48_2 ; RV32ZBA-NEXT: # %bb.1: # %entry ; RV32ZBA-NEXT: mv a0, a2 @@ -2805,17 +2769,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a4, a1 +; RV32-NEXT: or a1, a5, a1 ; RV32-NEXT: mulhu a0, a3, a0 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: or a0, a0, a6 +; RV32-NEXT: or a0, a0, a4 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: ret ; @@ -2832,17 +2796,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: add a4, a5, a4 ; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a4, a1 +; RV32ZBA-NEXT: or a1, a5, a1 ; RV32ZBA-NEXT: mulhu a0, a3, a0 ; 
RV32ZBA-NEXT: snez a0, a0 ; RV32ZBA-NEXT: or a0, a1, a0 -; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: or a0, a0, a4 ; RV32ZBA-NEXT: xori a0, a0, 1 ; RV32ZBA-NEXT: ret ; @@ -3479,40 +3443,36 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset s0, -4 -; RV32-NEXT: .cfi_offset s1, -8 -; RV32-NEXT: .cfi_offset s2, -12 ; RV32-NEXT: mulhu a4, a0, a2 ; RV32-NEXT: mul a5, a1, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 -; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: sltu a5, a4, a5 +; RV32-NEXT: mulhu a6, a1, a2 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: mul a6, a0, a3 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: sltu a6, a4, a6 +; RV32-NEXT: mulhu a7, a0, a3 +; RV32-NEXT: add a6, a7, a6 ; RV32-NEXT: add a6, a5, a6 -; RV32-NEXT: mul a5, a0, a3 -; RV32-NEXT: add a7, a5, a4 -; RV32-NEXT: sltu a5, a7, a5 -; RV32-NEXT: mulhu a4, a0, a3 -; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: add t0, a6, a4 -; RV32-NEXT: mul t1, a1, a3 -; RV32-NEXT: add a4, t1, t0 -; RV32-NEXT: srai a5, a1, 31 -; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: mul a7, a1, a3 +; RV32-NEXT: add t0, a7, a6 +; RV32-NEXT: srai t1, a1, 31 +; RV32-NEXT: mul t2, a2, t1 ; RV32-NEXT: srai t3, a3, 31 ; RV32-NEXT: mul t4, t3, a0 ; RV32-NEXT: add t5, t4, t2 -; RV32-NEXT: add t6, a4, t5 -; RV32-NEXT: sltu s2, t6, a4 -; RV32-NEXT: sltu a4, a4, t1 -; RV32-NEXT: sltu s0, t0, a6 -; RV32-NEXT: mulhu s1, a1, a3 -; RV32-NEXT: add s1, s1, s0 -; RV32-NEXT: add a4, s1, a4 -; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add t6, t0, t5 +; RV32-NEXT: sltu s0, t6, t0 +; RV32-NEXT: sltu a7, t0, a7 +; RV32-NEXT: sltu a5, a6, a5 +; RV32-NEXT: mulhu a6, a1, a3 +; RV32-NEXT: add a5, a6, a5 +; RV32-NEXT: add a5, a5, a7 +; RV32-NEXT: mulhu a2, a2, t1 ; RV32-NEXT: add a2, a2, t2 -; RV32-NEXT: mul a3, a3, 
a5 +; RV32-NEXT: mul a3, a3, t1 ; RV32-NEXT: add a2, a2, a3 ; RV32-NEXT: mul a1, t3, a1 ; RV32-NEXT: mulhu a0, t3, a0 @@ -3521,9 +3481,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: sltu a1, t5, t4 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a0, a4, a0 -; RV32-NEXT: add a0, a0, s2 -; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: add a0, a5, a0 +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: srai a1, a4, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t6, a1 ; RV32-NEXT: or a0, a1, a0 @@ -3535,8 +3495,6 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB59_3: # %overflow ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3558,40 +3516,36 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 ; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill ; RV32ZBA-NEXT: .cfi_offset s0, -4 -; RV32ZBA-NEXT: .cfi_offset s1, -8 -; RV32ZBA-NEXT: .cfi_offset s2, -12 ; RV32ZBA-NEXT: mulhu a4, a0, a2 ; RV32ZBA-NEXT: mul a5, a1, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 -; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: sltu a5, a4, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a2 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: mul a6, a0, a3 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: sltu a6, a4, a6 +; RV32ZBA-NEXT: mulhu a7, a0, a3 +; RV32ZBA-NEXT: add a6, a7, a6 ; RV32ZBA-NEXT: add a6, a5, a6 -; RV32ZBA-NEXT: mul a5, a0, a3 -; RV32ZBA-NEXT: add a7, a5, a4 -; RV32ZBA-NEXT: sltu a5, a7, a5 -; RV32ZBA-NEXT: mulhu a4, a0, a3 -; RV32ZBA-NEXT: add a4, a4, a5 -; RV32ZBA-NEXT: add t0, a6, a4 -; RV32ZBA-NEXT: mul t1, a1, a3 -; RV32ZBA-NEXT: add a4, t1, t0 -; 
RV32ZBA-NEXT: srai a5, a1, 31 -; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: mul a7, a1, a3 +; RV32ZBA-NEXT: add t0, a7, a6 +; RV32ZBA-NEXT: srai t1, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, t1 ; RV32ZBA-NEXT: srai t3, a3, 31 ; RV32ZBA-NEXT: mul t4, t3, a0 ; RV32ZBA-NEXT: add t5, t4, t2 -; RV32ZBA-NEXT: add t6, a4, t5 -; RV32ZBA-NEXT: sltu s2, t6, a4 -; RV32ZBA-NEXT: sltu a4, a4, t1 -; RV32ZBA-NEXT: sltu s0, t0, a6 -; RV32ZBA-NEXT: mulhu s1, a1, a3 -; RV32ZBA-NEXT: add s1, s1, s0 -; RV32ZBA-NEXT: add a4, s1, a4 -; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add t6, t0, t5 +; RV32ZBA-NEXT: sltu s0, t6, t0 +; RV32ZBA-NEXT: sltu a7, t0, a7 +; RV32ZBA-NEXT: sltu a5, a6, a5 +; RV32ZBA-NEXT: mulhu a6, a1, a3 +; RV32ZBA-NEXT: add a5, a6, a5 +; RV32ZBA-NEXT: add a5, a5, a7 +; RV32ZBA-NEXT: mulhu a2, a2, t1 ; RV32ZBA-NEXT: add a2, a2, t2 -; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: mul a3, a3, t1 ; RV32ZBA-NEXT: add a2, a2, a3 ; RV32ZBA-NEXT: mul a1, t3, a1 ; RV32ZBA-NEXT: mulhu a0, t3, a0 @@ -3600,9 +3554,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: add a0, a0, a2 ; RV32ZBA-NEXT: sltu a1, t5, t4 ; RV32ZBA-NEXT: add a0, a0, a1 -; RV32ZBA-NEXT: add a0, a4, a0 -; RV32ZBA-NEXT: add a0, a0, s2 -; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: add a0, a5, a0 +; RV32ZBA-NEXT: add a0, a0, s0 +; RV32ZBA-NEXT: srai a1, a4, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t6, a1 ; RV32ZBA-NEXT: or a0, a1, a0 @@ -3614,8 +3568,6 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: li a0, 1 ; RV32ZBA-NEXT: .LBB59_3: # %overflow ; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -3647,41 +3599,41 @@ continue: define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32-LABEL: smulo2.br.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: li a6, -13 -; RV32-NEXT: mulhu a3, a0, a6 -; 
RV32-NEXT: mul a4, a1, a6 +; RV32-NEXT: li a2, -13 +; RV32-NEXT: mulhu a3, a0, a2 +; RV32-NEXT: mul a4, a1, a2 ; RV32-NEXT: add a3, a4, a3 ; RV32-NEXT: sltu a4, a3, a4 -; RV32-NEXT: mulhu a5, a1, a6 -; RV32-NEXT: add t3, a5, a4 -; RV32-NEXT: sub t0, a3, a0 -; RV32-NEXT: neg t1, a0 -; RV32-NEXT: sltu a2, t0, t1 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sub a3, a3, a0 +; RV32-NEXT: neg a5, a0 +; RV32-NEXT: sltu a6, a3, a5 ; RV32-NEXT: li a7, -1 -; RV32-NEXT: mulhu t2, a0, a7 -; RV32-NEXT: add a2, t2, a2 -; RV32-NEXT: add a2, t3, a2 -; RV32-NEXT: sub a5, a2, a1 -; RV32-NEXT: srai t6, a1, 31 -; RV32-NEXT: mul a4, t6, a6 -; RV32-NEXT: sub a4, a4, a0 -; RV32-NEXT: add t4, a5, a4 -; RV32-NEXT: sltu t5, t4, a5 -; RV32-NEXT: neg a3, a1 -; RV32-NEXT: sltu a3, a5, a3 -; RV32-NEXT: sltu a2, a2, t3 -; RV32-NEXT: mulhu a5, a1, a7 -; RV32-NEXT: add a2, a5, a2 -; RV32-NEXT: add a2, a2, a3 -; RV32-NEXT: sltu a3, a4, t1 -; RV32-NEXT: mulh a4, t6, a6 -; RV32-NEXT: sub a0, t2, a0 +; RV32-NEXT: mulhu t0, a0, a7 +; RV32-NEXT: add a6, t0, a6 +; RV32-NEXT: add a6, a4, a6 +; RV32-NEXT: sub t1, a6, a1 +; RV32-NEXT: srai t2, a1, 31 +; RV32-NEXT: mul t3, t2, a2 +; RV32-NEXT: sub t3, t3, a0 +; RV32-NEXT: add t4, t1, t3 +; RV32-NEXT: sltu t5, t4, t1 +; RV32-NEXT: neg t6, a1 +; RV32-NEXT: sltu t1, t1, t6 +; RV32-NEXT: sltu a4, a6, a4 +; RV32-NEXT: mulhu a6, a1, a7 +; RV32-NEXT: add a4, a6, a4 +; RV32-NEXT: add a4, a4, t1 +; RV32-NEXT: sltu a5, t3, a5 +; RV32-NEXT: mulh a2, t2, a2 +; RV32-NEXT: sub a0, t0, a0 ; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: add a0, a0, a4 -; RV32-NEXT: add a0, a0, a3 -; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a0, a4, a0 ; RV32-NEXT: add a0, a0, t5 -; RV32-NEXT: srai a1, t0, 31 +; RV32-NEXT: srai a1, a3, 31 ; RV32-NEXT: xor a0, a0, a1 ; RV32-NEXT: xor a1, t4, a1 ; RV32-NEXT: or a0, a1, a0 @@ -3709,41 +3661,41 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; ; RV32ZBA-LABEL: 
smulo2.br.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: li a6, -13 -; RV32ZBA-NEXT: mulhu a3, a0, a6 -; RV32ZBA-NEXT: mul a4, a1, a6 +; RV32ZBA-NEXT: li a2, -13 +; RV32ZBA-NEXT: mulhu a3, a0, a2 +; RV32ZBA-NEXT: mul a4, a1, a2 ; RV32ZBA-NEXT: add a3, a4, a3 ; RV32ZBA-NEXT: sltu a4, a3, a4 -; RV32ZBA-NEXT: mulhu a5, a1, a6 -; RV32ZBA-NEXT: add t3, a5, a4 -; RV32ZBA-NEXT: sub t0, a3, a0 -; RV32ZBA-NEXT: neg t1, a0 -; RV32ZBA-NEXT: sltu a2, t0, t1 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sub a3, a3, a0 +; RV32ZBA-NEXT: neg a5, a0 +; RV32ZBA-NEXT: sltu a6, a3, a5 ; RV32ZBA-NEXT: li a7, -1 -; RV32ZBA-NEXT: mulhu t2, a0, a7 -; RV32ZBA-NEXT: add a2, t2, a2 -; RV32ZBA-NEXT: add a2, t3, a2 -; RV32ZBA-NEXT: sub a5, a2, a1 -; RV32ZBA-NEXT: srai t6, a1, 31 -; RV32ZBA-NEXT: mul a4, t6, a6 -; RV32ZBA-NEXT: sub a4, a4, a0 -; RV32ZBA-NEXT: add t4, a5, a4 -; RV32ZBA-NEXT: sltu t5, t4, a5 -; RV32ZBA-NEXT: neg a3, a1 -; RV32ZBA-NEXT: sltu a3, a5, a3 -; RV32ZBA-NEXT: sltu a2, a2, t3 -; RV32ZBA-NEXT: mulhu a5, a1, a7 -; RV32ZBA-NEXT: add a2, a5, a2 -; RV32ZBA-NEXT: add a2, a2, a3 -; RV32ZBA-NEXT: sltu a3, a4, t1 -; RV32ZBA-NEXT: mulh a4, t6, a6 -; RV32ZBA-NEXT: sub a0, t2, a0 +; RV32ZBA-NEXT: mulhu t0, a0, a7 +; RV32ZBA-NEXT: add a6, t0, a6 +; RV32ZBA-NEXT: add a6, a4, a6 +; RV32ZBA-NEXT: sub t1, a6, a1 +; RV32ZBA-NEXT: srai t2, a1, 31 +; RV32ZBA-NEXT: mul t3, t2, a2 +; RV32ZBA-NEXT: sub t3, t3, a0 +; RV32ZBA-NEXT: add t4, t1, t3 +; RV32ZBA-NEXT: sltu t5, t4, t1 +; RV32ZBA-NEXT: neg t6, a1 +; RV32ZBA-NEXT: sltu t1, t1, t6 +; RV32ZBA-NEXT: sltu a4, a6, a4 +; RV32ZBA-NEXT: mulhu a6, a1, a7 +; RV32ZBA-NEXT: add a4, a6, a4 +; RV32ZBA-NEXT: add a4, a4, t1 +; RV32ZBA-NEXT: sltu a5, t3, a5 +; RV32ZBA-NEXT: mulh a2, t2, a2 +; RV32ZBA-NEXT: sub a0, t0, a0 ; RV32ZBA-NEXT: sub a0, a0, a1 -; RV32ZBA-NEXT: add a0, a0, a4 -; RV32ZBA-NEXT: add a0, a0, a3 -; RV32ZBA-NEXT: add a0, a2, a0 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: add a0, a0, a5 +; 
RV32ZBA-NEXT: add a0, a4, a0 ; RV32ZBA-NEXT: add a0, a0, t5 -; RV32ZBA-NEXT: srai a1, t0, 31 +; RV32ZBA-NEXT: srai a1, a3, 31 ; RV32ZBA-NEXT: xor a0, a0, a1 ; RV32ZBA-NEXT: xor a1, t4, a1 ; RV32ZBA-NEXT: or a0, a1, a0 @@ -3852,17 +3804,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) { ; RV32-NEXT: add a4, a5, a4 ; RV32-NEXT: mulhu a5, a0, a2 ; RV32-NEXT: add a4, a5, a4 -; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: sltu a4, a4, a5 ; RV32-NEXT: snez a5, a3 -; RV32-NEXT: snez a4, a1 -; RV32-NEXT: and a4, a4, a5 +; RV32-NEXT: snez a6, a1 +; RV32-NEXT: and a5, a6, a5 ; RV32-NEXT: mulhu a1, a1, a2 ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a1, a4, a1 +; RV32-NEXT: or a1, a5, a1 ; RV32-NEXT: mulhu a0, a3, a0 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: or a0, a0, a6 +; RV32-NEXT: or a0, a0, a4 ; RV32-NEXT: beqz a0, .LBB62_2 ; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: li a0, 0 @@ -3889,17 +3841,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) { ; RV32ZBA-NEXT: add a4, a5, a4 ; RV32ZBA-NEXT: mulhu a5, a0, a2 ; RV32ZBA-NEXT: add a4, a5, a4 -; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: sltu a4, a4, a5 ; RV32ZBA-NEXT: snez a5, a3 -; RV32ZBA-NEXT: snez a4, a1 -; RV32ZBA-NEXT: and a4, a4, a5 +; RV32ZBA-NEXT: snez a6, a1 +; RV32ZBA-NEXT: and a5, a6, a5 ; RV32ZBA-NEXT: mulhu a1, a1, a2 ; RV32ZBA-NEXT: snez a1, a1 -; RV32ZBA-NEXT: or a1, a4, a1 +; RV32ZBA-NEXT: or a1, a5, a1 ; RV32ZBA-NEXT: mulhu a0, a3, a0 ; RV32ZBA-NEXT: snez a0, a0 ; RV32ZBA-NEXT: or a0, a1, a0 -; RV32ZBA-NEXT: or a0, a0, a6 +; RV32ZBA-NEXT: or a0, a0, a4 ; RV32ZBA-NEXT: beqz a0, .LBB62_2 ; RV32ZBA-NEXT: # %bb.1: # %overflow ; RV32ZBA-NEXT: li a0, 0 From 75184f14aecd8147a02189a843789a4eb5e5b571 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Fri, 21 Jan 2022 13:04:32 +0700 Subject: [PATCH 133/946] [DebugInfo] Fix handling '# line "file"' for DWARFv5 `CppHashInfo.Filename` is a `StringRef` that references a part of the source file and it is not null-terminated at the end 
of the file name. `AsmParser::parseAndMatchAndEmitTargetInstruction()` passes it to `getStreamer().emitDwarfFileDirective()`, and it eventually comes to `isRootFile()`. The comparison fails because `FileName.data()` is not properly terminated. In addition, the old code might cause a significant speed degradation for long source files. The `operator!=()` for `std::string` and `const char *` can be implemented in a way that it finds the length of the second argument first, which slows the comparison for long data. `parseAndMatchAndEmitTargetInstruction()` calls `emitDwarfFileDirective()` every time if `CppHashInfo.Filename` is not empty. As a result, the longer the source file is, the slower the compilation went, and for a very long file, it might take hours instead of a couple of seconds normally. Differential Revision: https://reviews.llvm.org/D117785 --- llvm/lib/MC/MCDwarf.cpp | 2 +- llvm/test/MC/ELF/debug-hash-file.s | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 1c9cfb9042e28..15cfdba5d7fde 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -561,7 +561,7 @@ Expected MCDwarfLineTable::tryGetFile(StringRef &Directory, static bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory, StringRef &FileName, Optional Checksum) { - if (RootFile.Name.empty() || RootFile.Name != FileName.data()) + if (RootFile.Name.empty() || StringRef(RootFile.Name) != FileName) return false; return RootFile.Checksum == Checksum; } diff --git a/llvm/test/MC/ELF/debug-hash-file.s b/llvm/test/MC/ELF/debug-hash-file.s index a72e5d6aa8d42..9d4cf6ae68bdc 100644 --- a/llvm/test/MC/ELF/debug-hash-file.s +++ b/llvm/test/MC/ELF/debug-hash-file.s @@ -1,5 +1,8 @@ // RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -g -dwarf-version 4 -o %t %s -// RUN: llvm-dwarfdump -debug-info -debug-line %t | 
FileCheck %s --check-prefixes=CHECK,DWARF4 + +// RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -g -dwarf-version 5 -o %t %s +// RUN: llvm-dwarfdump -debug-info -debug-line %t | FileCheck %s --check-prefixes=CHECK,DWARF5 // CHECK: DW_TAG_compile_unit // CHECK-NOT: DW_TAG_ @@ -8,10 +11,17 @@ // CHECK-NOT: DW_TAG_ // CHECK: DW_AT_decl_file ("/MyTest/Inputs{{(/|\\)+}}other.S") -// CHECK: include_directories[ 1] = "/MyTest/Inputs" -// CHECK: file_names[ 1]: -// CHECK-NEXT: name: "other.S" -// CHECK-NEXT: dir_index: 1 +// DWARF4: include_directories[ 1] = "/MyTest/Inputs" +// DWARF4: file_names[ 1]: +// DWARF4-NEXT: name: "other.S" +// DWARF4-NEXT: dir_index: 1 + +// DWARF5: include_directories[ 0] = +// DWARF5-NOT: include_directories[ 1] = +// DWARF5: file_names[ 0]: +// DWARF5-NEXT: name: "/MyTest/Inputs/other.S" +// DWARF5-NEXT: dir_index: 0 +// DWARF5-NOT: file_names[ 1]: # 1 "/MyTest/Inputs/other.S" From 86b08ed6bb16a96e921445299858f3cabd6f9e45 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Fri, 21 Jan 2022 13:04:44 +0700 Subject: [PATCH 134/946] [DebugInfo][NFC] Do not call 'isRootFile' for DWARF Version < 5 A quicker comparison should be done first. 
Differential Revision: https://reviews.llvm.org/D117786 --- llvm/lib/MC/MCDwarf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 15cfdba5d7fde..2cb5a000f88a7 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -586,7 +586,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory, trackMD5Usage(Checksum.hasValue()); HasSource = (Source != None); } - if (isRootFile(RootFile, Directory, FileName, Checksum) && DwarfVersion >= 5) + if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum)) return 0; if (FileNumber == 0) { // File numbers start with 1 and/or after any file numbers From 7f0f4cab18a9e3abf8d0583c1a87e352cd5577a6 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 21 Jan 2022 06:28:42 +0000 Subject: [PATCH 135/946] [libc][NFC] Add 'struct_' prefix to type headers defining struct types. This allows header generator to generate type inclusion boiler plate in a straightforward manner. 
--- libc/config/linux/api.td | 4 ++-- libc/include/CMakeLists.txt | 4 ++-- libc/include/llvm-libc-types/CMakeLists.txt | 4 ++-- .../llvm-libc-types/{__sigaction.h => struct_sigaction.h} | 0 libc/include/llvm-libc-types/{tm.h => struct_tm.h} | 0 5 files changed, 6 insertions(+), 6 deletions(-) rename libc/include/llvm-libc-types/{__sigaction.h => struct_sigaction.h} (100%) rename libc/include/llvm-libc-types/{tm.h => struct_tm.h} (100%) diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index f882ec304f192..48cd1c1113485 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -24,7 +24,7 @@ def SSizeT : TypeDecl<"ssize_t"> { def StructTm: TypeDecl<"struct tm"> { let Decl = [{ - #include + #include }]; } @@ -334,7 +334,7 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> { def StructSigactionDefn : TypeDecl<"struct sigaction"> { let Decl = [{ - #include + #include }]; } diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 0b62563f3170a..d3b813b87b49e 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -67,7 +67,7 @@ add_gen_header( DEPENDS .llvm_libc_common_h .llvm-libc-types.time_t - .llvm-libc-types.tm + .llvm-libc-types.struct_tm ) add_gen_header( @@ -103,7 +103,7 @@ add_gen_header( DATA_FILES ../config/${LIBC_TARGET_OS}/signal.h.in DEPENDS - .llvm-libc-types.__sigaction + .llvm-libc-types.struct_sigaction .llvm-libc-types.__sighandler_t ) diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 73c295843e2a4..3c9bed7183b5d 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -1,7 +1,6 @@ add_header(__bsearchcompare_t HDR __bsearchcompare_t.h) add_header(__call_once_func_t HDR __call_once_func_t.h) add_header(__qsortcompare_t HDR __qsortcompare_t.h) -add_header(__sigaction HDR __sigaction.h) add_header(__sighandler_t HDR __sighandler_t.h) add_header(cnd_t HDR cnd_t.h) 
add_header(double_t HDR double_t.h) @@ -18,7 +17,8 @@ add_header(off_t HDR off_t.h) add_header(once_flag HDR once_flag.h) add_header(size_t HDR size_t.h) add_header(ssize_t HDR ssize_t.h) +add_header(struct_sigaction HDR struct_sigaction.h) +add_header(struct_tm HDR struct_tm.h) add_header(thrd_start_t HDR thrd_start_t.h) add_header(thrd_t HDR thrd_t.h) add_header(time_t HDR time_t.h) -add_header(tm HDR tm.h) diff --git a/libc/include/llvm-libc-types/__sigaction.h b/libc/include/llvm-libc-types/struct_sigaction.h similarity index 100% rename from libc/include/llvm-libc-types/__sigaction.h rename to libc/include/llvm-libc-types/struct_sigaction.h diff --git a/libc/include/llvm-libc-types/tm.h b/libc/include/llvm-libc-types/struct_tm.h similarity index 100% rename from libc/include/llvm-libc-types/tm.h rename to libc/include/llvm-libc-types/struct_tm.h From e6de53b4de4aecca4ac892500a0907805896ed27 Mon Sep 17 00:00:00 2001 From: eopXD Date: Thu, 20 Jan 2022 02:24:10 -0800 Subject: [PATCH 136/946] [RISCV] Bump rvv-related extensions from 0.10 to 1.0 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D112987 --- clang/test/Driver/riscv-arch.c | 12 +-- .../test/Preprocessor/riscv-target-features.c | 64 ++++++++-------- llvm/lib/Support/RISCVISAInfo.cpp | 36 ++++----- llvm/test/CodeGen/RISCV/attributes.ll | 8 +- llvm/test/MC/RISCV/attribute-arch.s | 76 +++++++++---------- 5 files changed, 98 insertions(+), 98 deletions(-) diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 656abde4f75e4..36043124e4565 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -412,20 +412,20 @@ // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1' // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension 'v' -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf 
-march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v" -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s -// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv0p10_zvl32b0p10' +// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv1p0_zvl32b1p0' // RV32-EXPERIMENTAL-ZVL-NOFLAG: requires '-menable-experimental-extensions' -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-BADVERS %s -// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv0p10_zvl32b0p1' +// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1' // RV32-EXPERIMENTAL-ZVL-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-GOODVERS %s // RV32-EXPERIMENTAL-ZVL-GOODVERS: "-target-feature" "+experimental-zvl32b" diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index c69285f6e3996..ec356cee7426e 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -212,12 +212,12 @@ // CHECK-ZBT-EXT: __riscv_zbt 93000{{$}} // RUN: %clang 
-target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv32iv0p10 -x c -E -dM %s \ +// RUN: -march=rv32iv1p0 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10 -x c -E -dM %s \ +// RUN: -march=rv64iv1p0 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// CHECK-V-EXT: __riscv_v 10000{{$}} +// CHECK-V-EXT: __riscv_v 1000000{{$}} // CHECK-V-EXT: __riscv_vector 1 // RUN: %clang -target riscv32-unknown-linux-gnu \ @@ -237,107 +237,107 @@ // CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}} // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-V-MINVLEN %s // CHECK-V-MINVLEN: __riscv_v_elen 64 // CHECK-V-MINVLEN: __riscv_v_elen_fp 64 // CHECK-V-MINVLEN: __riscv_v_min_vlen 128 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl256b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl256b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL256b %s // CHECK-ZVL256b: __riscv_v_min_vlen 256 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl512b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl512b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL512b %s // CHECK-ZVL512b: __riscv_v_min_vlen 512 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl1024b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl1024b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL1024b %s // CHECK-ZVL1024b: __riscv_v_min_vlen 1024 // RUN: %clang -target riscv32-unknown-linux-gnu 
-menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl2048b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl2048b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL2048b %s // CHECK-ZVL2048b: __riscv_v_min_vlen 2048 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl4096b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl4096b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL4096b %s // CHECK-ZVL4096b: __riscv_v_min_vlen 4096 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl8192b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl8192b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL8192b %s // CHECK-ZVL8192b: __riscv_v_min_vlen 8192 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl16384b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl16384b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL16384b %s // CHECK-ZVL16384b: __riscv_v_min_vlen 16384 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl32768b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl32768b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL32768b %s // CHECK-ZVL32768b: __riscv_v_min_vlen 32768 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64iv0p10_zvl65536b0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64iv1p0_zvl65536b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL65536b %s // CHECK-ZVL65536b: __riscv_v_min_vlen 65536 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64ifdzve64d0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64ifdzve64d1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck 
--check-prefix=CHECK-ZVE64D-EXT %s // CHECK-ZVE64D-EXT: __riscv_v_elen 64 // CHECK-ZVE64D-EXT: __riscv_v_elen_fp 64 // CHECK-ZVE64D-EXT: __riscv_v_min_vlen 64 // CHECK-ZVE64D-EXT: __riscv_vector 1 -// CHECK-ZVE64D-EXT: __riscv_zve32f 10000{{$}} -// CHECK-ZVE64D-EXT: __riscv_zve32x 10000{{$}} -// CHECK-ZVE64D-EXT: __riscv_zve64d 10000{{$}} -// CHECK-ZVE64D-EXT: __riscv_zve64f 10000{{$}} -// CHECK-ZVE64D-EXT: __riscv_zve64x 10000{{$}} +// CHECK-ZVE64D-EXT: __riscv_zve32f 1000000{{$}} +// CHECK-ZVE64D-EXT: __riscv_zve32x 1000000{{$}} +// CHECK-ZVE64D-EXT: __riscv_zve64d 1000000{{$}} +// CHECK-ZVE64D-EXT: __riscv_zve64f 1000000{{$}} +// CHECK-ZVE64D-EXT: __riscv_zve64x 1000000{{$}} // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64ifzve64f0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64ifzve64f1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE64F-EXT %s // CHECK-ZVE64F-EXT: __riscv_v_elen 64 // CHECK-ZVE64F-EXT: __riscv_v_elen_fp 32 // CHECK-ZVE64F-EXT: __riscv_v_min_vlen 64 // CHECK-ZVE64F-EXT: __riscv_vector 1 -// CHECK-ZVE64F-EXT: __riscv_zve32f 10000{{$}} -// CHECK-ZVE64F-EXT: __riscv_zve32x 10000{{$}} -// CHECK-ZVE64F-EXT: __riscv_zve64f 10000{{$}} -// CHECK-ZVE64F-EXT: __riscv_zve64x 10000{{$}} +// CHECK-ZVE64F-EXT: __riscv_zve32f 1000000{{$}} +// CHECK-ZVE64F-EXT: __riscv_zve32x 1000000{{$}} +// CHECK-ZVE64F-EXT: __riscv_zve64f 1000000{{$}} +// CHECK-ZVE64F-EXT: __riscv_zve64x 1000000{{$}} // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64izve64x0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64izve64x1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE64X-EXT %s // CHECK-ZVE64X-EXT: __riscv_v_elen 64 // CHECK-ZVE64X-EXT: __riscv_v_elen_fp 0 // CHECK-ZVE64X-EXT: __riscv_v_min_vlen 64 // CHECK-ZVE64X-EXT: __riscv_vector 1 -// CHECK-ZVE64X-EXT: __riscv_zve32x 10000{{$}} -// CHECK-ZVE64X-EXT: __riscv_zve64x 10000{{$}} +// 
CHECK-ZVE64X-EXT: __riscv_zve32x 1000000{{$}} +// CHECK-ZVE64X-EXT: __riscv_zve64x 1000000{{$}} // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64ifzve32f0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64ifzve32f1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE32F-EXT %s // CHECK-ZVE32F-EXT: __riscv_v_elen 32 // CHECK-ZVE32F-EXT: __riscv_v_elen_fp 32 // CHECK-ZVE32F-EXT: __riscv_v_min_vlen 32 // CHECK-ZVE32F-EXT: __riscv_vector 1 -// CHECK-ZVE32F-EXT: __riscv_zve32f 10000{{$}} -// CHECK-ZVE32F-EXT: __riscv_zve32x 10000{{$}} +// CHECK-ZVE32F-EXT: __riscv_zve32f 1000000{{$}} +// CHECK-ZVE32F-EXT: __riscv_zve32x 1000000{{$}} // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ -// RUN: -march=rv64izve32x0p10 -x c -E -dM %s -o - \ +// RUN: -march=rv64izve32x1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE32X-EXT %s // CHECK-ZVE32X-EXT: __riscv_v_elen 32 // CHECK-ZVE32X-EXT: __riscv_v_elen_fp 0 // CHECK-ZVE32X-EXT: __riscv_v_min_vlen 32 // CHECK-ZVE32X-EXT: __riscv_vector 1 -// CHECK-ZVE32X-EXT: __riscv_zve32x 10000{{$}} +// CHECK-ZVE32X-EXT: __riscv_zve32x 1000000{{$}} diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 0d9b6e4fa4bb5..80fae5510326d 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -60,7 +60,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { }; static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { - {"v", RISCVExtensionVersion{0, 10}}, + {"v", RISCVExtensionVersion{1, 0}}, {"zbe", RISCVExtensionVersion{0, 93}}, {"zbf", RISCVExtensionVersion{0, 93}}, {"zbm", RISCVExtensionVersion{0, 93}}, @@ -68,23 +68,23 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zbr", RISCVExtensionVersion{0, 93}}, {"zbt", RISCVExtensionVersion{0, 93}}, - {"zvl32b", RISCVExtensionVersion{0, 10}}, - {"zvl64b", 
RISCVExtensionVersion{0, 10}}, - {"zvl128b", RISCVExtensionVersion{0, 10}}, - {"zvl256b", RISCVExtensionVersion{0, 10}}, - {"zvl512b", RISCVExtensionVersion{0, 10}}, - {"zvl1024b", RISCVExtensionVersion{0, 10}}, - {"zvl2048b", RISCVExtensionVersion{0, 10}}, - {"zvl4096b", RISCVExtensionVersion{0, 10}}, - {"zvl8192b", RISCVExtensionVersion{0, 10}}, - {"zvl16384b", RISCVExtensionVersion{0, 10}}, - {"zvl32768b", RISCVExtensionVersion{0, 10}}, - {"zvl65536b", RISCVExtensionVersion{0, 10}}, - {"zve32x", RISCVExtensionVersion{0, 10}}, - {"zve32f", RISCVExtensionVersion{0, 10}}, - {"zve64x", RISCVExtensionVersion{0, 10}}, - {"zve64f", RISCVExtensionVersion{0, 10}}, - {"zve64d", RISCVExtensionVersion{0, 10}}, + {"zvl32b", RISCVExtensionVersion{1, 0}}, + {"zvl64b", RISCVExtensionVersion{1, 0}}, + {"zvl128b", RISCVExtensionVersion{1, 0}}, + {"zvl256b", RISCVExtensionVersion{1, 0}}, + {"zvl512b", RISCVExtensionVersion{1, 0}}, + {"zvl1024b", RISCVExtensionVersion{1, 0}}, + {"zvl2048b", RISCVExtensionVersion{1, 0}}, + {"zvl4096b", RISCVExtensionVersion{1, 0}}, + {"zvl8192b", RISCVExtensionVersion{1, 0}}, + {"zvl16384b", RISCVExtensionVersion{1, 0}}, + {"zvl32768b", RISCVExtensionVersion{1, 0}}, + {"zvl65536b", RISCVExtensionVersion{1, 0}}, + {"zve32x", RISCVExtensionVersion{1, 0}}, + {"zve32f", RISCVExtensionVersion{1, 0}}, + {"zve64x", RISCVExtensionVersion{1, 0}}, + {"zve64f", RISCVExtensionVersion{1, 0}}, + {"zve64d", RISCVExtensionVersion{1, 0}}, }; static bool stripExperimentalPrefix(StringRef &Ext) { diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index dd4a340edeac4..fa02d72797350 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -59,8 +59,8 @@ ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93" ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0" ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" -; RV32V: .attribute 5, 
"rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" -; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0" ; RV64M: .attribute 5, "rv64i2p0_m2p0" @@ -80,8 +80,8 @@ ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93" ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0" ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" -; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" -; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0" define i32 @addi(i32 %a) { diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 703b6a6aa8105..d95e99348e434 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -35,8 +35,8 @@ ## Experimental extensions require version string to be explicitly specified -.attribute arch, "rv32iv0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0" +# 
CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32izba1p0" # CHECK: attribute 5, "rv32i2p0_zba1p0" @@ -74,59 +74,59 @@ .attribute arch, "rv32ifzfh1p0" # CHECK: attribute 5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0" -.attribute arch, "rv32iv0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl32b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl32b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl64b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl128b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl128b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl256b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl256b1p0" +# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl512b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl512b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl1024b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl1024b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl2048b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl2048b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl4096b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10" +.attribute arch, "rv32iv1p0zvl4096b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0" -.attribute arch, "rv32iv0p10zvl8192b0p10" -# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10" +.attribute arch, "rv32iv1p0zvl8192b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" -.attribute arch, "rv32iv0p10zvl16384b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10" +.attribute arch, "rv32iv1p0zvl16384b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" -.attribute arch, "rv32iv0p10zvl32768b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10" +.attribute arch, "rv32iv1p0zvl32768b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" -.attribute arch, "rv32iv0p10zvl65536b0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10" +.attribute arch, "rv32iv1p0zvl65536b1p0" +# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0" -.attribute arch, "rv32i_zve32x0p10" -# CHECK: attribute 5, "rv32i2p0_zve32x0p10_zvl32b0p10" +.attribute arch, "rv32i_zve32x1p0" +# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zvl32b1p0" -.attribute arch, "rv32if_zve32f0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10" +.attribute arch, "rv32if_zve32f1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0" -.attribute arch, "rv32i_zve64x0p10" -# CHECK: attribute 5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32i_zve64x1p0" +# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32if_zve64f0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32if_zve64f1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32ifd_zve64d0p10" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10" +.attribute arch, "rv32ifd_zve64d1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32i_zbkb1p0" # CHECK: attribute 5, "rv32i2p0_zbkb1p0" From a99e06aa869b44588a18a423f58e0ab30c292d8e Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Thu, 20 Jan 2022 22:27:06 -0800 Subject: [PATCH 137/946] [mlir][Linalg] Avoid generating illegal operations during elementwise fusion. In some cases, fusion can produce illegal operations if after fusion the range of some of the loops cannot be computed from shapes of its operands. Check for this case and abort the fusion if this happens. 
Differential Revision: https://reviews.llvm.org/D117602 --- .../Linalg/Transforms/ElementwiseOpFusion.cpp | 7 +++++ .../Linalg/fusion-elementwise-ops.mlir | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index 33286258543e5..be34ef8bbd625 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -318,6 +318,13 @@ fuseElementwiseOpsImpl(GenericOp producer, OpOperand *consumerOpOperand, consumer.iterator_types(), /*doc=*/nullptr, /*library_call=*/nullptr); + if (!fusedOp.getShapesToLoopsMap()) { + // Fused op has invalid indexing maps. Typically this means something is off + // in the input, but going ahead here would result in verification errors. + // So cleanup and abort. + rewriter.eraseOp(fusedOp); + return llvm::None; + } // Construct an AffineMap from consumer loops to producer loops. 
// consumer loop -> tensor index diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir index 6ae9e15543e1c..3f68820b18cc7 100644 --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -945,3 +945,33 @@ func @no_fusion_missing_reduction_shape(%arg0: tensor, %arg1: index) -> ten } -> tensor return %8 : tensor } + +// ----- + +func @illegal_fusion(%arg0 : tensor<5000xi64>, %arg1 : tensor<5000xi32>) -> tensor<5000xi32> { + %c1_i32 = arith.constant 1 : i32 + %0 = linalg.generic { + indexing_maps = [affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + outs(%arg0 : tensor<5000xi64>) { + ^bb0(%arg3: i64): // no predecessors + %22 = linalg.index 0 : index + %23 = arith.index_cast %22 : index to i64 + linalg.yield %23 : i64 + } -> tensor<5000xi64> + %1 = linalg.init_tensor [5000] : tensor<5000xi32> + %2 = linalg.generic { + indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>], + iterator_types = ["parallel", "parallel"]} + ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) { + ^bb0(%arg3: i64, %arg5: i32): // no predecessors + %22 = arith.index_cast %arg3 : i64 to index + %23 = tensor.extract %arg1[%22] : tensor<5000xi32> + linalg.yield %23 : i32 + } -> tensor<5000xi32> + return %2 : tensor<5000xi32> +} +// CHECK-LABEL: func @illegal_fusion( +// CHECK: %[[PRODUCER:.+]] = linalg.generic +// CHECK: linalg.generic +// CHECK-SAME: ins(%[[PRODUCER]] From 05cd9a0596d8d2cc4fdb1d1dfa0957968aceaf92 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 09:06:35 +0100 Subject: [PATCH 138/946] [ConstantFold] Simplify type check in reinterpret load folding (NFC) Keep a list of allowed types, but then always construct the map type the same way. We need an integer with the same width as the original type. 
--- llvm/lib/Analysis/ConstantFolding.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 253a7243bcf59..d42086a10ee14 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -553,23 +553,16 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, // If this isn't an integer load we can't fold it directly. if (!IntType) { - // If this is a float/double load, we can try folding it as an int32/64 load - // and then bitcast the result. This can be useful for union cases. Note + // If this is a non-integer load, we can try folding it as an int load and + // then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. - Type *MapTy; - if (LoadTy->isHalfTy()) - MapTy = Type::getInt16Ty(C->getContext()); - else if (LoadTy->isFloatTy()) - MapTy = Type::getInt32Ty(C->getContext()); - else if (LoadTy->isDoubleTy()) - MapTy = Type::getInt64Ty(C->getContext()); - else if (LoadTy->isVectorTy()) { - MapTy = PointerType::getIntNTy( - C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize()); - } else + if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() && + !LoadTy->isVectorTy()) return nullptr; + Type *MapTy = Type::getIntNTy( + C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize()); if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) { if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && !LoadTy->isX86_AMXTy()) From 6a19cb837c9b2ca14642bb0a8f1234903e4430d0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 20 Jan 2022 16:48:19 +0100 Subject: [PATCH 139/946] [ConstantFold] Support pointers in reinterpret load folding Peculiarly, the necessary code to handle pointers (including the check for non-integral address spaces) is already in place, because 
we were already allowing vectors of pointers here, just not plain pointers. --- llvm/lib/Analysis/ConstantFolding.cpp | 2 +- llvm/test/Transforms/InstSimplify/ConstProp/loads.ll | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index d42086a10ee14..c834ecf107915 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -558,7 +558,7 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, // that address spaces don't matter here since we're not going to result in // an actual new load. if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() && - !LoadTy->isVectorTy()) + !LoadTy->isPointerTy() && !LoadTy->isVectorTy()) return nullptr; Type *MapTy = Type::getIntNTy( diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll index 65daa324f7468..990063be0468e 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll @@ -340,9 +340,11 @@ define i32 @load_all_undef() { @g_i8_data = constant [16 x i8] c"\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" define i64* @load_ptr_from_i8_data() { -; CHECK-LABEL: @load_ptr_from_i8_data( -; CHECK-NEXT: [[V:%.*]] = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**), align 8 -; CHECK-NEXT: ret i64* [[V]] +; LE-LABEL: @load_ptr_from_i8_data( +; LE-NEXT: ret i64* inttoptr (i64 1 to i64*) +; +; BE-LABEL: @load_ptr_from_i8_data( +; BE-NEXT: ret i64* inttoptr (i64 72057594037927936 to i64*) ; %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**) ret i64* %v From 7950010e4983a58d19a5d8a831f4c2467c04c56d Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 21 Jan 2022 09:15:50 +0100 Subject: [PATCH 140/946] [VE][NFC] Factor out helper functions Factor out some helper functions to cleanup VEISelLowering. 
Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D117683 --- llvm/lib/Target/VE/VECustomDAG.cpp | 30 ++++++++++++++++++++++ llvm/lib/Target/VE/VECustomDAG.h | 6 +++++ llvm/lib/Target/VE/VEISelLowering.cpp | 37 ++++----------------------- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp index 98348c504990d..2f9976e426129 100644 --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -19,9 +19,39 @@ namespace llvm { +/// \returns the VVP_* SDNode opcode corresponsing to \p OC. +Optional getVVPOpcode(unsigned Opcode) { + switch (Opcode) { +#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \ + case ISD::VPOPC: \ + return VEISD::VVPNAME; +#define ADD_VVP_OP(VVPNAME, SDNAME) \ + case VEISD::VVPNAME: \ + case ISD::SDNAME: \ + return VEISD::VVPNAME; +#include "VVPNodes.def" + } + return None; +} + +bool isVVPBinaryOp(unsigned VVPOpcode) { + switch (VVPOpcode) { +#define ADD_BINARY_VVP_OP(VVPNAME, ...) 
\ + case VEISD::VVPNAME: \ + return true; +#include "VVPNodes.def" + } + return false; +} + SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget, bool IsOpaque) const { return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque); } +SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar, + SDValue AVL) const { + return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL}); +} + } // namespace llvm diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h index 05c4c603c2fa9..e78b5dda6828c 100644 --- a/llvm/lib/Target/VE/VECustomDAG.h +++ b/llvm/lib/Target/VE/VECustomDAG.h @@ -21,6 +21,10 @@ namespace llvm { +Optional getVVPOpcode(unsigned Opcode); + +bool isVVPBinaryOp(unsigned Opcode); + class VECustomDAG { SelectionDAG &DAG; SDLoc DL; @@ -64,6 +68,8 @@ class VECustomDAG { SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false, bool IsOpaque = false) const; + + SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index abedee4788d9d..3ab876aa05c99 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -1661,8 +1661,7 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op, MVT LegalResVT = MVT::getVectorVT(ElemVT, 256); auto AVL = CDAG.getConstant(NumEls, MVT::i32); - return CDAG.getNode(VEISD::VEC_BROADCAST, LegalResVT, - {Op.getOperand(0), AVL}); + return CDAG.getBroadcast(LegalResVT, Op.getOperand(0), AVL); } // Expand @@ -2667,21 +2666,6 @@ bool VETargetLowering::hasAndNot(SDValue Y) const { return true; } -/// \returns the VVP_* SDNode opcode corresponsing to \p OC. 
-static Optional getVVPOpcode(unsigned Opcode) { - switch (Opcode) { -#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \ - case ISD::VPOPC: \ - return VEISD::VVPNAME; -#define ADD_VVP_OP(VVPNAME, SDNAME) \ - case VEISD::VVPNAME: \ - case ISD::SDNAME: \ - return VEISD::VVPNAME; -#include "VVPNodes.def" - } - return None; -} - SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { // Can we represent this as a VVP node. const unsigned Opcode = Op->getOpcode(); @@ -2711,26 +2695,15 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const { // Materialize the VL parameter. AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32); SDValue ConstTrue = CDAG.getConstant(1, MVT::i32); - Mask = CDAG.getNode(VEISD::VEC_BROADCAST, MaskVT, - ConstTrue); // emit a VEISD::VEC_BROADCAST here. + Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL); } - // Categories we are interested in. - bool IsBinaryOp = false; - - switch (VVPOpcode) { -#define ADD_BINARY_VVP_OP(VVPNAME, ...) \ - case VEISD::VVPNAME: \ - IsBinaryOp = true; \ - break; -#include "VVPNodes.def" - } - - if (IsBinaryOp) { + if (isVVPBinaryOp(VVPOpcode)) { assert(LegalVecVT.isSimple()); return CDAG.getNode(VVPOpcode, LegalVecVT, {Op->getOperand(0), Op->getOperand(1), Mask, AVL}); - } else if (VVPOpcode == VEISD::VVP_SELECT) { + } + if (VVPOpcode == VEISD::VVP_SELECT) { auto Mask = Op->getOperand(0); auto OnTrue = Op->getOperand(1); auto OnFalse = Op->getOperand(2); From 3f9d1f516e19cc9548cf17ec60b73300d4ab8360 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 09:20:54 +0100 Subject: [PATCH 141/946] [InstSimplify] Add tests for reinterpret load of floats (NFC) Add tests for currently unsupported float types. 
--- .../InstSimplify/ConstProp/loads.ll | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll index 990063be0468e..43038bcbdbfe7 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll @@ -100,17 +100,53 @@ define double @test8() { ; i128 load. -define i128 @test9() { -; LE-LABEL: @test9( +define i128 @test_i128() { +; LE-LABEL: @test_i128( ; LE-NEXT: ret i128 2071796475790618158476296315 ; -; BE-LABEL: @test9( +; BE-LABEL: @test_i128( ; BE-NEXT: ret i128 2268949521066387161080 ; %r = load i128, i128* bitcast({i64, i64}* @g3 to i128*) ret i128 %r } +define fp128 @test_fp128() { +; CHECK-LABEL: @test_fp128( +; CHECK-NEXT: [[R:%.*]] = load fp128, fp128* bitcast ({ i64, i64 }* @g3 to fp128*), align 16 +; CHECK-NEXT: ret fp128 [[R]] +; + %r = load fp128, fp128* bitcast({i64, i64}* @g3 to fp128*) + ret fp128 %r +} + +define ppc_fp128 @test_ppc_fp128() { +; CHECK-LABEL: @test_ppc_fp128( +; CHECK-NEXT: [[R:%.*]] = load ppc_fp128, ppc_fp128* bitcast ({ i64, i64 }* @g3 to ppc_fp128*), align 16 +; CHECK-NEXT: ret ppc_fp128 [[R]] +; + %r = load ppc_fp128, ppc_fp128* bitcast({i64, i64}* @g3 to ppc_fp128*) + ret ppc_fp128 %r +} + +define x86_fp80 @test_x86_fp80() { +; CHECK-LABEL: @test_x86_fp80( +; CHECK-NEXT: [[R:%.*]] = load x86_fp80, x86_fp80* bitcast ({ i64, i64 }* @g3 to x86_fp80*), align 16 +; CHECK-NEXT: ret x86_fp80 [[R]] +; + %r = load x86_fp80, x86_fp80* bitcast({i64, i64}* @g3 to x86_fp80*) + ret x86_fp80 %r +} + +define bfloat @test_bfloat() { +; CHECK-LABEL: @test_bfloat( +; CHECK-NEXT: [[R:%.*]] = load bfloat, bfloat* bitcast ({ i64, i64 }* @g3 to bfloat*), align 2 +; CHECK-NEXT: ret bfloat [[R]] +; + %r = load bfloat, bfloat* bitcast({i64, i64}* @g3 to bfloat*) + ret bfloat %r +} + ; vector load. 
define <2 x i64> @test10() { ; CHECK-LABEL: @test10( From b4900296e4a51b0076bac69b31871c7a29efa90f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 09:23:41 +0100 Subject: [PATCH 142/946] [ConstantFold] Allow all float types in reinterpret load folding Rather than hardcoding just half, float and double, allow all floating point types. --- llvm/lib/Analysis/ConstantFolding.cpp | 4 +-- .../InstSimplify/ConstProp/loads.ll | 32 ++++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index c834ecf107915..38c9cc7b9df29 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -557,8 +557,8 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, // then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. - if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() && - !LoadTy->isPointerTy() && !LoadTy->isVectorTy()) + if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() && + !LoadTy->isVectorTy()) return nullptr; Type *MapTy = Type::getIntNTy( diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll index 43038bcbdbfe7..5e28f166b9ebf 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll @@ -112,36 +112,44 @@ define i128 @test_i128() { } define fp128 @test_fp128() { -; CHECK-LABEL: @test_fp128( -; CHECK-NEXT: [[R:%.*]] = load fp128, fp128* bitcast ({ i64, i64 }* @g3 to fp128*), align 16 -; CHECK-NEXT: ret fp128 [[R]] +; LE-LABEL: @test_fp128( +; LE-NEXT: ret fp128 0xL000000000000007B0000000006B1BFF8 +; +; BE-LABEL: @test_fp128( +; BE-NEXT: ret fp128 0xL0000000006B1BFF8000000000000007B ; %r = load fp128, fp128* bitcast({i64, i64}* @g3 to 
fp128*) ret fp128 %r } define ppc_fp128 @test_ppc_fp128() { -; CHECK-LABEL: @test_ppc_fp128( -; CHECK-NEXT: [[R:%.*]] = load ppc_fp128, ppc_fp128* bitcast ({ i64, i64 }* @g3 to ppc_fp128*), align 16 -; CHECK-NEXT: ret ppc_fp128 [[R]] +; LE-LABEL: @test_ppc_fp128( +; LE-NEXT: ret ppc_fp128 bitcast (i128 2071796475790618158476296315 to ppc_fp128) +; +; BE-LABEL: @test_ppc_fp128( +; BE-NEXT: ret ppc_fp128 bitcast (i128 2268949521066387161080 to ppc_fp128) ; %r = load ppc_fp128, ppc_fp128* bitcast({i64, i64}* @g3 to ppc_fp128*) ret ppc_fp128 %r } define x86_fp80 @test_x86_fp80() { -; CHECK-LABEL: @test_x86_fp80( -; CHECK-NEXT: [[R:%.*]] = load x86_fp80, x86_fp80* bitcast ({ i64, i64 }* @g3 to x86_fp80*), align 16 -; CHECK-NEXT: ret x86_fp80 [[R]] +; LE-LABEL: @test_x86_fp80( +; LE-NEXT: ret x86_fp80 0xKFFFF000000000000007B +; +; BE-LABEL: @test_x86_fp80( +; BE-NEXT: ret x86_fp80 0xK000000000000007B0000 ; %r = load x86_fp80, x86_fp80* bitcast({i64, i64}* @g3 to x86_fp80*) ret x86_fp80 %r } define bfloat @test_bfloat() { -; CHECK-LABEL: @test_bfloat( -; CHECK-NEXT: [[R:%.*]] = load bfloat, bfloat* bitcast ({ i64, i64 }* @g3 to bfloat*), align 2 -; CHECK-NEXT: ret bfloat [[R]] +; LE-LABEL: @test_bfloat( +; LE-NEXT: ret bfloat 0xR007B +; +; BE-LABEL: @test_bfloat( +; BE-NEXT: ret bfloat 0xR0000 ; %r = load bfloat, bfloat* bitcast({i64, i64}* @g3 to bfloat*) ret bfloat %r From 99b5a8049be49563bd7541dd4ea93ad2f6516299 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Fri, 21 Jan 2022 09:49:55 +0100 Subject: [PATCH 143/946] Match bazel config with cmake after f29256a64 --- utils/bazel/llvm_configs/llvm-config.h.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake index a5edc2084a8a5..ec18b40fe04d9 100644 --- a/utils/bazel/llvm_configs/llvm-config.h.cmake +++ b/utils/bazel/llvm_configs/llvm-config.h.cmake @@ -91,9 +91,6 @@ /* Define if LLVM was built with a dependency to the 
libtensorflow dynamic library */ #cmakedefine LLVM_HAVE_TF_API -/* Define if LLVM was built with a dependency to the tensorflow compiler */ -#cmakedefine LLVM_HAVE_TF_AOT - /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H} From 69825f369302184aecb1ee53d9224e49ba15d9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 21 Jan 2022 09:56:26 +0100 Subject: [PATCH 144/946] [fir] Add array operations documentation This patch adds documentation on FIR array operations and their usage. Reviewed By: schweitz Differential Revision: https://reviews.llvm.org/D115077 --- flang/docs/FIRArrayOperations.md | 342 +++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 flang/docs/FIRArrayOperations.md diff --git a/flang/docs/FIRArrayOperations.md b/flang/docs/FIRArrayOperations.md new file mode 100644 index 0000000000000..822bafca84eb5 --- /dev/null +++ b/flang/docs/FIRArrayOperations.md @@ -0,0 +1,342 @@ + + +# Design: FIR Array operations + +```eval_rst +.. contents:: + :local: +``` + +## General + +The array operations in FIR model the copy-in/copy-out semantics over Fortran +statements. + +Fortran language semantics sometimes require the compiler to make a temporary +copy of an array or array slice. Situations where this can occur include: + +* Passing a non-contiguous array to a procedure that does not declare it as + assumed-shape. +* Array expressions, especially those involving `RESHAPE`, `PACK`, and `MERGE`. +* Assignments of arrays where the array appears on both the left and right-hand + sides of the assignment. +* Assignments of `POINTER` arrays. 
+ +There are currently the following operations: +- `fir.array_load` +- `fir.array_merge_store` +- `fir.array_fetch` +- `fir.array_update` +- `fir.array_access` +- `fir.array_amend` + +`array_load`(s) and `array_merge_store` are a pairing that brackets the lifetime +of the array copies. + +`array_fetch` and `array_update` are defined to work as getter/setter pairs on +values of elements from loaded array copies. These have "GEP-like" syntax and +semantics. + +Fortran arrays are implicitly memory bound as are some other Fortran type/kind +entities. For entities that can be atomically promoted to the value domain, +we use `array_fetch` and `array_update`. + +`array_access` and `array_amend` are defined to work as getter/setter pairs on +references to elements in loaded array copies. `array_access` has "GEP-like" +syntax. `array_amend` annotates which loaded array copy is being written to. +It is invalid to update an array copy without `array_amend`; doing so will +result in undefined behavior. +For those type/kinds that cannot be promoted to values, we must leave them in a +memory reference domain, and we use `array_access` and `array_amend`. + +## array_load + +This operation taken with `array_merge_store` captures Fortran's +copy-in/copy-out semantics. One way to think of this is that array_load +creates a snapshot copy of the entire array. This copy can then be used +as the "original value" of the array while the array's new value is +computed. The `array_merge_store` operation is the copy-out semantics, which +merge the updates with the original array value to produce the final array +result. This abstracts the copy operations as opposed to always creating +copies or requiring dependence analysis be performed on the syntax trees +and before lowering to the IR. + +Load an entire array as a single SSA value. + +```fortran + real :: a(o:n,p:m) + ... + ... = ... a ... 
+``` + +One can use `fir.array_load` to produce an ssa-value that captures an +immutable value of the entire array `a`, as in the Fortran array expression +shown above. Subsequent changes to the memory containing the array do not +alter its composite value. This operation lets one load an array as a +value while applying a runtime shape, shift, or slice to the memory +reference, and its semantics guarantee immutability. + +```mlir +%s = fir.shape_shift %lb1, %ex1, %lb2, %ex2 : (index, index, index, index) -> !fir.shape<2> +// load the entire array 'a' +%v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32> +// a fir.store here into array %a does not change %v +``` + +## array_merge_store + +The `array_merge_store` operation stores a merged array value to memory. + + +```fortran + real :: a(n,m) + ... + a = ... +``` + +One can use `fir.array_merge_store` to merge/copy the value of `a` in an +array expression as shown above. + +```mlir + %v = fir.array_load %a(%shape) : ... + %r = fir.array_update %v, %f, %i, %j : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32> + fir.array_merge_store %v, %r to %a : !fir.ref<!fir.array<?x?xf32>> +``` + +This operation merges the original loaded array value, `%v`, with the +chained updates, `%r`, and stores the result to the array at address, `%a`. + +This operation taken with `array_load`'s captures Fortran's +copy-in/copy-out semantics. The first operand of `array_merge_store` is the +result of the initial `array_load` operation. While this value could be +retrieved by reference chasing through the different array operations it is +useful to have it on hand directly for analysis passes since this directly +defines the "bounds" of the Fortran statement represented by these operations. +The intention is to allow copy-in/copy-out regions to be easily delineated, +analyzed, and optimized. + +## array_fetch + +The `array_fetch` operation fetches the value of an element in an array value. + +```fortran + real :: a(n,m) + ... + ... a ... + ...
a(r,s+1) ... +``` + +One can use `fir.array_fetch` to fetch the (implied) value of `a(i,j)` in +an array expression as shown above. It can also be used to extract the +element `a(r,s+1)` in the second expression. + +```mlir + %s = fir.shape %n, %m : (index, index) -> !fir.shape<2> + // load the entire array 'a' + %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32> + // fetch the value of one of the array value's elements + %1 = fir.array_fetch %v, %i, %j : (!fir.array<?x?xf32>, index, index) -> f32 +``` + +It is only possible to use `array_fetch` on an `array_load` result value or a +value that can be traced back transitively to an `array_load` as the dominating +source. Other array operations such as `array_update` can be in between. + +## array_update + +The `array_update` operation is used to update the value of an element in an +array value. A new array value is returned where all element values of the input +array are identical except for the selected element which is the value passed in +the update. + +```fortran + real :: a(n,m) + ... + a = ... +``` + +One can use `fir.array_update` to update the (implied) value of `a(i,j)` +in an array expression as shown above. + +```mlir + %s = fir.shape %n, %m : (index, index) -> !fir.shape<2> + // load the entire array 'a' + %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32> + // update the value of one of the array value's elements + // %r_{ij} = %f if (i,j) = (%i,%j), %v_{ij} otherwise + %r = fir.array_update %v, %f, %i, %j : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32> + fir.array_merge_store %v, %r to %a : !fir.ref<!fir.array<?x?xf32>> +``` + +An array value update behaves as if a mapping function from the indices +to the new value has been added, replacing the previous mapping. These +mappings can be added to the ssa-value, but will not be materialized in +memory until the `fir.array_merge_store` is performed. 
+`fir.array_update` can be seen as an array access with a notion that the array +will be changed at the accessed position when `fir.array_merge_store` is +performed. + +## array_access + +The `array_access` provides a reference to a single element from an array value. +This is *not* a view in the immutable array, otherwise it couldn't be stored to. +It can be seen as a logical copy of the element and its position in the array. +This reference can be written to and modified without changing the original +array. + +The `array_access` operation is used to fetch the memory reference of an element +in an array value. + +```fortran + real :: a(n,m) + ... + ... a ... + ... a(r,s+1) ... +``` + +One can use `fir.array_access` to recover the implied memory reference to +the element `a(i,j)` in an array expression `a` as shown above. It can also +be used to recover the reference element `a(r,s+1)` in the second +expression. + +```mlir + %s = fir.shape %n, %m : (index, index) -> !fir.shape<2> + // load the entire array 'a' + %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32> + // fetch the value of one of the array value's elements + %1 = fir.array_access %v, %i, %j : (!fir.array<?x?xf32>, index, index) -> !fir.ref<f32> +``` + +It is only possible to use `array_access` on an `array_load` result value or a +value that can be traced back transitively to an `array_load` as the dominating +source. Other array operations such as `array_amend` can be in between. + +`array_access` is mainly used with `character` arrays and arrays of derived +types, because they might have non-compile-time sizes that would be +useless to load entirely or too big to load. + +Here is a simple example with a `character` array assignment. 
+ +Fortran +``` +subroutine foo(c1, c2, n) + integer(8) :: n + character(n) :: c1(:), c2(:) + c1 = c2 +end subroutine +``` + +It results in this cleaned-up FIR: +``` +func @_QPfoo(%arg0: !fir.box>>, %arg1: !fir.box>>, %arg2: !fir.ref) { + %0 = fir.load %arg2 : !fir.ref + %c0 = arith.constant 0 : index + %1:3 = fir.box_dims %arg0, %c0 : (!fir.box>>, index) -> (index, index, index) + %2 = fir.array_load %arg0 : (!fir.box>>) -> !fir.array> + %3 = fir.array_load %arg1 : (!fir.box>>) -> !fir.array> + %c1 = arith.constant 1 : index + %4 = arith.subi %1#1, %c1 : index + %5 = fir.do_loop %arg3 = %c0 to %4 step %c1 unordered iter_args(%arg4 = %2) -> (!fir.array>) { + %6 = fir.array_access %3, %arg3 : (!fir.array>, index) -> !fir.ref> + %7 = fir.array_access %arg4, %arg3 : (!fir.array>, index) -> !fir.ref> + %false = arith.constant false + %8 = fir.convert %7 : (!fir.ref>) -> !fir.ref + %9 = fir.convert %6 : (!fir.ref>) -> !fir.ref + fir.call @llvm.memmove.p0i8.p0i8.i64(%8, %9, %0, %false) : (!fir.ref, !fir.ref, i64, i1) -> () + %10 = fir.array_amend %arg4, %7 : (!fir.array>, !fir.ref>) -> !fir.array> + fir.result %10 : !fir.array> + } + fir.array_merge_store %2, %5 to %arg0 : !fir.array>, !fir.array>, !fir.box>> + return + } + func private @llvm.memmove.p0i8.p0i8.i64(!fir.ref, !fir.ref, i64, i1) +} +``` + +`fir.array_access` and `fir.array_amend` split the two purposes of +`fir.array_update` into two distinct operations to work on type/kind that must +reside in the memory reference domain. `fir.array_access` captures the array +access semantics and `fir.array_amend` denotes which `fir.array_access` is the +lhs. + +We do not want to start loading the entire `!fir.ref>` here since +it has dynamic length, and even if constant, could be too long to do so. + +## array_amend + +The `array_amend` operation marks an array value as having been changed via a +reference obtain by an `array_access`. 
It acts as a logical transaction log +that is used to merge the final result back with an `array_merge_store` +operation. + +```mlir + // fetch the value of one of the array value's elements + %1 = fir.array_access %v, %i, %j : (!fir.array<?x?xf32>, index, index) -> !fir.ref<f32> + // modify the element by storing data using %1 as a reference + %2 = ... %1 ... + // mark the array value + %new_v = fir.array_amend %v, %2 : (!fir.array<?x?xf32>, !fir.ref<f32>) -> !fir.array<?x?xf32> +``` + +## Example + +Here is an example of FIR code using several array operations together. The +example below is a simplified version of the FIR code coming from the +following Fortran code snippet. + +```fortran +subroutine s(a,l,u) + type t + integer m + end type t + type(t) :: a(:) + integer :: l, u + forall (i=l:u) + a(i) = a(u-i+1) + end forall +end +``` + +``` +func @_QPs(%arg0: !fir.box>>, %arg1: !fir.ref, %arg2: !fir.ref) { + %l = fir.load %arg1 : !fir.ref + %l_index = fir.convert %l : (i32) -> index + %u = fir.load %arg2 : !fir.ref + %u_index = fir.convert %u : (i32) -> index + %c1 = arith.constant 1 : index + // This is the "copy-in" array used on the RHS of the expression. It will be indexed into and loaded at each iteration. + %array_a_src = fir.array_load %arg0 : (!fir.box>>) -> !fir.array> + + // This is the "seed" for the "copy-out" array on the LHS. It'll flow from iteration to iteration and gets + // updated at each iteration. + %array_a_dest_init = fir.array_load %arg0 : (!fir.box>>) -> !fir.array> + + %array_a_final = fir.do_loop %i = %l_index to %u_index step %c1 unordered iter_args(%array_a_dest = %array_a_dest_init) -> (!fir.array>) { + // Compute indexing for the RHS and array the element. 
+ %u_minus_i = arith.subi %u_index, %i : index // u-i + %u_minus_i_plus_one = arith.addi %u_minus_i, %c1: index // u-i+1 + %a_src_ref = fir.array_access %array_a_src, %u_minus_i_plus_one {Fortran.offsets} : (!fir.array>, index) -> !fir.ref> + %a_src_elt = fir.load %a_src_ref : !fir.ref> + + // Get the reference to the element in the array on the LHS + %a_dst_ref = fir.array_access %array_a_dest, %i {Fortran.offsets} : (!fir.array>, index) -> !fir.ref> + + // Store the value, and update the array + fir.store %a_src_elt to %a_dst_ref : !fir.ref> + %updated_array_a = fir.array_amend %array_a_dest, %a_dst_ref : (!fir.array>, !fir.ref>) -> !fir.array> + + // Forward the current updated array to the next iteration. + fir.result %updated_array_a : !fir.array> + } + // Store back the result by merging the initial value loaded before the loop + // with the final one produced by the loop. + fir.array_merge_store %array_a_dest_init, %array_a_final to %arg0 : !fir.array>, !fir.array>, !fir.box>> + return +} +``` From d03c5bc8d437e47ae424ac9611ae441cd5225526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Fri, 21 Jan 2022 10:03:48 +0100 Subject: [PATCH 145/946] [mlir] Fully qualify return types in OpAsmInterface.td and FunctionInterfaces.td --- mlir/include/mlir/IR/FunctionInterfaces.td | 6 +++--- mlir/include/mlir/IR/OpAsmInterface.td | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/IR/FunctionInterfaces.td b/mlir/include/mlir/IR/FunctionInterfaces.td index b6993ff6ffc76..20c7d7bbd51b7 100644 --- a/mlir/include/mlir/IR/FunctionInterfaces.td +++ b/mlir/include/mlir/IR/FunctionInterfaces.td @@ -55,13 +55,13 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> { the type (to allow for this method may be called on function declarations). 
}], - "ArrayRef", "getArgumentTypes">, + "::llvm::ArrayRef<::mlir::Type>", "getArgumentTypes">, InterfaceMethod<[{ Returns the function result types based exclusively on the type (to allow for this method may be called on function declarations). }], - "ArrayRef", "getResultTypes">, + "::llvm::ArrayRef<::mlir::Type>", "getResultTypes">, InterfaceMethod<[{ Returns a clone of the function type with the given argument and result types. @@ -70,7 +70,7 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> { an appropriate clone method: `Type clone(ArrayRef inputs, ArrayRef results)` }], - "Type", "cloneTypeWith", (ins + "::mlir::Type", "cloneTypeWith", (ins "::mlir::TypeRange":$inputs, "::mlir::TypeRange":$results ), /*methodBody=*/[{}], /*defaultImplementation=*/[{ return $_op.getType().clone(inputs, results); diff --git a/mlir/include/mlir/IR/OpAsmInterface.td b/mlir/include/mlir/IR/OpAsmInterface.td index b49e12ea9a85e..c13e59fb1b466 100644 --- a/mlir/include/mlir/IR/OpAsmInterface.td +++ b/mlir/include/mlir/IR/OpAsmInterface.td @@ -70,7 +70,7 @@ def OpAsmOpInterface : OpInterface<"OpAsmOpInterface"> { returned `spv`. The default implementation returns an empty string which is ignored. 
}], - "StringRef", "getDefaultDialect", (ins), "", "return \"\";" + "::llvm::StringRef", "getDefaultDialect", (ins), "", "return \"\";" >, ]; } From a2f6921ef2a1564a52aa3ecd7e30697250ccaf2e Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 20 Jan 2022 11:08:24 +0100 Subject: [PATCH 146/946] [llvm] Remove unused headers in LLVMDemangle As an hint to the impact of the cleanup, running clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Demangle/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 208053 lines after: 203965 lines --- llvm/include/llvm/Demangle/MicrosoftDemangle.h | 2 -- llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 1 - llvm/lib/Demangle/ItaniumDemangle.cpp | 2 -- llvm/lib/Demangle/MicrosoftDemangleNodes.cpp | 1 - 4 files changed, 6 deletions(-) diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h index 0403136616019..6f2d0416901ec 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h @@ -9,10 +9,8 @@ #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H #define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H -#include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" #include "llvm/Demangle/StringView.h" -#include "llvm/Demangle/Utility.h" #include diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 1455dad3d1b2b..8ad2472364b4c 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -13,7 +13,6 @@ #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H #define LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H -#include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/StringView.h" #include #include diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index 3f68f76761ce0..1a5db755e37b5 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ 
b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -19,9 +19,7 @@ #include #include #include -#include #include -#include using namespace llvm; using namespace llvm::itanium_demangle; diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 32d8dff66c3f8..d07d05a08c556 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Demangle/MicrosoftDemangleNodes.h" -#include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/Utility.h" #include #include From c0cf209076a29076ebf43c59dff3cc3c8400e4d7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 21 Jan 2022 09:34:38 +0000 Subject: [PATCH 147/946] [VPlan] Add VPWidenIntOrFpInductionRecipe::isCanonical, use it (NFCI). This patch adds VPWidenIntOrFpInductionRecipe::isCanonical to check if an induction recipe is canonical. The code is also updated to use it instead of isCanonicalID. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D117551 --- .../Transforms/Vectorize/LoopVectorize.cpp | 24 +++++++------------ llvm/lib/Transforms/Vectorize/VPlan.cpp | 6 +++++ llvm/lib/Transforms/Vectorize/VPlan.h | 5 ++++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 6 +++++ 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 56b8bd8564f2a..d186ae59a74a2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -510,8 +510,7 @@ class InnerLoopVectorizer { /// is provided, the integer induction variable will first be truncated to /// the corresponding type. \p CanonicalIV is the scalar value generated for /// the canonical induction variable. 
- void widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID, - Value *Start, TruncInst *Trunc, VPValue *Def, + void widenIntOrFpInduction(PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, Value *CanonicalIV); /// Construct the vector value of a scalarized value \p V one lane at a time. @@ -2478,17 +2477,12 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { return llvm::any_of(IV->users(), isScalarInst); } -/// Returns true if \p ID starts at 0 and has a step of 1. -static bool isCanonicalID(const InductionDescriptor &ID) { - if (!ID.getConstIntStepValue() || !ID.getConstIntStepValue()->isOne()) - return false; - auto *StartC = dyn_cast(ID.getStartValue()); - return StartC && StartC->isZero(); -} - void InnerLoopVectorizer::widenIntOrFpInduction( - PHINode *IV, const InductionDescriptor &ID, Value *Start, TruncInst *Trunc, - VPValue *Def, VPTransformState &State, Value *CanonicalIV) { + PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State, + Value *CanonicalIV) { + Value *Start = Def->getStartValue()->getLiveInIRValue(); + const InductionDescriptor &ID = Def->getInductionDescriptor(); + TruncInst *Trunc = Def->getTruncInst(); IRBuilder<> &Builder = State.Builder; assert(IV->getType() == ID.getStartValue()->getType() && "Types must match"); assert(!State.VF.isZero() && "VF must be non-zero"); @@ -2519,7 +2513,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction( auto CreateScalarIV = [&](Value *&Step) -> Value * { Value *ScalarIV = CanonicalIV; Type *NeededType = IV->getType(); - if (!isCanonicalID(ID) || ScalarIV->getType() != NeededType) { + if (!Def->isCanonical() || ScalarIV->getType() != NeededType) { ScalarIV = NeededType->isIntegerTy() ? 
Builder.CreateSExtOrTrunc(ScalarIV, NeededType) @@ -9702,9 +9696,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0); - State.ILV->widenIntOrFpInduction(IV, getInductionDescriptor(), - getStartValue()->getLiveInIRValue(), - getTruncInst(), this, State, CanonicalIV); + State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV); } void VPWidenPHIRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 04c8e399c5438..0ec2361390590 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1263,6 +1263,12 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " " << VPlanIngredient(IV); } +bool VPWidenIntOrFpInductionRecipe::isCanonical() const { + auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); + auto *StepC = dyn_cast(getInductionDescriptor().getStep()); + return StartC && StartC->isZero() && StepC && StepC->isOne(); +} + void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-GEP "; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c7946b6d2adef..10d5c1b3409a5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1059,6 +1059,7 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { /// Returns the start value of the induction. VPValue *getStartValue() { return getOperand(0); } + const VPValue *getStartValue() const { return getOperand(0); } /// Returns the first defined value as TruncInst, if it is one or nullptr /// otherwise. 
@@ -1071,6 +1072,10 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { /// Returns the induction descriptor for the recipe. const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } + + /// Returns true if the induction is canonical, i.e. starting at 0 and + /// incremented by UF * VF (= the original IV is incremented by 1). + bool isCanonical() const; }; /// A pure virtual base class for all recipes modeling header phis, including diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 2df547e960ba9..5296d2b9485cc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -178,6 +178,7 @@ class VPValue { void replaceAllUsesWith(VPValue *New); VPDef *getDef() { return Def; } + const VPDef *getDef() const { return Def; } /// Returns the underlying IR value, if this VPValue is defined outside the /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef @@ -187,6 +188,11 @@ class VPValue { "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); return getUnderlyingValue(); } + const Value *getLiveInIRValue() const { + assert(!getDef() && + "VPValue is not a live-in; it is defined by a VPDef inside a VPlan"); + return getUnderlyingValue(); + } }; typedef DenseMap Value2VPValueTy; From 55689904d2e5afcc5309f7234d6369307ee305d0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 21 Jan 2022 09:44:31 +0000 Subject: [PATCH 148/946] [VPlan] Move ::isCanonical outside ifdef. This fixes a build failure with assertions disabled. 
--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0ec2361390590..a96c122db2a97 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1262,6 +1262,7 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, } else O << " " << VPlanIngredient(IV); } +#endif bool VPWidenIntOrFpInductionRecipe::isCanonical() const { auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); @@ -1269,6 +1270,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return StartC && StartC->isZero() && StepC && StepC->isOne(); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-GEP "; From 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 20 Jan 2022 11:21:47 +0100 Subject: [PATCH 149/946] [llvm] Remove (some) LLVMDemangle header dependencies - Avoid using for std::end on a plain array (using instead) - Avoid using for std::min and std::equal (using alternate logic and std::strcmp instead) As an hint to the impact of the cleanup, running clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Demangle/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 203965 lines after: 169704 lines --- llvm/include/llvm/Demangle/ItaniumDemangle.h | 3 ++- llvm/include/llvm/Demangle/StringView.h | 14 +++++++------- llvm/include/llvm/Demangle/Utility.h | 9 +++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index b25139d8a72ba..01f414a7257bf 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -21,12 +21,13 @@ #include "DemangleConfig.h" #include 
"StringView.h" #include "Utility.h" +#include #include #include #include #include #include -#include +#include #include #define FOR_EACH_NODE_KIND(X) \ diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h index 1e4d3803f06cd..7c8cb482ae1c1 100644 --- a/llvm/include/llvm/Demangle/StringView.h +++ b/llvm/include/llvm/Demangle/StringView.h @@ -14,7 +14,6 @@ #define DEMANGLE_STRINGVIEW_H #include "DemangleConfig.h" -#include #include #include @@ -38,15 +37,16 @@ class StringView { StringView substr(size_t Pos, size_t Len = npos) const { assert(Pos <= size()); - return StringView(begin() + Pos, std::min(Len, size() - Pos)); + if (Len > size() - Pos) + Len = size() - Pos; + return StringView(begin() + Pos, Len); } size_t find(char C, size_t From = 0) const { - size_t FindBegin = std::min(From, size()); // Avoid calling memchr with nullptr. - if (FindBegin < size()) { + if (From < size()) { // Just forward to memchr, which is faster than a hand-rolled loop. 
- if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin)) + if (const void *P = ::memchr(First + From, C, size() - From)) return size_t(static_cast(P) - First); } return npos; @@ -98,7 +98,7 @@ class StringView { bool startsWith(StringView Str) const { if (Str.size() > size()) return false; - return std::equal(Str.begin(), Str.end(), begin()); + return std::strncmp(Str.begin(), begin(), Str.size()) == 0; } const char &operator[](size_t Idx) const { return *(begin() + Idx); } @@ -111,7 +111,7 @@ class StringView { inline bool operator==(const StringView &LHS, const StringView &RHS) { return LHS.size() == RHS.size() && - std::equal(LHS.begin(), LHS.end(), RHS.begin()); + std::strncmp(LHS.begin(), RHS.begin(), LHS.size()) == 0; } DEMANGLE_NAMESPACE_END diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 733d83ad1b6ba..b816aa22c24e0 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -14,10 +14,11 @@ #define DEMANGLE_UTILITY_H #include "StringView.h" +#include #include #include #include -#include +#include #include DEMANGLE_NAMESPACE_BEGIN @@ -48,8 +49,8 @@ class OutputBuffer { return; } - char Temp[21]; - char *TempPtr = std::end(Temp); + std::array Temp; + char *TempPtr = Temp.end(); while (N) { *--TempPtr = char('0' + N % 10); @@ -59,7 +60,7 @@ class OutputBuffer { // Add negative sign... 
if (isNeg) *--TempPtr = '-'; - this->operator<<(StringView(TempPtr, std::end(Temp))); + this->operator<<(StringView(TempPtr, Temp.end())); } public: From e7762653d3b071dfb86d4e2d3bcf7c1455683d37 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 11:19:54 +0100 Subject: [PATCH 150/946] [Attributor] Avoid some pointer element type accesses --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0723402c19ee0..d0e13dc269385 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1216,7 +1216,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { } UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType( - CurPtr->getType()->getPointerElementType(), Indices); + GEP->getSourceElementType(), Indices); Follow = true; return true; } @@ -6650,9 +6650,10 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { IRBuilder IRB(IP); const DataLayout &DL = IP->getModule()->getDataLayout(); - if (Base->getType()->getPointerElementType() != PrivType) - Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(), - "", ACS.getInstruction()); + Type *PrivPtrType = PrivType->getPointerTo(); + if (Base->getType() != PrivPtrType) + Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "", + ACS.getInstruction()); // Traverse the type, build GEPs and loads. 
if (auto *PrivStructType = dyn_cast(PrivType)) { @@ -6794,7 +6795,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { if (auto *AI = dyn_cast(Obj)) if (auto *CI = dyn_cast(AI->getArraySize())) if (CI->isOne()) - return Obj->getType()->getPointerElementType(); + return AI->getAllocatedType(); if (auto *Arg = dyn_cast(Obj)) { auto &PrivArgAA = A.getAAFor( *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED); From 065044c443f4041f32e0a8d6e633f9d92580fbca Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 11:56:32 +0100 Subject: [PATCH 151/946] Fix 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d Don't assume iterator on std::array are char*, use .data() instead --- llvm/include/llvm/Demangle/Utility.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index b816aa22c24e0..989b41701e4c9 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -50,7 +50,7 @@ class OutputBuffer { } std::array Temp; - char *TempPtr = Temp.end(); + char *TempPtr = Temp.data() + Temp.size(); while (N) { *--TempPtr = char('0' + N % 10); From 329feeb938ac63602136bcb3c5ec3a64109be94c Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Fri, 21 Jan 2022 21:27:55 +1100 Subject: [PATCH 152/946] [ORC][docs] Describe removing JITDylibs, using custom program representations. 
Add documentation around: * Removing JITDylib from the session * Add support for custom program representation Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D116476 --- llvm/docs/ORCv2.rst | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst index ec372f575b195..8012820d60e52 100644 --- a/llvm/docs/ORCv2.rst +++ b/llvm/docs/ORCv2.rst @@ -579,6 +579,98 @@ calling the ``ExecutionSession::createJITDylib`` method with a unique name: The JITDylib is owned by the ``ExecutionEngine`` instance and will be freed when it is destroyed. +How to remove a JITDylib +------------------------ +JITDylibs can be removed completely by calling ``ExecutionSession::removeJITDylib``. +Calling that function will close the given JITDylib and clear all the resources held for +it. No code can be added to a closed JITDylib. + +Please note that closing a JITDylib won't update any pointers; you are responsible for +ensuring that any code/data contained in the JITDylib is no longer in use. + +You can also use a custom resource tracker to remove individual modules from a JITDylib. + +How to add the support for custom program representation +-------------------------------------------------------- +In order to add the support for a custom program representation, a custom ``MaterializationUnit`` +for the program representation, and a custom ``Layer`` are needed. The Layer will have two +operations: ``add`` and ``emit``. The ``add`` operation takes an instance of your program +representation, builds one of your custom ``MaterializationUnits`` to hold it, then adds it +to a ``JITDylib``. The ``emit`` operation takes a ``MaterializationResponsibility`` object and an +instance of your program representation and materializes it, usually by compiling it and handing +the resulting object off to an ``ObjectLinkingLayer``. 
+ +Your custom ``MaterializationUnit`` will have two operations: ``materialize`` and ``discard``. The +``materialize`` function will be called for you when any symbol provided by the unit is looked up, +and it should just call the ``emit`` function on your layer, passing in the given +``MaterializationResponsibility`` and the wrapped program representation. The ``discard`` function +will be called if some weak symbol provided by your unit is not needed (because the JIT found an +overriding definition). You can use this to drop your definition early, or just ignore it and let +the linker drop the definition later. + +Here is an example of an ASTLayer: + + .. code-block:: c++ + + // ... In your JIT class + AstLayer astLayer; + // ... + + + class AstMaterializationUnit : public orc::MaterializationUnit { + public: + AstMaterializationUnit(AstLayer &l, Ast &ast) + : llvm::orc::MaterializationUnit(l.getInterface(ast)), astLayer(l), + ast(ast) {}; + + llvm::StringRef getName() const override { + return "AstMaterializationUnit"; + } + + void materialize(std::unique_ptr<orc::MaterializationResponsibility> r) override { + astLayer.emit(std::move(r), ast); + }; + + private: + void discard(const llvm::orc::JITDylib &jd, const llvm::orc::SymbolStringPtr &sym) override { + llvm_unreachable("functions are not overridable"); + } + + + AstLayer &astLayer; + Ast &ast; + }; + + class AstLayer { + llvm::orc::IRLayer &baseLayer; + llvm::orc::MangleAndInterner &mangler; + + public: + AstLayer(llvm::orc::IRLayer &baseLayer, llvm::orc::MangleAndInterner &mangler) + : baseLayer(baseLayer), mangler(mangler){}; + + llvm::Error add(llvm::orc::ResourceTrackerSP &rt, Ast &ast) { + return rt->getJITDylib().define(std::make_unique<AstMaterializationUnit>(*this, ast), rt); + } + + void emit(std::unique_ptr<orc::MaterializationResponsibility> mr, Ast &ast) { + // compileAst is just a function that compiles the given AST and returns + // a `llvm::orc::ThreadSafeModule` + baseLayer.emit(std::move(mr), compileAst(ast)); + } + + llvm::orc::MaterializationUnit::Interface getInterface(Ast &ast) { + 
SymbolFlagsMap Symbols; + // Find all the symbols in the AST and for each of them + // add it to the Symbols map. + Symbols[mangler(someNameFromAST)] = + JITSymbolFlags(JITSymbolFlags::Exported | JITSymbolFlags::Callable); + return MaterializationUnit::Interface(std::move(Symbols), nullptr); + } + }; + +Take a look at the source code of `Building A JIT's Chapter 4 <tutorial/BuildingAJIT4.html>`_ for a complete example. + How to use ThreadSafeModule and ThreadSafeContext ------------------------------------------------- From b351ac3873db15b16c2aa6d1e0e08ff9fab44f1f Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Mon, 20 Dec 2021 14:25:16 +0100 Subject: [PATCH 153/946] [AMDGPU][NFC] Regenerate InstCombine test --- .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 306 +++++++++--------- 1 file changed, 153 insertions(+), 153 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index b9fba55e5026b..a6ddcdd0a4a0e 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind { define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp { ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) [[ATTR11:#.*]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR12:[0-9]+]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone @@ -1662,7 +1662,7 @@ define i64 @icmp_constant_inputs_false() { define i64 @icmp_constant_inputs_true() { ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12:#.*]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) 
#[[ATTR13:[0-9]+]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) @@ -2369,7 +2369,7 @@ define i64 @fcmp_constant_inputs_false() { define i64 @fcmp_constant_inputs_true() { ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4) @@ -2411,7 +2411,7 @@ define i64 @ballot_zero_64() { define i64 @ballot_one_64() { ; CHECK-LABEL: @ballot_one_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]] +; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]] ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 1) @@ -2437,7 +2437,7 @@ define i32 @ballot_zero_32() { define i32 @ballot_one_32() { ; CHECK-LABEL: @ballot_one_32( -; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata !1) [[ATTR12]] +; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR13]] ; CHECK-NEXT: ret i32 [[B]] ; %b = call i32 @llvm.amdgcn.ballot.i32(i1 1) @@ -2861,8 +2861,8 @@ declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> 
[[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2873,8 +2873,8 @@ define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, < define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2886,8 +2886,8 @@ define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, < define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_3d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 
16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2901,8 +2901,8 @@ define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, < define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { ; ; CHECK-LABEL: @image_sample_a16_cube( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2915,8 +2915,8 @@ define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_1darray( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2928,8 
+2928,8 @@ define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_2darray( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2942,8 +2942,8 @@ define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2954,8 +2954,8 @@ define amdgpu_kernel void @image_sample_a16_c_1d(<4 
x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2967,8 +2967,8 @@ define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2980,8 +2980,8 @@ define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out define amdgpu_kernel void 
@image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -2994,8 +2994,8 @@ define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3007,8 +3007,8 @@ define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void 
@image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3021,8 +3021,8 @@ define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { ; CHECK-LABEL: @image_sample_a16_b_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3033,8 +3033,8 @@ define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void 
@image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_b_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3046,8 +3046,8 @@ define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_b_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3058,8 +3058,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %ou define amdgpu_kernel 
void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_b_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3071,8 +3071,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_b_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3084,8 +3084,8 @@ define amdgpu_kernel void 
@image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_b_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3098,8 +3098,8 @@ define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> 
addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3111,8 +3111,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3125,8 +3125,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_d_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> 
[[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3139,8 +3139,8 @@ define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_d_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3156,8 +3156,8 @@ define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_d_3d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> 
[[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3176,8 +3176,8 @@ define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_d_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3190,8 +3190,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half 
%dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_d_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3207,8 +3207,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3222,8 
+3222,8 @@ define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3240,8 +3240,8 @@ define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3255,8 +3255,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3273,8 +3273,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_cd_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x 
float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3287,8 +3287,8 @@ define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_cd_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3304,8 +3304,8 @@ define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg 
%rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_cd_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3318,8 +3318,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_cd_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> 
addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3335,8 +3335,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3350,8 +3350,8 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* % define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3368,8 +3368,8 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* % define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3383,8 +3383,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3401,8 +3401,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3414,8 +3414,8 @@ define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x 
i32> inreg %samp, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3428,8 +3428,8 @@ define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3441,8 +3441,8 @@ define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float 
%zcompare, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3455,8 +3455,8 @@ define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_lz_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3467,8 +3467,8 @@ define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_lz_2d( -; CHECK-NEXT: 
[[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3480,8 +3480,8 @@ define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_lz_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3492,8 +3492,8 @@ define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_lz_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float 
[[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3505,8 +3505,8 @@ define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1( -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3523,8 +3523,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* define 
amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3545,8 +3545,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspa define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_d_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], 
float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3558,8 +3558,8 @@ define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_d_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3573,8 +3573,8 @@ define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { ; CHECK-LABEL: @image_sample_g16_d_3d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float 
[[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3590,8 +3590,8 @@ define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_d_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3603,8 +3603,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, 
half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_d_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3618,8 +3618,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %ou define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; 
%dsdh32 = fpext half %dsdh to float @@ -3631,8 +3631,8 @@ define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3646,8 +3646,8 @@ define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 
x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3659,8 +3659,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3674,8 +3674,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_cd_1d( -; CHECK-NEXT: 
[[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3687,8 +3687,8 @@ define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_cd_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3702,8 +3702,8 @@ define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> 
addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_cd_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3715,8 +3715,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_cd_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: 
store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3730,8 +3730,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3743,8 +3743,8 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* % define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = 
call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3758,8 +3758,8 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* % define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3771,8 +3771,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x 
float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3786,8 +3786,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1( -; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x 
i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3801,8 +3801,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3820,8 +3820,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspa define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 
0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3832,8 +3832,8 @@ define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3844,8 +3844,8 @@ define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspa define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_fast( -; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> 
@llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3856,8 +3856,8 @@ define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_2d_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3869,8 +3869,8 @@ define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_3d_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], 
<4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3884,8 +3884,8 @@ define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %o define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { ; ; CHECK-LABEL: @image_sample_a16_cube_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3898,8 +3898,8 @@ define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_1darray_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, 
i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float @@ -3911,8 +3911,8 @@ define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1 define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_2darray_nnan( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float From 0530fdbbbb84ea3024a4a8f7156ff716f00ffd48 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Mon, 20 Dec 2021 15:11:01 +0100 Subject: [PATCH 154/946] [AMDGPU] Fix LOD bias in A16 combine As the codegen fix in D111754, the LOD bias needs to be converted to 16 bits. Fix this in the combine. 
Differential Revision: https://reviews.llvm.org/D116038 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 23 ++- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 31 ++- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 7 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5 +- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 14 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +- .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 176 +++++++++++++++--- 7 files changed, 229 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 861545b445a33..c5d266eb57ecf 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -558,6 +558,9 @@ class AMDGPUSampleVariant extra_addr // {offset} {bias} {z-compare} list ExtraAddrArgs = extra_addr; + bit Offset = false; + bit Bias = false; + bit ZCompare = false; bit Gradients = false; // Name of the {lod} or {clamp} argument that is appended to the coordinates, @@ -571,6 +574,7 @@ defset list AMDGPUSampleVariants = { multiclass AMDGPUSampleHelper_Offset extra_addr> { def NAME#lcmod : AMDGPUSampleVariant; + let Offset = true in def NAME#lcmod#_o : AMDGPUSampleVariant< ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg], extra_addr)>; } @@ -578,6 +582,7 @@ defset list AMDGPUSampleVariants = { multiclass AMDGPUSampleHelper_Compare extra_addr> { defm NAME : AMDGPUSampleHelper_Offset; + let ZCompare = true in defm NAME : AMDGPUSampleHelper_Offset< "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg])>; } @@ -591,6 +596,7 @@ defset list AMDGPUSampleVariants = { defset list AMDGPUSampleVariantsNoGradients = { defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>; + let Bias = true in defm AMDGPUSample : AMDGPUSampleHelper_Clamp< "_B", "_b", [AMDGPUArg]>; let LodOrClamp = "lod" in @@ -618,6 +624,9 @@ class AMDGPUDimProfile RetTypes = []; list DataArgs = []; list ExtraAddrArgs = []; + bit Offset = false; + bit Bias = false; + bit ZCompare = 
false; bit Gradients = false; string LodClampMip = ""; @@ -652,6 +661,9 @@ class AMDGPUDimProfileCopy : AMDGPUDimProfile class AMDGPUImageDimIntrinsicEval { int NumDataArgs = !size(P_.DataArgs); int NumDmaskArgs = !not(P_.IsAtomic); - int NumExtraAddrArgs = !size(P_.ExtraAddrArgs); + int NumOffsetArgs = !if(P_.Offset, 1, 0); + int NumBiasArgs = !if(P_.Bias, 1, 0); + int NumZCompareArgs = !if(P_.ZCompare, 1, 0); + int NumExtraAddrArgs = !add(NumOffsetArgs, NumBiasArgs, NumZCompareArgs); int NumVAddrArgs = !size(P_.AddrArgs); int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0); int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs)); @@ -710,6 +728,9 @@ class AMDGPUImageDimIntrinsicEval { int NumSampArgs = !if(P_.IsSample, 2, 0); int DmaskArgIndex = NumDataArgs; int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs); + int OffsetArgIndex = VAddrArgIndex; + int BiasArgIndex = !add(VAddrArgIndex, NumOffsetArgs); + int ZCompareArgIndex = !add(BiasArgIndex, NumBiasArgs); int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs); int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs); int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index db84b87669241..5eb7cf89abb24 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -127,14 +127,20 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, FloatCoord = Coord->getType()->isFloatingPointTy(); } - if (OnlyDerivatives) { - if (!ST->hasG16()) - return None; - } else { - if (!ST->hasA16()) - OnlyDerivatives = true; // Only supports G16 + if (!OnlyDerivatives && !ST->hasA16()) + OnlyDerivatives = true; // Only supports G16 + + // Check if there is a bias parameter and if it can be converted to f16 + if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { 
+ Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); + if (!canSafelyConvertTo16Bit(*Bias)) + OnlyDerivatives = true; } + if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart == + ImageDimIntr->CoordStart)) + return None; + Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext()) : Type::getInt16Ty(II.getContext()); @@ -143,8 +149,13 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, return None; ArgTys[ImageDimIntr->GradientTyArg] = CoordType; - if (!OnlyDerivatives) + if (!OnlyDerivatives) { ArgTys[ImageDimIntr->CoordTyArg] = CoordType; + + // Change the bias type + if (ImageDimIntr->NumBiasArgs != 0) + ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); + } Function *I = Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys); @@ -158,6 +169,12 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); } + // Convert the bias + if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { + Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); + Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); + } + CallInst *NewCall = IC.Builder.CreateCall(I, Args); NewCall->takeName(&II); NewCall->copyMetadata(II); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 673011f48289e..e7ee364476824 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -49,6 +49,9 @@ struct ImageDimIntrinsicInfo { unsigned BaseOpcode; MIMGDim Dim; + uint8_t NumOffsetArgs; + uint8_t NumBiasArgs; + uint8_t NumZCompareArgs; uint8_t NumGradients; uint8_t NumDmask; uint8_t NumData; @@ -57,6 +60,9 @@ struct ImageDimIntrinsicInfo { uint8_t DMaskIndex; uint8_t VAddrStart; + uint8_t OffsetIndex; + uint8_t BiasIndex; + uint8_t ZCompareIndex; uint8_t GradientStart; uint8_t CoordStart; uint8_t LodIndex; @@ -68,6 +74,7 @@ struct ImageDimIntrinsicInfo { uint8_t TexFailCtrlIndex; 
uint8_t CachePolicyIndex; + uint8_t BiasTyArg; uint8_t GradientTyArg; uint8_t CoordTyArg; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 20b2b0f1be0ce..5092e0f553e2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4273,15 +4273,18 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI, if ((I < Intr->GradientStart) || (I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) || (I >= Intr->CoordStart && !IsA16)) { - // Handle any gradient or coordinate operands that should not be packed if ((I < Intr->GradientStart) && IsA16 && (B.getMRI()->getType(AddrReg) == S16)) { + assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument"); // Special handling of bias when A16 is on. Bias is of type half but // occupies full 32-bit. PackedAddrs.push_back( B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) .getReg(0)); } else { + assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) && + "Bias needs to be converted to 16 bit in A16 mode"); + // Handle any gradient or coordinate operands that should not be packed AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0); PackedAddrs.push_back(AddrReg); } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 6dd886367302a..1d8a558359378 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1070,6 +1070,9 @@ class ImageDimIntrinsicInfo { AMDGPUDimProps Dim = I.P.Dim; AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval; + bits<8> NumOffsetArgs = DimEval.NumOffsetArgs; + bits<8> NumBiasArgs = DimEval.NumBiasArgs; + bits<8> NumZCompareArgs = DimEval.NumZCompareArgs; bits<8> NumGradients = DimEval.NumGradientArgs; bits<8> NumDmask = DimEval.NumDmaskArgs; bits<8> NumData = DimEval.NumDataArgs; @@ -1078,6 +1081,9 @@ class 
ImageDimIntrinsicInfo { bits<8> DMaskIndex = DimEval.DmaskArgIndex; bits<8> VAddrStart = DimEval.VAddrArgIndex; + bits<8> OffsetIndex = DimEval.OffsetArgIndex; + bits<8> BiasIndex = DimEval.BiasArgIndex; + bits<8> ZCompareIndex = DimEval.ZCompareArgIndex; bits<8> GradientStart = DimEval.GradientArgIndex; bits<8> CoordStart = DimEval.CoordArgIndex; bits<8> LodIndex = DimEval.LodArgIndex; @@ -1089,6 +1095,8 @@ class ImageDimIntrinsicInfo { bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex; bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex; + bits<8> BiasTyArg = !add(I.P.NumRetAndDataAnyTypes, + !if(!eq(NumOffsetArgs, 0), 0, I.P.ExtraAddrArgs[0].Type.isAny)); bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes, !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny))); bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0)); @@ -1096,10 +1104,10 @@ class ImageDimIntrinsicInfo { def ImageDimIntrinsicTable : GenericTable { let FilterClass = "ImageDimIntrinsicInfo"; - let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs", - "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd", + let Fields = ["Intr", "BaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs", + "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd", "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex", - "GradientTyArg", "CoordTyArg"]; + "BiasTyArg", "GradientTyArg", "CoordTyArg"]; string TypeOf_BaseOpcode = "MIMGBaseOpcode"; string TypeOf_Dim = "MIMGDim"; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5176ba44afad6..26229b40f4dc5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6316,12 
+6316,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op, // Push back extra arguments. for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) { if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) { + assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument"); // Special handling of bias when A16 is on. Bias is of type half but // occupies full 32-bit. - SDValue bias = DAG.getBuildVector( MVT::v2f16, DL, {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)}); - VAddrs.push_back(bias); - } else + SDValue Bias = DAG.getBuildVector( + MVT::v2f16, DL, + {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)}); + VAddrs.push_back(Bias); + } else { + assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) && + "Bias needs to be converted to 16 bit in A16 mode"); VAddrs.push_back(Op.getOperand(ArgOffset + I)); + } } if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) { diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index a6ddcdd0a4a0e..9607fe63f4637 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -3019,9 +3019,23 @@ define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %o ret void } -define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { -; CHECK-LABEL: @image_sample_a16_b_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) { +; CHECK-LABEL: @image_sample_a16_b16_1d( +; CHECK-NEXT: [[RES:%.*]] = call 
<4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) { +; CHECK-LABEL: @image_sample_a16_b32_1d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3031,9 +3045,25 @@ define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, ret void } -define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { -; CHECK-LABEL: @image_sample_a16_b_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) { +; CHECK-LABEL: @image_sample_a16_b16_2d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %t32 = fpext half %t to float + %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) { +; CHECK-LABEL: @image_sample_a16_b32_2d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3044,9 +3074,23 @@ define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, ret void } -define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { -; CHECK-LABEL: @image_sample_a16_c_b_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_c_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half 
%bias, float %zcompare, half %s) { +; CHECK-LABEL: @image_sample_a16_c_b16_1d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_c_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) { +; CHECK-LABEL: @image_sample_a16_c_b32_1d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3056,9 +3100,25 @@ define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %ou ret void } -define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { -; CHECK-LABEL: @image_sample_a16_c_b_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void 
@image_sample_a16_c_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) { +; CHECK-LABEL: @image_sample_a16_c_b16_2d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %t32 = fpext half %t to float + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_c_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) { +; CHECK-LABEL: @image_sample_a16_c_b32_2d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3069,9 +3129,25 @@ define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %ou ret void } -define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { -; CHECK-LABEL: @image_sample_a16_b_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x 
float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) { +; CHECK-LABEL: @image_sample_a16_b16_cl_1d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %clamp32 = fpext half %clamp to float + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) { +; CHECK-LABEL: @image_sample_a16_b32_cl_1d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3082,9 +3158,27 @@ define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %o ret void } -define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 
x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { -; CHECK-LABEL: @image_sample_a16_b_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) { +; CHECK-LABEL: @image_sample_a16_b16_cl_2d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %t32 = fpext half %t to float + %clamp32 = fpext half %clamp to float + %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) { +; CHECK-LABEL: @image_sample_a16_b32_cl_2d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float +; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 
false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3096,9 +3190,25 @@ define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %o ret void } -define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { -; CHECK-LABEL: @image_sample_a16_c_b_cl_1d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) { +; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %clamp32 = fpext half %clamp to float + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) { +; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d( +; CHECK-NEXT: [[S32:%.*]] = fpext 
half [[S:%.*]] to float +; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; @@ -3109,9 +3219,27 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* ret void } -define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { -; CHECK-LABEL: @image_sample_a16_c_b_cl_2d( -; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { +; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; + %bias32 = fpext half %bias to float + %s32 = fpext half %s to float + %t32 = fpext half %t to float + %clamp32 = fpext half %clamp to float + %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, 
float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %res, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +; CHECK-LABEL: @image_sample_a16_c_b32_cl_2d( +; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float +; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float +; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) ; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; From 603d18033c510c99ad84f26b6603db1ca68a500f Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 21 Dec 2021 17:27:14 +0100 Subject: [PATCH 155/946] [AMDGPU][InstCombine] Remove zero LOD bias If the bias is zero, we can remove it from the image instruction. Also copy other image optimizations (l->lz, mip->nomip) to IR combines. 
Differential Revision: https://reviews.llvm.org/D116042 --- .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 138 +++- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 34 + .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 1 + llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 9 + .../InstCombine/AMDGPU/amdgcn-intrinsics.ll | 589 +++++++++++++++++- 5 files changed, 732 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 5eb7cf89abb24..84363d3c6aa1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -97,10 +97,92 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) { llvm_unreachable("Should never be called!"); } +/// Applies Func(II.Args, II.ArgTys) and replaces the intrinsic call with +/// the modified arguments. +static Optional modifyIntrinsicCall( + IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC, + std::function &, SmallVectorImpl &)> + Func) { + SmallVector ArgTys; + if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) + return None; + + SmallVector Args(II.args()); + + // Modify arguments and types + Func(Args, ArgTys); + + Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys); + + CallInst *NewCall = IC.Builder.CreateCall(I, Args); + NewCall->takeName(&II); + NewCall->copyMetadata(II); + if (isa(NewCall)) + NewCall->copyFastMathFlags(&II); + + // Erase and replace uses + if (!II.getType()->isVoidTy()) + IC.replaceInstUsesWith(II, NewCall); + return IC.eraseInstFromFunction(II); +} + static Optional simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC) { + // Optimize _L to _LZ when 'lod' is zero or negative + if (const auto *LZMappingInfo = + AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantLod = +
dyn_cast(II.getOperand(ImageDimIntr->LodIndex))) { + if (ConstantLod->isZero() || ConstantLod->isNegative()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->LodIndex); + }); + } + } + } + + // Optimize _mip away, when 'lod' is zero + if (const auto *MIPMappingInfo = + AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantMip = + dyn_cast(II.getOperand(ImageDimIntr->MipIndex))) { + if (ConstantMip->isZero()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->MipIndex); + }); + } + } + } + + // Optimize _bias away when 'bias' is zero + if (const auto *BiasMappingInfo = + AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) { + if (auto *ConstantBias = + dyn_cast(II.getOperand(ImageDimIntr->BiasIndex))) { + if (ConstantBias->isZero()) { + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias, + ImageDimIntr->Dim); + return modifyIntrinsicCall( + II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) { + Args.erase(Args.begin() + ImageDimIntr->BiasIndex); + ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg); + }); + } + } + } + + // Try to use A16 or G16 if (!ST->hasA16() && !ST->hasG16()) return None; @@ -144,43 +226,31 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, Type *CoordType = FloatCoord ? 
Type::getHalfTy(II.getContext()) : Type::getInt16Ty(II.getContext()); - SmallVector ArgTys; - if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys)) - return None; - - ArgTys[ImageDimIntr->GradientTyArg] = CoordType; - if (!OnlyDerivatives) { - ArgTys[ImageDimIntr->CoordTyArg] = CoordType; - - // Change the bias type - if (ImageDimIntr->NumBiasArgs != 0) - ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); - } - Function *I = - Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys); - - SmallVector Args(II.args()); + return modifyIntrinsicCall( + II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) { + ArgTys[ImageDimIntr->GradientTyArg] = CoordType; + if (!OnlyDerivatives) { + ArgTys[ImageDimIntr->CoordTyArg] = CoordType; - unsigned EndIndex = - OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; - for (unsigned OperandIndex = ImageDimIntr->GradientStart; - OperandIndex < EndIndex; OperandIndex++) { - Args[OperandIndex] = - convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); - } + // Change the bias type + if (ImageDimIntr->NumBiasArgs != 0) + ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext()); + } - // Convert the bias - if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { - Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); - Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); - } + unsigned EndIndex = + OnlyDerivatives ? 
ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd; + for (unsigned OperandIndex = ImageDimIntr->GradientStart; + OperandIndex < EndIndex; OperandIndex++) { + Args[OperandIndex] = + convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder); + } - CallInst *NewCall = IC.Builder.CreateCall(I, Args); - NewCall->takeName(&II); - NewCall->copyMetadata(II); - if (isa(NewCall)) - NewCall->copyFastMathFlags(&II); - return IC.replaceInstUsesWith(II, NewCall); + // Convert the bias + if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) { + Value *Bias = II.getOperand(ImageDimIntr->BiasIndex); + Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder); + } + }); } bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 1d8a558359378..49eaa1499bb76 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -131,6 +131,22 @@ def MIMGMIPMappingTable : GenericTable { let PrimaryKeyName = "getMIMGMIPMappingInfo"; } +class MIMGBiasMapping { + MIMGBaseOpcode Bias = bias; + MIMGBaseOpcode NoBias = nobias; +} + +def MIMGBiasMappingTable : GenericTable { + let FilterClass = "MIMGBiasMapping"; + let CppTypeName = "MIMGBiasMappingInfo"; + let Fields = ["Bias", "NoBias"]; + string TypeOf_Bias = "MIMGBaseOpcode"; + string TypeOf_NoBias = "MIMGBaseOpcode"; + + let PrimaryKey = ["Bias"]; + let PrimaryKeyName = "getMIMGBiasMappingInfo"; +} + class MIMGG16Mapping { MIMGBaseOpcode G = g; MIMGBaseOpcode G16 = g16; @@ -1140,6 +1156,24 @@ def : MIMGLZMapping; def : MIMGMIPMapping; def : MIMGMIPMapping; +// Bias to NoBias Optimization Mapping +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : 
MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; +def : MIMGBiasMapping; + // G to G16 Optimization Mapping def : MIMGG16Mapping; def : MIMGG16Mapping; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 6c7d73aebe0c4..fa1fa5b850d57 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -132,6 +132,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) { #define GET_MIMGInfoTable_IMPL #define GET_MIMGLZMappingTable_IMPL #define GET_MIMGMIPMappingTable_IMPL +#define GET_MIMGBiasMappingTable_IMPL #define GET_MIMGG16MappingTable_IMPL #include "AMDGPUGenSearchableTables.inc" diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 061c74c0ace69..cabae3d1ab7e1 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -64,6 +64,7 @@ struct GcnBufferFormatInfo { #define GET_MIMGEncoding_DECL #define GET_MIMGLZMapping_DECL #define GET_MIMGMIPMapping_DECL +#define GET_MIMGBiASMapping_DECL #include "AMDGPUGenSearchableTables.inc" namespace IsaInfo { @@ -330,6 +331,11 @@ struct MIMGMIPMappingInfo { MIMGBaseOpcode NONMIP; }; +struct MIMGBiasMappingInfo { + MIMGBaseOpcode Bias; + MIMGBaseOpcode NoBias; +}; + struct MIMGG16MappingInfo { MIMGBaseOpcode G; MIMGBaseOpcode G16; @@ -341,6 +347,9 @@ const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); LLVM_READONLY const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); +LLVM_READONLY +const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); + LLVM_READONLY const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G); diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 9607fe63f4637..bac4c7826a4fe 100644 --- 
a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind { define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp { ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR12:[0-9]+]] +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]] ; CHECK-NEXT: ret float [[VAL]] ; %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone @@ -1662,7 +1662,7 @@ define i64 @icmp_constant_inputs_false() { define i64 @icmp_constant_inputs_true() { ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR13:[0-9]+]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR15:[0-9]+]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) @@ -2369,7 +2369,7 @@ define i64 @fcmp_constant_inputs_false() { define i64 @fcmp_constant_inputs_true() { ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]] +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]] ; CHECK-NEXT: ret i64 [[RESULT]] ; %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4) @@ -2411,7 +2411,7 @@ define i64 @ballot_zero_64() { define i64 @ballot_one_64() { ; CHECK-LABEL: @ballot_one_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]] +; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]] ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 1) @@ -2437,7 +2437,7 @@ define i32 
@ballot_zero_32() { define i32 @ballot_one_32() { ; CHECK-LABEL: @ballot_one_32( -; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR13]] +; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR15]] ; CHECK-NEXT: ret i32 [[B]] ; %b = call i32 @llvm.amdgcn.ballot.i32(i1 1) @@ -4051,6 +4051,585 @@ define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1 ret void } +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample l to lz +; -------------------------------------------------------------------- + +declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +define amdgpu_kernel void @sample_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x 
i32> inreg %samp, float %s, float %lod) { +; CHECK-LABEL: @sample_l_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +; CHECK-LABEL: @sample_l_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +; CHECK-LABEL: @sample_c_l_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +; CHECK-LABEL: @sample_c_l_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { +; CHECK-LABEL: @sample_l_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +; 
CHECK-LABEL: @sample_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { +; CHECK-LABEL: @sample_c_l_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +; CHECK-LABEL: @sample_c_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; 
CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +; CHECK-LABEL: @gather4_l_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +; CHECK-LABEL: @gather4_c_l_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define 
amdgpu_kernel void @gather4_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +; CHECK-LABEL: @gather4_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +; CHECK-LABEL: @gather4_c_l_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_c_l_o_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { +; CHECK-LABEL: @gather4_c_l_o_2darray( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] 
= call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.load.mip / llvm.amdgcn.image.store.mip with zero mip level +; -------------------------------------------------------------------- + +define amdgpu_kernel void @load_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s) { +; CHECK-LABEL: @load_mip_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @load_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; CHECK-LABEL: @load_mip_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s,
i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @load_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @load_mip_3d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @load_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { +; CHECK-LABEL: @load_mip_1darray( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @load_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @load_mip_2darray( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> 
@llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @load_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @load_mip_cube( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + + +define amdgpu_kernel void @store_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { +; CHECK-LABEL: @store_mip_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_kernel void @store_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { +; CHECK-LABEL: @store_mip_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_kernel void @store_mip_3d(<4 x float> 
addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @store_mip_3d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_kernel void @store_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { +; CHECK-LABEL: @store_mip_1darray( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_kernel void @store_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @store_mip_2darray( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_kernel void @store_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +; CHECK-LABEL: @store_mip_cube( +; CHECK-NEXT: main_body: +; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 
[[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) +; CHECK-NEXT: ret void +; +main_body: + call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 + + +declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 +declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 + +; -------------------------------------------------------------------- +; llvm.amdgcn.image.sample bias zero +; -------------------------------------------------------------------- + +declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> 
@llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 + +define amdgpu_kernel void @sample_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; CHECK-LABEL: @sample_b_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> 
[[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; CHECK-LABEL: @sample_b_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +; CHECK-LABEL: @sample_c_b_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) 
{ +; CHECK-LABEL: @sample_c_b_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { +; CHECK-LABEL: @sample_b_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +; CHECK-LABEL: @sample_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x 
float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { +; CHECK-LABEL: @sample_c_b_o_1d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @sample_c_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_b_2d(<4 x float> addrspace(1)* 
%out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +; CHECK-LABEL: @gather4_b_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @gather4_c_b_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +; CHECK-LABEL: @gather4_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> 
addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @gather4_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +; CHECK-LABEL: @gather4_c_b_o_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +define amdgpu_kernel void @sample_c_b_o_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { +; CHECK-LABEL: @sample_c_b_o_a16_2d( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store 
<4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +; Check that bias is not optimized away if > 0 +define amdgpu_kernel void @sample_b_1d_pos(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; CHECK-LABEL: @sample_b_1d_pos( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +; Check that bias is not optimized away if < 0 +define amdgpu_kernel void @sample_b_1d_neg(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +; CHECK-LABEL: @sample_b_1d_neg( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + +; Zero bias + A16 +define amdgpu_kernel void @sample_b_1d_a16(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +; CHECK-LABEL: @sample_b_1d_a16( +; CHECK-NEXT: main_body: +; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 
x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) +; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: ret void +; +main_body: + %s32 = fpext half %s to float + %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) + store <4 x float> %v, <4 x float> addrspace(1)* %out + ret void +} + ; -------------------------------------------------------------------- ; llvm.amdgcn.is.shared ; -------------------------------------------------------------------- From ae2f9c8be89768086b9f335d60bbe8312b212f95 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Tue, 21 Dec 2021 17:31:24 +0100 Subject: [PATCH 156/946] [AMDGPU] Remove lz and nomip combine from codegen These combines have been moved into the IR combiner in D116042. Differential Revision: https://reviews.llvm.org/D116116 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 24 - .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 38 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 26 - .../GlobalISel/image_ls_mipmap_zero.a16.ll | 667 ------------------ .../AMDGPU/GlobalISel/image_ls_mipmap_zero.ll | 403 ----------- .../llvm.amdgcn.image.sample.ltolz.a16.ll | 565 --------------- .../llvm.amdgcn.image.sample.ltolz.ll | 293 -------- llvm/test/CodeGen/AMDGPU/cluster_stores.ll | 19 +- .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 28 +- .../CodeGen/AMDGPU/image_ls_mipmap_zero.ll | 132 ---- .../AMDGPU/llvm.amdgcn.image.sample.ltolz.ll | 113 --- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 9 +- 12 files changed, 31 insertions(+), 2286 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll delete mode 100644 
llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll delete mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 6ab2cb6df3bda..e48dca3cc9572 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1510,10 +1510,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); - const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = - AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode); - const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo = - AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode); unsigned IntrOpcode = Intr->BaseOpcode; const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI); @@ -1586,26 +1582,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( } } - // Optimize _L to _LZ when _L is zero - if (LZMappingInfo) { - // The legalizer replaced the register with an immediate 0 if we need to - // change the opcode. 
- const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex); - if (Lod.isImm()) { - assert(Lod.getImm() == 0); - IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l - } - } - - // Optimize _mip away, when 'lod' is zero - if (MIPMappingInfo) { - const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex); - if (Lod.isImm()) { - assert(Lod.getImm() == 0); - IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip - } - } - // Set G16 opcode if (IsG16 && !IsA16) { const AMDGPU::MIMGG16MappingInfo *G16MappingInfo = diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 5092e0f553e2a..04c6f67ed3390 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4450,44 +4450,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( unsigned CorrectedNumVAddrs = Intr->NumVAddrs; - // Optimize _L to _LZ when _L is zero - if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = - AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) { - const ConstantFP *ConstantLod; - - if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI, - m_GFCst(ConstantLod))) { - if (ConstantLod->isZero() || ConstantLod->isNegative()) { - // Set new opcode to _lz variant of _l, and change the intrinsic ID. - const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = - AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ, - Intr->Dim); - - // The starting indexes should remain in the same place. 
- --CorrectedNumVAddrs; - - MI.getOperand(NumDefs).setIntrinsicID( - static_cast<Intrinsic::ID>(NewImageDimIntr->Intr)); - MI.RemoveOperand(ArgOffset + Intr->LodIndex); - Intr = NewImageDimIntr; - } - } - } - - // Optimize _mip away, when 'lod' is zero - if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) { - int64_t ConstantLod; - if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI, - m_ICst(ConstantLod))) { - if (ConstantLod == 0) { - // TODO: Change intrinsic opcode and remove operand instead or replacing - // it with 0, as the _L to _LZ handling is done above. - MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0); - --CorrectedNumVAddrs; - } - } - } - // Rewrite the addressing register layout before doing anything else. if (BaseOpcode->Gradients && !ST.hasG16() && (IsA16 != IsG16)) { // 16 bit gradients are supported, but are tied to the A16 control diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 26229b40f4dc5..4008c7e36e4f4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6188,10 +6188,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); - const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = - AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode); - const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo = - AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode); unsigned IntrOpcode = Intr->BaseOpcode; bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget); @@ -6279,28 +6275,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op, unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd; SmallVector<SDValue, 4> VAddrs; - // Optimize _L to _LZ when _L is zero - if (LZMappingInfo) { - if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>( - Op.getOperand(ArgOffset + Intr->LodIndex))) { - if (ConstantLod->isZero() || ConstantLod->isNegative()) 
{ - IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l - VAddrEnd--; // remove 'lod' - } - } - } - - // Optimize _mip away, when 'lod' is zero - if (MIPMappingInfo) { - if (auto *ConstantLod = dyn_cast<ConstantSDNode>( - Op.getOperand(ArgOffset + Intr->MipIndex))) { - if (ConstantLod->isZero()) { - IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip - VAddrEnd--; // remove 'mip' - } - } - } - // Check for 16 bit addresses or derivatives and pack if true. MVT VAddrVT = Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll deleted file mode 100644 index af7509fd0897d..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll +++ /dev/null @@ -1,667 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s - -define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i16 %s) { - ; GFX9-LABEL: name: load_mip_1d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_1d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i16 %s, i16 %t) { - ; GFX9-LABEL: name: load_mip_2d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 
0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_2d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY 
[[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: load_mip_3d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; 
GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_3d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom 
"ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i16 %s, i16 %t) { - ; GFX9-LABEL: name: load_mip_1darray - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_1darray - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: load_mip_2darray - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_2darray - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: load_mip_cube - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX9: $vgpr0 = COPY [[UV]](s32) - ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-LABEL: name: load_mip_cube - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GFX10: $vgpr0 = COPY [[UV]](s32) - ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s) { - ; GFX9-LABEL: name: store_mip_1d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_1d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, 
[[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t) { - ; GFX9-LABEL: name: store_mip_2d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; 
GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_2d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: store_mip_3d - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_3d - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, 
$sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> 
%rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t) { - ; GFX9-LABEL: name: store_mip_1darray - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_1darray - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - 
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: store_mip_2darray - ; GFX9: bb.1.main_body: - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = 
COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_2darray - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) { - ; GFX9-LABEL: name: store_mip_cube - ; GFX9: bb.1.main_body: - ; GFX9: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 - ; GFX10-LABEL: name: store_mip_cube - ; GFX10: bb.1.main_body: - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) - ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 -main_body: - call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, 
<8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1 - -attributes #0 = { nounwind readonly } -attributes #1 = { nounwind writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll deleted file mode 100644 index a5bf0dbf6c8b3..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll 
+++ /dev/null @@ -1,403 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s - -define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) { -; GFX9-LABEL: load_mip_1d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf unorm -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: load_mip_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { -; GFX9-LABEL: load_mip_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return 
to shader part epilog -; -; GFX10-LABEL: load_mip_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -; GFX9-LABEL: load_mip_3d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: load_mip_3d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { -; GFX9-LABEL: load_mip_1darray: -; GFX9: 
; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: load_mip_1darray: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -; GFX9-LABEL: load_mip_2darray: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: load_mip_2darray: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], 
v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -; GFX9-LABEL: load_mip_cube: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: load_mip_cube: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { -; GFX9-LABEL: store_mip_1d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; GFX9-NEXT: 
s_endpgm -; -; GFX10-LABEL: store_mip_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm -; GFX10-NEXT: s_endpgm -main_body: - call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { -; GFX9-LABEL: store_mip_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: store_mip_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm -; GFX10-NEXT: s_endpgm -main_body: - call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { -; GFX9-LABEL: store_mip_3d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 
-; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: store_mip_3d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm -; GFX10-NEXT: s_endpgm -main_body: - call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { -; GFX9-LABEL: store_mip_1darray: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: store_mip_1darray: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm -; GFX10-NEXT: s_endpgm -main_body: - call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 
%t, i32 %u) { -; GFX9-LABEL: store_mip_2darray: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: store_mip_2darray: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm -; GFX10-NEXT: s_endpgm -main_body: - call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { -; GFX9-LABEL: store_mip_cube: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da -; GFX9-NEXT: s_endpgm -; -; GFX10-LABEL: store_mip_cube: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm -; GFX10-NEXT: s_endpgm -main_body: - call 
void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0 -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -attributes #0 = { nounwind readonly } -attributes #1 = { nounwind writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll deleted file mode 100644 index 48a027502360c..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll +++ /dev/null @@ -1,565 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s - -define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { -; GFX9-LABEL: sample_l_1d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_l_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half 0.000000e+00, <8 x i32> 
%rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { -; GFX9-LABEL: sample_l_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 -; GFX9-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_l_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { -; GFX9-LABEL: sample_c_l_1d: -; GFX9: ; 
%bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: s_lshl_b32 s12, s0, 16 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s12 -; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_c_l_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_lshl_b32 s12, s0, 16 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s12 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { -; GFX9-LABEL: sample_c_l_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: 
s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 -; GFX9-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_c_l_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %lod) { -; GFX9-LABEL: sample_l_o_1d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: s_lshl_b32 s12, s0, 16 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s7, s9 -; 
GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s12 -; GFX9-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_l_o_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_lshl_b32 s12, s0, 16 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s12 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 15, i32 %offset, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) { -; GFX9-LABEL: sample_l_o_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 -; GFX9-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt 
vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_l_o_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %lod) { -; GFX9-LABEL: sample_c_l_o_1d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NEXT: s_lshl_b32 s12, s0, 16 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v2, v2, v3, s12 -; GFX9-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_c_l_o_1d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s2, s4 -; 
GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_lshl_b32 s12, s0, 16 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, s12 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) { -; GFX9-LABEL: sample_c_l_o_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 -; GFX9-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: sample_c_l_o_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 -; GFX10-NEXT: 
s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { -; GFX9-LABEL: gather4_l_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 -; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: gather4_l_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: 
s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 15, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { -; GFX9-LABEL: gather4_c_l_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 -; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: gather4_c_l_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return 
to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) { -; GFX9-LABEL: gather4_l_o_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2 -; GFX9-NEXT: image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: gather4_l_o_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) { -; GFX9-LABEL: gather4_c_l_o_2d: -; GFX9: ; %bb.0: ; %main_body -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX9-NEXT: s_mov_b32 s0, s2 -; GFX9-NEXT: s_mov_b32 s1, s3 -; GFX9-NEXT: s_mov_b32 s2, s4 -; GFX9-NEXT: s_mov_b32 s3, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s6, s8 -; GFX9-NEXT: s_mov_b32 s7, s9 -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 -; GFX9-NEXT: image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: gather4_c_l_o_2d: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: s_mov_b32 s8, s10 -; GFX10-NEXT: s_mov_b32 s9, s11 -; GFX10-NEXT: s_mov_b32 s10, s12 -; GFX10-NEXT: s_mov_b32 s11, s13 -; GFX10-NEXT: image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 immarg, half, half, <8 x 
i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 immarg, i32, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 immarg, i32, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 - -attributes #0 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll deleted file mode 100644 index 516e92e08b16e..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll +++ /dev/null @@ -1,293 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GCN %s - -define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { -; GCN-LABEL: sample_l_1d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { -; GCN-LABEL: sample_l_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog 
-main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { -; GCN-LABEL: sample_c_l_1d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { -; GCN-LABEL: sample_c_l_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x 
i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { -; GCN-LABEL: sample_l_o_1d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { -; GCN-LABEL: sample_l_o_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg 
%rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { -; GCN-LABEL: sample_c_l_o_1d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { -; GCN-LABEL: sample_c_l_o_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { -; 
GCN-LABEL: gather4_l_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { -; GCN-LABEL: gather4_c_l_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { -; GCN-LABEL: gather4_l_o_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: 
s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { -; GCN-LABEL: gather4_c_l_o_2d: -; GCN: ; %bb.0: ; %main_body -; GCN-NEXT: s_mov_b32 s0, s2 -; GCN-NEXT: s_mov_b32 s1, s3 -; GCN-NEXT: s_mov_b32 s2, s4 -; GCN-NEXT: s_mov_b32 s3, s5 -; GCN-NEXT: s_mov_b32 s4, s6 -; GCN-NEXT: s_mov_b32 s5, s7 -; GCN-NEXT: s_mov_b32 s6, s8 -; GCN-NEXT: s_mov_b32 s7, s9 -; GCN-NEXT: s_mov_b32 s8, s10 -; GCN-NEXT: s_mov_b32 s9, s11 -; GCN-NEXT: s_mov_b32 s10, s12 -; GCN-NEXT: s_mov_b32 s11, s13 -; GCN-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ; return to shader part epilog -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - ret <4 x float> %v -} - -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> 
@llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 - -attributes #0 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll index 8fab3d392c98f..763ead034f612 100644 --- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll +++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll @@ -269,10 +269,10 @@ define amdgpu_ps void @cluster_image_load(<8 x i32> inreg %src, <8 x i32> inreg entry: 
%x1 = add i32 %x, 1 %y1 = add i32 %y, 1 - %val1 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %x1, i32 %y1, i32 0, <8 x i32> %src, i32 0, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %x1, i32 %y1, <8 x i32> %src, i32 0, i32 0) %x2 = add i32 %x, 2 %y2 = add i32 %y, 2 - %val2 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %x2, i32 %y2, i32 0, <8 x i32> %src, i32 0, i32 0) + %val2 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %x2, i32 %y2, <8 x i32> %src, i32 0, i32 0) %val = fadd fast <4 x float> %val1, %val2 call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %val, i32 15, i32 %x, i32 %y, <8 x i32> %dst, i32 0, i32 0) ret void @@ -286,20 +286,22 @@ entry: define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) { ; GFX9-LABEL: no_cluster_image_load: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: image_load v[2:5], v[0:1], s[0:7] dmask:0xf unorm -; GFX9-NEXT: image_load v[6:9], v[0:1], s[8:15] dmask:0xf unorm +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: image_load_mip v[3:6], v[0:2], s[0:7] dmask:0xf unorm +; GFX9-NEXT: image_load_mip v[7:10], v[0:2], s[8:15] dmask:0xf unorm ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v6, v6, v10 ; GFX9-NEXT: v_add_f32_e32 v5, v5, v9 ; GFX9-NEXT: v_add_f32_e32 v4, v4, v8 ; GFX9-NEXT: v_add_f32_e32 v3, v3, v7 -; GFX9-NEXT: v_add_f32_e32 v2, v2, v6 -; GFX9-NEXT: image_store v[2:5], v[0:1], s[16:23] dmask:0xf unorm +; GFX9-NEXT: image_store v[3:6], v[0:1], s[16:23] dmask:0xf unorm ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: no_cluster_image_load: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: image_load v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm -; GFX10-NEXT: image_load v[6:9], v[0:1], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm +; GFX10-NEXT: v_mov_b32_e32 v10, 0 +; GFX10-NEXT: image_load_mip v[2:5], [v0, v1, v10], s[0:7] dmask:0xf 
dim:SQ_RSRC_IMG_2D unorm +; GFX10-NEXT: image_load_mip v[6:9], [v0, v1, v10], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_add_f32_e32 v5, v5, v9 ; GFX10-NEXT: v_add_f32_e32 v4, v4, v8 @@ -389,6 +391,7 @@ entry: ret void } +declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll index 661b1ac056e8f..67c11cc5beaef 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll @@ -5,6 +5,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GCN-LABEL: _amdgpu_ps_main: ; GCN: ; %bb.0: ; %.entry ; GCN-NEXT: s_mov_b32 s0, 0 +; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: s_mov_b32 s1, s0 ; GCN-NEXT: s_mov_b32 s2, s0 ; GCN-NEXT: s_mov_b32 s3, s0 @@ -14,10 +15,11 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GCN-NEXT: s_mov_b32 s7, s0 ; GCN-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_clause 0x2 +; GCN-NEXT: s_clause 0x1 ; GCN-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D ; GCN-NEXT: image_sample v3, v[0:1], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D -; GCN-NEXT: image_load v4, v[0:1], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: image_load_mip v4, v[2:4], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm ; GCN-NEXT: s_clause 0x3 ; GCN-NEXT: s_buffer_load_dword s24, s[0:3], 0x5c ; 
GCN-NEXT: s_buffer_load_dword s28, s[0:3], 0x7c @@ -44,33 +46,31 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GCN-NEXT: v_sub_f32_e32 v8, s0, v1 ; GCN-NEXT: v_fma_f32 v7, -s2, v6, s6 ; GCN-NEXT: v_fma_f32 v5, v6, v5, 1.0 +; GCN-NEXT: v_mad_f32 v10, s2, v6, v2 ; GCN-NEXT: s_mov_b32 s0, 0x3c23d70a ; GCN-NEXT: v_fmac_f32_e32 v1, v6, v8 +; GCN-NEXT: v_mac_f32_e32 v10, v7, v6 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mul_f32_e32 v9, s10, v0 ; GCN-NEXT: v_fma_f32 v0, -v0, s10, s14 -; GCN-NEXT: v_fmac_f32_e32 v9, v0, v6 -; GCN-NEXT: v_sub_f32_e32 v0, v1, v5 -; GCN-NEXT: v_fmac_f32_e32 v5, v0, v6 -; GCN-NEXT: s_waitcnt vmcnt(2) -; GCN-NEXT: v_mad_f32 v10, s2, v6, v2 ; GCN-NEXT: v_mul_f32_e32 v8, s18, v2 -; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_mul_f32_e32 v3, s22, v3 -; GCN-NEXT: v_mac_f32_e32 v10, v7, v6 +; GCN-NEXT: v_fmac_f32_e32 v9, v0, v6 +; GCN-NEXT: v_sub_f32_e32 v0, v1, v5 ; GCN-NEXT: v_mul_f32_e32 v1, v8, v6 ; GCN-NEXT: v_mul_f32_e32 v7, v6, v3 ; GCN-NEXT: v_fma_f32 v3, -v6, v3, v9 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_f32_e32 v4, v4, v10 +; GCN-NEXT: v_fmac_f32_e32 v5, v0, v6 ; GCN-NEXT: v_fma_f32 v0, v2, s26, -v1 ; GCN-NEXT: v_fmac_f32_e32 v7, v3, v6 -; GCN-NEXT: v_mul_f32_e32 v3, v4, v6 -; GCN-NEXT: v_fma_f32 v4, v5, s0, 0x3ca3d70a ; GCN-NEXT: v_fmac_f32_e32 v1, v0, v6 ; GCN-NEXT: v_mul_f32_e32 v0, v2, v6 -; GCN-NEXT: v_mul_f32_e32 v2, v7, v4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_f32_e32 v4, v4, v10 +; GCN-NEXT: v_mul_f32_e32 v3, v4, v6 +; GCN-NEXT: v_fma_f32 v4, v5, s0, 0x3ca3d70a ; GCN-NEXT: v_mul_f32_e32 v1, v3, v1 +; GCN-NEXT: v_mul_f32_e32 v2, v7, v4 ; GCN-NEXT: v_fmac_f32_e32 v1, v2, v0 ; GCN-NEXT: v_max_f32_e32 v0, 0, v1 ; GCN-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll b/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll deleted file mode 100644 index 7e3270fa288ca..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll +++ /dev/null @@ -1,132 
+0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s - - -; GCN-LABEL: {{^}}load_mip_1d: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_mip_2d: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_mip_3d: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_mip_1darray: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_mip_2darray: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}load_mip_cube: -; GCN-NOT: image_load_mip -; GCN: image_load -define amdgpu_ps <4 x 
float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret <4 x float> %v -} - - - -; GCN-LABEL: {{^}}store_mip_1d: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { -main_body: - call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -; GCN-LABEL: {{^}}store_mip_2d: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { -main_body: - call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -; GCN-LABEL: {{^}}store_mip_3d: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { -main_body: - call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -; GCN-LABEL: {{^}}store_mip_1darray: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { -main_body: - call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -; GCN-LABEL: {{^}}store_mip_2darray: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { -main_body: - call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - 
ret void -} - -; GCN-LABEL: {{^}}store_mip_cube: -; GCN-NOT: image_store_mip -; GCN: image_store -define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { -main_body: - call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - ret void -} - -declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 - - -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 - -attributes #0 = { nounwind } -attributes #1 = { nounwind readonly } - diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll deleted file mode 100644 index 
330e2f4ce1496..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll +++ /dev/null @@ -1,113 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s - - -; GCN-LABEL: {{^}}sample_l_1d: -; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_l_2d: -; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_c_l_1d: -; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_c_l_2d: -; GCN: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, 
<4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_l_o_1d: -; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_l_o_2d: -; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_c_l_o_1d: -; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}sample_c_l_o_2d: -; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}gather4_l_2d: -; GCN: image_gather4_lz v[0:3], 
v[0:1], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}gather4_c_l_2d: -; GCN: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}gather4_l_o_2d: -; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -; GCN-LABEL: {{^}}gather4_c_l_o_2d: -; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}} -define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { -main_body: - %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - ret <4 x float> %v -} - -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, 
<8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 - -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 84cec8366259d..f212b46432ffb 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1235,6 +1235,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; SI: ; %bb.0: ; %.entry ; SI-NEXT: s_mov_b32 s4, 0 ; SI-NEXT: s_mov_b64 s[0:1], exec +; SI-NEXT: v_mov_b32_e32 v4, 0 ; SI-NEXT: v_mov_b32_e32 v2, v1 ; SI-NEXT: v_mov_b32_e32 v3, v1 ; SI-NEXT: s_mov_b32 s5, s4 @@ -1244,7 +1245,7 @@ define amdgpu_ps void @cbranch_kill(i32 
inreg %0, float %val0, float %val1) { ; SI-NEXT: s_mov_b32 s9, s4 ; SI-NEXT: s_mov_b32 s10, s4 ; SI-NEXT: s_mov_b32 s11, s4 -; SI-NEXT: image_sample_lz v1, v[1:3], s[4:11], s[0:3] dmask:0x1 da +; SI-NEXT: image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 ; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -1274,6 +1275,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; ; GFX10-WAVE64-LABEL: cbranch_kill: ; GFX10-WAVE64: ; %bb.0: ; %.entry +; GFX10-WAVE64-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-WAVE64-NEXT: s_mov_b32 s4, 0 ; GFX10-WAVE64-NEXT: s_mov_b64 s[0:1], exec ; GFX10-WAVE64-NEXT: s_mov_b32 s5, s4 @@ -1283,7 +1285,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX10-WAVE64-NEXT: s_mov_b32 s9, s4 ; GFX10-WAVE64-NEXT: s_mov_b32 s10, s4 ; GFX10-WAVE64-NEXT: s_mov_b32 s11, s4 -; GFX10-WAVE64-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-WAVE64-NEXT: image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-WAVE64-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1 ; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -1313,6 +1315,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; ; GFX10-WAVE32-LABEL: cbranch_kill: ; GFX10-WAVE32: ; %bb.0: ; %.entry +; GFX10-WAVE32-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-WAVE32-NEXT: s_mov_b32 s4, 0 ; GFX10-WAVE32-NEXT: s_mov_b32 s0, exec_lo ; GFX10-WAVE32-NEXT: s_mov_b32 s5, s4 @@ -1322,7 +1325,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) { ; GFX10-WAVE32-NEXT: s_mov_b32 s9, s4 ; GFX10-WAVE32-NEXT: s_mov_b32 s10, s4 ; GFX10-WAVE32-NEXT: s_mov_b32 s11, s4 -; GFX10-WAVE32-NEXT: image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX10-WAVE32-NEXT: image_sample_l v1, [v1, v1, v1, v2], 
s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-WAVE32-NEXT: s_waitcnt vmcnt(0) ; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1 ; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo From f53d359816e66a107195e1e4b581e2a33bbafaa4 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 12:12:16 +0100 Subject: [PATCH 157/946] Fix 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d Part 2 --- llvm/include/llvm/Demangle/Utility.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 989b41701e4c9..587c0e4bec36d 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -60,7 +60,7 @@ class OutputBuffer { // Add negative sign... if (isNeg) *--TempPtr = '-'; - this->operator<<(StringView(TempPtr, Temp.end())); + this->operator<<(StringView(TempPtr, Temp.data() + Temp.size())); } public: From 9c5b856dac5c0ccbe755410b826f683ef01d7f08 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 12:20:23 +0100 Subject: [PATCH 158/946] [CoroSplit] Avoid pointer element type accesses Use isOpaqueOrPointeeTypeMatches() for the assertions instead. 
--- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 2e4661fa1d0a1..b5129809c6a6a 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -618,7 +618,8 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, Value *CachedSlot = nullptr; auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * { if (CachedSlot) { - assert(CachedSlot->getType()->getPointerElementType() == ValueTy && + assert(cast(CachedSlot->getType()) + ->isOpaqueOrPointeeTypeMatches(ValueTy) && "multiple swifterror slots in function with different types"); return CachedSlot; } @@ -627,7 +628,8 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape, for (auto &Arg : F.args()) { if (Arg.isSwiftError()) { CachedSlot = &Arg; - assert(Arg.getType()->getPointerElementType() == ValueTy && + assert(cast(Arg.getType()) + ->isOpaqueOrPointeeTypeMatches(ValueTy) && "swifterror argument does not have expected type"); return &Arg; } From 0ca426d6ac65b84c70ac7fd9511628ce5115423e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 11:22:36 +0000 Subject: [PATCH 159/946] [llvm-mca] Improve barriers for strict region marking (PR52198) As suggested on the bug, to help (but not completely....) stop folded instructions crossing the inline asm barriers used for llvm-mca analysis, we should recommend tagging with memory captures/attributes. Differential Revision: https://reviews.llvm.org/D117788 --- llvm/docs/CommandGuide/llvm-mca.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index b08f088762799..fdb45783ea663 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -299,9 +299,9 @@ C++. 
As a workaround, inline assembly directives may be used: .. code-block:: c++ int foo(int a, int b) { - __asm volatile("# LLVM-MCA-BEGIN foo"); + __asm volatile("# LLVM-MCA-BEGIN foo":::"memory"); a += 42; - __asm volatile("# LLVM-MCA-END"); + __asm volatile("# LLVM-MCA-END":::"memory"); a *= b; return a; } From bfbdb5e43e50484e179122bfb66cff8165e4b084 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 12:34:35 +0100 Subject: [PATCH 160/946] [Coroutines] Avoid some pointer element type accesses These are just verifying that pointer types are correct, which is no longer relevant under opaque pointers. --- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 1078aac257d72..a8123aee319ef 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -676,6 +676,9 @@ static void checkAsyncFuncPointer(const Instruction *I, Value *V) { if (!AsyncFuncPtrAddr) fail(I, "llvm.coro.id.async async function pointer not a global", V); + if (AsyncFuncPtrAddr->getType()->isOpaquePointerTy()) + return; + auto *StructTy = cast(AsyncFuncPtrAddr->getType()->getPointerElementType()); if (StructTy->isOpaque() || !StructTy->isPacked() || @@ -701,14 +704,16 @@ void CoroIdAsyncInst::checkWellFormed() const { static void checkAsyncContextProjectFunction(const Instruction *I, Function *F) { auto *FunTy = cast(F->getValueType()); - if (!FunTy->getReturnType()->isPointerTy() || - !FunTy->getReturnType()->getPointerElementType()->isIntegerTy(8)) + Type *Int8Ty = Type::getInt8Ty(F->getContext()); + auto *RetPtrTy = dyn_cast(FunTy->getReturnType()); + if (!RetPtrTy || !RetPtrTy->isOpaqueOrPointeeTypeMatches(Int8Ty)) fail(I, "llvm.coro.suspend.async resume function projection function must " "return an i8* type", F); if (FunTy->getNumParams() != 1 || 
!FunTy->getParamType(0)->isPointerTy() || - !FunTy->getParamType(0)->getPointerElementType()->isIntegerTy(8)) + !cast(FunTy->getParamType(0)) + ->isOpaqueOrPointeeTypeMatches(Int8Ty)) fail(I, "llvm.coro.suspend.async resume function projection function must " "take one i8* type as parameter", @@ -723,8 +728,7 @@ void CoroAsyncEndInst::checkWellFormed() const { auto *MustTailCallFunc = getMustTailCallFunction(); if (!MustTailCallFunc) return; - auto *FnTy = - cast(MustTailCallFunc->getType()->getPointerElementType()); + auto *FnTy = MustTailCallFunc->getFunctionType(); if (FnTy->getNumParams() != (arg_size() - 3)) fail(this, "llvm.coro.end.async must tail call function argument type must " From 597eae998a874a872b67d1a22a04d7c45d2ef94b Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 20 Jan 2022 17:22:09 +0100 Subject: [PATCH 161/946] [clangd][Background] Make index validation logs verbose These errors are non-harmful and should be transient. They either imply: - compilation database returned stale results for TUs and it'll be fixed once it's updated to match project state. - a TUs dependencies has changed and some headers no longer exist. this should be fixed with the next indexing cycle. In either case the user will have some stale symbols in their index until clangd restarts and the underlying issue is resolved. On the downside these logs are confusing users when there's another issue. 
Differential Revision: https://reviews.llvm.org/D117792 --- clang-tools-extra/clangd/index/Background.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index ddfe962d31890..a6ee1d980e049 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -82,7 +82,7 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { auto Buf = FS->getBufferForFile(LS.AbsolutePath); if (!Buf) { - elog("Background-index: Couldn't read {0} to validate stored index: {1}", + vlog("Background-index: Couldn't read {0} to validate stored index: {1}", LS.AbsolutePath, Buf.getError().message()); // There is no point in indexing an unreadable file. return false; From b6a41fddcfd375ce30487ef87ca2cd65a6be0bcc Mon Sep 17 00:00:00 2001 From: OCHyams Date: Fri, 21 Jan 2022 10:54:53 +0000 Subject: [PATCH 162/946] [DWARF][DebugInfo] Fix off-by-one error in size of DW_TAG_base_type types Fix PR53163 by rounding the byte size of DW_TAG_base_type types up. Without this fix we risk emitting types with a truncated size (including rounding less-than-byte-sized types' sizes down to zero). 
Reviewed By: probinson Differential Revision: https://reviews.llvm.org/D117124 --- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 +- llvm/test/DebugInfo/X86/base-type-size.ll | 50 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/X86/base-type-size.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 3ab73d128aed1..ab3c9f486670e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1581,7 +1581,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Twine(dwarf::AttributeEncodingString(Btr.Encoding) + "_" + Twine(Btr.BitSize)).toStringRef(Str)); addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); - addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8); + // Round up to smallest number of bytes that contains this number of bits. + addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8)); Btr.Die = &Die; } diff --git a/llvm/test/DebugInfo/X86/base-type-size.ll b/llvm/test/DebugInfo/X86/base-type-size.ll new file mode 100644 index 0000000000000..3a8dc37bdc65f --- /dev/null +++ b/llvm/test/DebugInfo/X86/base-type-size.ll @@ -0,0 +1,50 @@ +; RUN: llc %s --filetype=obj -o - | llvm-dwarfdump - -o - | FileCheck %s + +;; cat test.cpp +;; void ext(bool); +;; void fun(bool b) { ext(b); } +;; $ clang++ test.cpp -o - -emit-llvm -S -O2 -gdwarf-5 +;; +;; Check that the DW_TAG_base_type DIE for the 1u conversion in the DIExpression +;; has a non-zero DW_AT_byte_size attribute. 
+ +; CHECK: DW_TAG_base_type +; CHECK-NEXT: DW_AT_name ("DW_ATE_unsigned_1") +; CHECK-NEXT: DW_AT_encoding (DW_ATE_unsigned) +; CHECK-NEXT: DW_AT_byte_size (0x01) + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @_Z3funb(i1 zeroext %b) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata i1 %b, metadata !12, metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13 + tail call void @_Z3extb(i1 zeroext %b), !dbg !14 + ret void, !dbg !15 +} + +declare !dbg !16 dso_local void @_Z3extb(i1 zeroext) local_unnamed_addr +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funb", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean) +!11 = !{!12} +!12 = !DILocalVariable(name: "b", arg: 1, scope: !7, file: !1, line: 2, type: !10) +!13 = !DILocation(line: 0, scope: !7) +!14 = !DILocation(line: 2, column: 20, scope: !7) +!15 = !DILocation(line: 2, column: 28, scope: !7) +!16 = !DISubprogram(name: "ext", linkageName:
"_Z3extb", scope: !1, file: !1, line: 1, type: !8, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !17) +!17 = !{} From 357f2d9ccf204981fd90481ba5ebb942ea46d7f9 Mon Sep 17 00:00:00 2001 From: Siddharth Bhat Date: Fri, 21 Jan 2022 17:02:39 +0530 Subject: [PATCH 163/946] [mlir][LangRef] Add top-level production to the MLIR grammar The LangRef currently lacks a top-level production, leaving the productions attribute-alias-def and type-alias-def unused. Clarify the situation by declaring what is to be parsed by an MLIR parser at the toplevel. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D117668 --- mlir/docs/LangRef.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md index 357778a54f087..4b956c581cb55 100644 --- a/mlir/docs/LangRef.md +++ b/mlir/docs/LangRef.md @@ -179,6 +179,19 @@ string-literal ::= `"` [^"\n\f\v\r]* `"` TODO: define escaping rules Not listed here, but MLIR does support comments. They use standard BCPL syntax, starting with a `//` and going until the end of the line. + +### Top level Productions + +``` +// Top level production +toplevel := (operation | attribute-alias-def | type-alias-def)* +``` + +The production `toplevel` is the top level production that is parsed by any parser +consuming the MLIR syntax. [Operations](#operations), +[Attribute aliases](#attribute-value-aliases), and [Type aliases](#type-aliases) +can be declared on the toplevel. + ### Identifiers and keywords Syntax: From 4d268dc94a6bef0221c94d4b7e4c2b112e75fe1b Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 19 Jan 2022 17:49:33 +0000 Subject: [PATCH 164/946] [RISCV] Enable CGP to sink splat operands of VP intrinsics This patch brings better splat-matching to our VP support, by sinking splat operands of VP intrinsics back into the same block as the VP operation. The list of VP intrinsics we are interested in matches that of the regular instructions.
Some optimization is still lacking. For instance, our VL nodes aren't recognized as commutative, so splats must be on the RHS. Because of this, we limit our sinking of splats to just the RHS operand for now. Improvement in this regard can come in another patch. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117703 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 24 + .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 771 ++++++++++++++++++ 2 files changed, 795 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 507a21b16e4eb..4aba42b014d17 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1260,6 +1260,30 @@ bool RISCVTargetLowering::shouldSinkOperands( switch (II->getIntrinsicID()) { case Intrinsic::fma: return Operand == 0 || Operand == 1; + // FIXME: Our patterns can only match vx/vf instructions when the splat + // is on the RHS, because TableGen doesn't recognize our VP operations + // as commutative. + case Intrinsic::vp_add: + case Intrinsic::vp_mul: + case Intrinsic::vp_and: + case Intrinsic::vp_or: + case Intrinsic::vp_xor: + case Intrinsic::vp_fadd: + case Intrinsic::vp_fsub: + case Intrinsic::vp_fmul: + case Intrinsic::vp_fdiv: + case Intrinsic::vp_shl: + case Intrinsic::vp_lshr: + case Intrinsic::vp_ashr: + case Intrinsic::vp_udiv: + case Intrinsic::vp_sdiv: + case Intrinsic::vp_urem: + case Intrinsic::vp_srem: + return Operand == 1; + // ... the one exception is vp.sub which has explicit patterns for both + // LHS and RHS (as vrsub).
+ case Intrinsic::vp_sub: + return Operand == 0 || Operand == 1; default: return false; } diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 2acb185d59ead..f6a348418f5b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -2961,3 +2961,774 @@ for.body: ; preds = %for.body.preheader, %cmp.not = icmp eq i64 %indvars.iv.next, 1024 br i1 %cmp.not, label %for.cond.cleanup, label %for.body } + +declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_mul(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_mul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB46_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vmul.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB46_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 
%index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_add(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB47_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vadd.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB47_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +; FIXME: This doesn't match against vadd.vx because our patterns aren't +; commutative. 
+ +define void @sink_splat_vp_add_commute(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_add_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB48_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vadd.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB48_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_sub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_sub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB49_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, 
ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB49_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +define void @sink_splat_vp_rsub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_rsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB50_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vrsub.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB50_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 
0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_shl(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_shl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB51_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vsll.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB51_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, 
<4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_lshr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_lshr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB52_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB52_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + 
+for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_ashr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_ashr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB53_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vsra.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB53_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @sink_splat_vp_fmul(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_fmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: li a1, 1024 +; 
CHECK-NEXT: .LBB54_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB54_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fmul.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @sink_splat_vp_fdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_fdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB55_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, 
m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB55_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +; FIXME: vfrdiv.vf doesn't match against masked instructions + +define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_frdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB56_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB56_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 
+ %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @sink_splat_vp_fadd(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_fadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB57_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB57_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, 
i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @sink_splat_vp_fsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_fsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB58_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB58_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x 
float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) + +; FIXME: vfrsub.vf doesn't match against masked instructions + +define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_frsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fmv.w.x ft0, a1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v8, ft0 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB59_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB59_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0 + %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %a, i64 %index + %1 = bitcast float* %0 to <4 x float>* + %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %2 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl) + %3 = bitcast float* %0 to <4 x float>* + store <4 x float> %2, <4 x float>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret 
void +} + +declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_udiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_udiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB60_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vdivu.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB60_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_sdiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_sdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB61_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: 
Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vdiv.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB61_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_urem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_urem: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB62_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vremu.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB62_1 +; 
CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define void @sink_splat_vp_srem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_srem: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: .LBB63_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vrem.vx v8, v8, a1, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: addi a3, a3, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a3, .LBB63_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ 
%index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} + +; Check that we don't sink a splat operand that has no chance of being folded. + +define void @sink_splat_vp_srem_commute(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: sink_splat_vp_srem_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: .LBB64_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vrem.vv v9, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: bnez a1, .LBB64_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds i32, i32* %a, i64 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %2 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl) + %3 = bitcast i32* %0 to <4 x 
i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, 1024 + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret void +} From 825a3cd6b6972b6a50b80bed7d951d7ea7f90669 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 20 Jan 2022 14:11:15 +0100 Subject: [PATCH 165/946] [clangd] Fail inlayHints requests on content changes This should improve the overall UX by making the labels less jumpy. Differential Revision: https://reviews.llvm.org/D117776 --- clang-tools-extra/clangd/ClangdServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 0760ed5317be8..a7210e0526a41 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -775,7 +775,7 @@ void ClangdServer::inlayHints(PathRef File, llvm::Optional RestrictRange, return CB(InpAST.takeError()); CB(clangd::inlayHints(InpAST->AST, std::move(RestrictRange))); }; - WorkScheduler->runWithAST("InlayHints", File, std::move(Action)); + WorkScheduler->runWithAST("InlayHints", File, std::move(Action), Transient); } void ClangdServer::onFileEvent(const DidChangeWatchedFilesParams &Params) { From 4727d29d908f9dd608dd97a58c0af1ad579fd3ca Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 11:55:40 +0000 Subject: [PATCH 166/946] [X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN) This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes: ``` __m256i test_mm256_abs_epi8(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi8 // CHECK: [[ABS:%.*]] = call <32 x i8> 
@llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) return _mm256_abs_epi8(a); } ``` This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`). Differential Revision: https://reviews.llvm.org/D117791 --- clang/include/clang/Basic/BuiltinsX86.def | 12 ------------ clang/lib/CodeGen/CGBuiltin.cpp | 15 --------------- clang/lib/Headers/avx2intrin.h | 6 +++--- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/avx512fintrin.h | 8 ++++++-- clang/lib/Headers/avx512vlintrin.h | 4 ++-- clang/lib/Headers/tmmintrin.h | 6 +++--- clang/test/CodeGen/builtins-x86.c | 3 --- 8 files changed, 16 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index bc6208be45606..9b7c763b0c6c7 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") @@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") // AVX2 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") 
TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") @@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") @@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") @@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") 
TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a49c035002786..49f054ec1a982 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } - case X86::BI__builtin_ia32_pabsb128: - case X86::BI__builtin_ia32_pabsw128: - case X86::BI__builtin_ia32_pabsd128: - case X86::BI__builtin_ia32_pabsb256: - case X86::BI__builtin_ia32_pabsw256: - case X86::BI__builtin_ia32_pabsd256: - case X86::BI__builtin_ia32_pabsq128: - case X86::BI__builtin_ia32_pabsq256: - case X86::BI__builtin_ia32_pabsb512: - case X86::BI__builtin_ia32_pabsw512: - case X86::BI__builtin_ia32_pabsd512: - case X86::BI__builtin_ia32_pabsq512: { - Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); - } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 5064c87c2bb19..c9ad74ce3fa42 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -26,19 +26,19 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { - return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); + return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_abs_epi16(__m256i __a) { - return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); + return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { - return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); + return (__m256i)__builtin_elementwise_abs((__v8si)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 6aee8aed84871..53319eb23011d 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); + return (__m512i)__builtin_elementwise_abs((__v64qs)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); + return (__m512i)__builtin_elementwise_abs((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index df298640523b7..9b02a7cffc64d 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); +/* We need an explicitly signed variant for char. Note that this shouldn't + * appear in the interface though. 
*/ +typedef signed char __v64qs __attribute__((__vector_size__(64))); + typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); @@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { - return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); + return (__m512i)__builtin_elementwise_abs((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { - return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); + return (__m512i)__builtin_elementwise_abs((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 0519dba59081a..eddb99902e3d5 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { - return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); + return (__m128i)__builtin_elementwise_abs((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { - return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); + return (__m256i)__builtin_elementwise_abs((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index bcffa8187801c..cb9be2349de5a 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -53,7 +53,7 
@@ _mm_abs_pi8(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { - return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); + return (__m128i)__builtin_elementwise_abs((__v16qs)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { - return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); + return (__m128i)__builtin_elementwise_abs((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { - return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); + return (__m128i)__builtin_elementwise_abs((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 61b9d53c74f9d..bfcd30072fc1f 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -259,11 +259,8 @@ void f0() { tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s); tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i); tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i); - tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c); tmp_V8c = __builtin_ia32_pabsb(tmp_V8c); - tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s); tmp_V4s = __builtin_ia32_pabsw(tmp_V4s); - tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i); tmp_V2i = __builtin_ia32_pabsd(tmp_V2i); tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi); tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi); From ced077e1ba52ec2937aab538e9b6fa5149f8c567 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 21 Jan 2022 10:54:27 +0100 Subject: [PATCH 167/946] [clang][deps] NFC: Simplify handling of cached FS errors The return types of some `CachedFileSystemEntry` member functions are needlessly complex.
This patch attempts to simplify the code by unwrapping cached entries that represent errors early, and then asserting `!isError()`. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D115935 --- .../DependencyScanningFilesystem.h | 61 ++++++++++++------- .../DependencyScanningFilesystem.cpp | 13 ++-- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 1358950b437c8..08a60fe780f50 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -57,25 +57,26 @@ class CachedFileSystemEntry { return !MaybeStat || MaybeStat->isStatusKnown(); } + /// \returns True if the entry is a filesystem error. + bool isError() const { return !MaybeStat; } + /// \returns True if the current entry points to a directory. - bool isDirectory() const { return MaybeStat && MaybeStat->isDirectory(); } + bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } - /// \returns The error or the file's original contents. - llvm::ErrorOr getOriginalContents() const { - if (!MaybeStat) - return MaybeStat.getError(); - assert(!MaybeStat->isDirectory() && "not a file"); + /// \returns Original contents of the file. + StringRef getOriginalContents() const { assert(isInitialized() && "not initialized"); + assert(!isError() && "error"); + assert(!MaybeStat->isDirectory() && "not a file"); assert(OriginalContents && "not read"); return OriginalContents->getBuffer(); } - /// \returns The error or the file's minimized contents. - llvm::ErrorOr getMinimizedContents() const { - if (!MaybeStat) - return MaybeStat.getError(); - assert(!MaybeStat->isDirectory() && "not a file"); + /// \returns Minimized contents of the file. 
+ StringRef getMinimizedContents() const { assert(isInitialized() && "not initialized"); + assert(!isError() && "error"); + assert(!isDirectory() && "not a file"); llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load(); assert(Buffer && "not minimized"); return Buffer->getBuffer(); @@ -94,21 +95,31 @@ class CachedFileSystemEntry { return ShouldBeMinimized && !MinimizedContentsAccess.load(); } - /// \returns The error or the status of the entry. - llvm::ErrorOr getStatus() const { + /// \returns The error. + std::error_code getError() const { + assert(isInitialized() && "not initialized"); + return MaybeStat.getError(); + } + + /// \returns The entry status. + llvm::vfs::Status getStatus() const { assert(isInitialized() && "not initialized"); - return MaybeStat; + assert(!isError() && "error"); + return *MaybeStat; } /// \returns the name of the file. StringRef getName() const { assert(isInitialized() && "not initialized"); + assert(!isError() && "error"); return MaybeStat->getName(); } /// Return the mapping between location -> distance that is used to speed up /// the block skipping in the preprocessor. 
const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { + assert(!isError() && "error"); + assert(!isDirectory() && "not a file"); return PPSkippedRangeMapping; } @@ -183,19 +194,25 @@ class EntryRef { EntryRef(bool Minimized, const CachedFileSystemEntry &Entry) : Minimized(Minimized), Entry(Entry) {} - llvm::ErrorOr getStatus() const { - auto MaybeStat = Entry.getStatus(); - if (!MaybeStat || MaybeStat->isDirectory()) - return MaybeStat; - return llvm::vfs::Status::copyWithNewSize(*MaybeStat, - getContents()->size()); + llvm::vfs::Status getStatus() const { + llvm::vfs::Status Stat = Entry.getStatus(); + if (Stat.isDirectory()) + return Stat; + return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); } + bool isError() const { return Entry.isError(); } bool isDirectory() const { return Entry.isDirectory(); } - StringRef getName() const { return Entry.getName(); } - llvm::ErrorOr getContents() const { + /// If the cached entry represents an error, promotes it into `ErrorOr`. + llvm::ErrorOr unwrapError() const { + if (isError()) + return Entry.getError(); + return *this; + } + + StringRef getContents() const { return Minimized ? Entry.getMinimizedContents() : Entry.getOriginalContents(); } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index acceec690c11e..6b8c692335bd6 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -164,7 +164,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( const auto *Entry = LocalCache.getCachedEntry(Filename); if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) - return EntryRef(ShouldBeMinimized, *Entry); + return EntryRef(ShouldBeMinimized, *Entry).unwrapError(); // FIXME: Handle PCM/PCH files. // FIXME: Handle module map files. 
@@ -194,7 +194,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( // Store the result in the local cache. Entry = &SharedCacheEntry.Value; - return EntryRef(ShouldBeMinimized, *Entry); + return EntryRef(ShouldBeMinimized, *Entry).unwrapError(); } llvm::ErrorOr @@ -241,16 +241,15 @@ class MinimizedVFSFile final : public llvm::vfs::File { llvm::ErrorOr> MinimizedVFSFile::create( EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) { + assert(!Entry.isError() && "error"); + if (Entry.isDirectory()) return std::make_error_code(std::errc::is_a_directory); - llvm::ErrorOr Contents = Entry.getContents(); - if (!Contents) - return Contents.getError(); auto Result = std::make_unique( - llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(), + llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(), /*RequiresNullTerminator=*/false), - *Entry.getStatus()); + Entry.getStatus()); const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping(); if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings) From 5daeada33051aa85777593d3f69eb29f26e7fb2f Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 21 Jan 2022 10:55:34 +0100 Subject: [PATCH 168/946] [clang][deps] Ensure filesystem cache consistency The minimizing filesystem used by the dependency scanner isn't great when it comes to the consistency of its caches. There are two problems that can be exposed by a filesystem that changes during dependency scan: 1. In-memory cache entries for original and minimized files are distinct, populated at different times using separate stat/open syscalls. This means that when a file is read with minimization disabled, its contents might be inconsistent when the same file is read with minimization enabled at later point (and vice versa). 2. In-memory cache entries are indexed by filename. 
This is problematic for symlinks, where the contents of the symlink might be inconsistent with contents of the original file (for the same reason as in problem 1). This patch ensures consistency by always stating/reading a file exactly once. The original contents are always cached and minimized contents are derived from that on demand. The cache entries are now indexed by their `UniqueID` ensuring consistency for symlinks too. Moreover, the stat/read syscalls are now issued outside of critical section. Depends on D115935. Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D114966 --- .../DependencyScanningFilesystem.h | 302 +++++++++++++----- .../DependencyScanningFilesystem.cpp | 224 +++++++++---- .../Tooling/DependencyScannerTest.cpp | 4 + 3 files changed, 379 insertions(+), 151 deletions(-) diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 08a60fe780f50..70e9c4a3ffea2 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -22,6 +22,26 @@ namespace clang { namespace tooling { namespace dependencies { +/// Original and minimized contents of a cached file entry. Single instance can +/// be shared between multiple entries. +struct CachedFileContents { + CachedFileContents(std::unique_ptr Original) + : Original(std::move(Original)), MinimizedAccess(nullptr) {} + + /// Owning storage for the original contents. + std::unique_ptr Original; + + /// The mutex that must be locked before mutating minimized contents. + std::mutex ValueLock; + /// Owning storage for the minimized contents. + std::unique_ptr MinimizedStorage; + /// Accessor to the minimized contents that's atomic to avoid data races. + std::atomic MinimizedAccess; + /// Skipped range mapping of the minimized contents.
+ /// This is initialized iff `MinimizedAccess != nullptr`. + PreprocessorSkippedRangeMapping PPSkippedRangeMapping; +}; + /// An in-memory representation of a file system entity that is of interest to /// the dependency scanning filesystem. /// @@ -29,111 +49,99 @@ namespace dependencies { /// - opened file with original contents and a stat value, /// - opened file with original contents, minimized contents and a stat value, /// - directory entry with its stat value, -/// - filesystem error, -/// - uninitialized entry with unknown status. +/// - filesystem error. +/// +/// Single instance of this class can be shared across different filenames (e.g. +/// a regular file and a symlink). For this reason the status filename is empty +/// and is only materialized by \c EntryRef that knows the requested filename. class CachedFileSystemEntry { public: - /// Creates an uninitialized entry. - CachedFileSystemEntry() - : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {} - - /// Initialize the cached file system entry. - void init(llvm::ErrorOr &&MaybeStatus, StringRef Filename, - llvm::vfs::FileSystem &FS); + /// Creates an entry without contents: either a filesystem error or + /// a directory with stat value. + CachedFileSystemEntry(llvm::ErrorOr Stat) + : MaybeStat(std::move(Stat)), Contents(nullptr) { + clearStatName(); + } - /// Initialize the entry as file with minimized or original contents. - /// - /// The filesystem opens the file even for `stat` calls open to avoid the - /// issues with stat + open of minimized files that might lead to a - /// mismatching size of the file. - llvm::ErrorOr initFile(StringRef Filename, - llvm::vfs::FileSystem &FS); - - /// Minimize contents of the file. - void minimizeFile(); - - /// \returns True if the entry is initialized. - bool isInitialized() const { - return !MaybeStat || MaybeStat->isStatusKnown(); + /// Creates an entry representing a file with contents. 
+ CachedFileSystemEntry(llvm::ErrorOr Stat, + CachedFileContents *Contents) + : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { + clearStatName(); } /// \returns True if the entry is a filesystem error. bool isError() const { return !MaybeStat; } - /// \returns True if the current entry points to a directory. + /// \returns True if the current entry represents a directory. bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } /// \returns Original contents of the file. StringRef getOriginalContents() const { - assert(isInitialized() && "not initialized"); assert(!isError() && "error"); assert(!MaybeStat->isDirectory() && "not a file"); - assert(OriginalContents && "not read"); - return OriginalContents->getBuffer(); + assert(Contents && "contents not initialized"); + return Contents->Original->getBuffer(); } /// \returns Minimized contents of the file. StringRef getMinimizedContents() const { - assert(isInitialized() && "not initialized"); assert(!isError() && "error"); - assert(!isDirectory() && "not a file"); - llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load(); + assert(!MaybeStat->isDirectory() && "not a file"); + assert(Contents && "contents not initialized"); + llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load(); assert(Buffer && "not minimized"); return Buffer->getBuffer(); } - /// \returns True if this entry represents a file that can be read. - bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); } - - /// \returns True if this cached entry needs to be updated. - bool needsUpdate(bool ShouldBeMinimized) const { - return isReadable() && needsMinimization(ShouldBeMinimized); - } - - /// \returns True if the contents of this entry need to be minimized. - bool needsMinimization(bool ShouldBeMinimized) const { - return ShouldBeMinimized && !MinimizedContentsAccess.load(); - } - /// \returns The error. 
- std::error_code getError() const { - assert(isInitialized() && "not initialized"); - return MaybeStat.getError(); - } + std::error_code getError() const { return MaybeStat.getError(); } - /// \returns The entry status. + /// \returns The entry status with empty filename. llvm::vfs::Status getStatus() const { - assert(isInitialized() && "not initialized"); assert(!isError() && "error"); + assert(MaybeStat->getName().empty() && "stat name must be empty"); return *MaybeStat; } - /// \returns the name of the file. - StringRef getName() const { - assert(isInitialized() && "not initialized"); + /// \returns The unique ID of the entry. + llvm::sys::fs::UniqueID getUniqueID() const { assert(!isError() && "error"); - return MaybeStat->getName(); + return MaybeStat->getUniqueID(); } - /// Return the mapping between location -> distance that is used to speed up + /// \returns The mapping between location -> distance that is used to speed up /// the block skipping in the preprocessor. const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const { assert(!isError() && "error"); assert(!isDirectory() && "not a file"); - return PPSkippedRangeMapping; + assert(Contents && "contents not initialized"); + return Contents->PPSkippedRangeMapping; + } + + /// \returns The data structure holding both original and minimized contents. + CachedFileContents *getContents() const { + assert(!isError() && "error"); + assert(!isDirectory() && "not a file"); + return Contents; } private: - llvm::ErrorOr MaybeStat; - std::unique_ptr OriginalContents; + void clearStatName() { + if (MaybeStat) + MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, ""); + } - /// Owning storage for the minimized file contents. - std::unique_ptr MinimizedContentsStorage; - /// Atomic view of the minimized file contents. - /// This prevents data races when multiple threads call `needsMinimization`. - std::atomic MinimizedContentsAccess; + /// Either the filesystem error or status of the entry. 
+ /// The filename is empty and only materialized by \c EntryRef. + llvm::ErrorOr MaybeStat; - PreprocessorSkippedRangeMapping PPSkippedRangeMapping; + /// Non-owning pointer to the file contents. + /// + /// We're using pointer here to keep the size of this class small. Instances + /// representing directories and filesystem errors don't hold any contents + /// anyway. + CachedFileContents *Contents; }; /// This class is a shared cache, that caches the 'stat' and 'open' calls to the @@ -144,24 +152,59 @@ class CachedFileSystemEntry { /// the worker threads. class DependencyScanningFilesystemSharedCache { public: - struct SharedFileSystemEntry { - std::mutex ValueLock; - CachedFileSystemEntry Value; + struct CacheShard { + /// The mutex that needs to be locked before mutation of any member. + mutable std::mutex CacheLock; + + /// Map from filenames to cached entries. + llvm::StringMap + EntriesByFilename; + + /// Map from unique IDs to cached entries. + llvm::DenseMap + EntriesByUID; + + /// The backing storage for cached entries. + llvm::SpecificBumpPtrAllocator EntryStorage; + + /// The backing storage for cached contents. + llvm::SpecificBumpPtrAllocator ContentsStorage; + + /// Returns entry associated with the filename or nullptr if none is found. + const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; + + /// Returns entry associated with the unique ID or nullptr if none is found. + const CachedFileSystemEntry * + findEntryByUID(llvm::sys::fs::UniqueID UID) const; + + /// Returns entry associated with the filename if there is some. Otherwise, + /// constructs new one with the given status, associates it with the + /// filename and returns the result. + const CachedFileSystemEntry & + getOrEmplaceEntryForFilename(StringRef Filename, + llvm::ErrorOr Stat); + + /// Returns entry associated with the unique ID if there is some. 
Otherwise, + /// constructs new one with the given status and contents, associates it + /// with the unique ID and returns the result. + const CachedFileSystemEntry & + getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, + std::unique_ptr Contents); + + /// Returns entry associated with the filename if there is some. Otherwise, + /// associates the given entry with the filename and returns it. + const CachedFileSystemEntry & + getOrInsertEntryForFilename(StringRef Filename, + const CachedFileSystemEntry &Entry); }; DependencyScanningFilesystemSharedCache(); - /// Returns a cache entry for the corresponding key. - /// - /// A new cache entry is created if the key is not in the cache. This is a - /// thread safe call. - SharedFileSystemEntry &get(StringRef Key); + /// Returns shard for the given key. + CacheShard &getShardForFilename(StringRef Filename) const; + CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; private: - struct CacheShard { - std::mutex CacheLock; - llvm::StringMap Cache; - }; std::unique_ptr CacheShards; unsigned NumShards; }; @@ -173,8 +216,20 @@ class DependencyScanningFilesystemLocalCache { llvm::StringMap Cache; public: - const CachedFileSystemEntry *getCachedEntry(StringRef Filename) { - return Cache[Filename]; + /// Returns entry associated with the filename or nullptr if none is found. + const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { + auto It = Cache.find(Filename); + return It == Cache.end() ? nullptr : It->getValue(); + } + + /// Associates the given entry with the filename and returns the given entry + /// pointer (for convenience). + const CachedFileSystemEntry & + insertEntryForFilename(StringRef Filename, + const CachedFileSystemEntry &Entry) { + const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second; + assert(InsertedEntry == &Entry && "entry already present"); + return *InsertedEntry; } }; @@ -187,23 +242,25 @@ class EntryRef { /// are minimized. 
bool Minimized; + /// The filename used to access this entry. + std::string Filename; + /// The underlying cached entry. const CachedFileSystemEntry &Entry; public: - EntryRef(bool Minimized, const CachedFileSystemEntry &Entry) - : Minimized(Minimized), Entry(Entry) {} + EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry) + : Minimized(Minimized), Filename(Name), Entry(Entry) {} llvm::vfs::Status getStatus() const { llvm::vfs::Status Stat = Entry.getStatus(); - if (Stat.isDirectory()) - return Stat; - return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); + if (!Stat.isDirectory()) + Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); + return llvm::vfs::Status::copyWithNewName(Stat, Filename); } bool isError() const { return Entry.isError(); } bool isDirectory() const { return Entry.isDirectory(); } - StringRef getName() const { return Entry.getName(); } /// If the cached entry represents an error, promotes it into `ErrorOr`. llvm::ErrorOr unwrapError() const { @@ -253,8 +310,87 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// Check whether the file should be minimized. bool shouldMinimize(StringRef Filename); + /// Returns entry for the given filename. + /// + /// Attempts to use the local and shared caches first, then falls back to + /// using the underlying filesystem. llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename); + /// For a filename that's not yet associated with any entry in the caches, + /// uses the underlying filesystem to either look up the entry based in the + /// shared cache indexed by unique ID, or creates new entry from scratch. + llvm::ErrorOr + computeAndStoreResult(StringRef Filename); + + /// Minimizes the given entry if necessary and returns a wrapper object with + /// reference semantics. 
+ EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry, + StringRef Filename); + + /// Represents a filesystem entry that has been stat-ed (and potentially read) + /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. + struct TentativeEntry { + llvm::vfs::Status Status; + std::unique_ptr Contents; + + TentativeEntry(llvm::vfs::Status Status, + std::unique_ptr Contents = nullptr) + : Status(std::move(Status)), Contents(std::move(Contents)) {} + }; + + /// Reads file at the given path. Enforces consistency between the file size + /// in status and size of read contents. + llvm::ErrorOr readFile(StringRef Filename); + + /// Returns entry associated with the unique ID of the given tentative entry + /// if there is some in the shared cache. Otherwise, constructs new one, + /// associates it with the unique ID and returns the result. + const CachedFileSystemEntry & + getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); + + /// Returns entry associated with the filename or nullptr if none is found. + /// + /// Returns entry from local cache if there is some. Otherwise, if the entry + /// is found in the shared cache, writes it through the local cache and + /// returns it. Otherwise returns nullptr. + const CachedFileSystemEntry * + findEntryByFilenameWithWriteThrough(StringRef Filename); + + /// Returns entry associated with the unique ID in the shared cache or nullptr + /// if none is found. + const CachedFileSystemEntry * + findSharedEntryByUID(llvm::vfs::Status Stat) const { + return SharedCache.getShardForUID(Stat.getUniqueID()) + .findEntryByUID(Stat.getUniqueID()); + } + + /// Associates the given entry with the filename in the local cache and + /// returns it. 
+ const CachedFileSystemEntry & + insertLocalEntryForFilename(StringRef Filename, + const CachedFileSystemEntry &Entry) { + return LocalCache.insertEntryForFilename(Filename, Entry); + } + + /// Returns entry associated with the filename in the shared cache if there is + /// some. Otherwise, constructs new one with the given error code, associates + /// it with the filename and returns the result. + const CachedFileSystemEntry & + getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { + return SharedCache.getShardForFilename(Filename) + .getOrEmplaceEntryForFilename(Filename, EC); + } + + /// Returns entry associated with the filename in the shared cache if there is + /// some. Otherwise, associates the given entry with the filename and returns + /// it. + const CachedFileSystemEntry & + getOrInsertSharedEntryForFilename(StringRef Filename, + const CachedFileSystemEntry &Entry) { + return SharedCache.getShardForFilename(Filename) + .getOrInsertEntryForFilename(Filename, Entry); + } + /// The global cache shared between worker threads. DependencyScanningFilesystemSharedCache &SharedCache; /// The local cache is used by the worker thread to cache file system queries diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 6b8c692335bd6..cc8968a7b680f 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -16,10 +16,10 @@ using namespace clang; using namespace tooling; using namespace dependencies; -llvm::ErrorOr -CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { +llvm::ErrorOr +DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { // Load the file and its content from the file system. 
- auto MaybeFile = FS.openFileForRead(Filename); + auto MaybeFile = getUnderlyingFS().openFileForRead(Filename); if (!MaybeFile) return MaybeFile.getError(); auto File = std::move(*MaybeFile); @@ -34,24 +34,42 @@ CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) { return MaybeBuffer.getError(); auto Buffer = std::move(*MaybeBuffer); - OriginalContents = std::move(Buffer); - return Stat; + // If the file size changed between read and stat, pretend it didn't. + if (Stat.getSize() != Buffer->getBufferSize()) + Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize()); + + return TentativeEntry(Stat, std::move(Buffer)); } -void CachedFileSystemEntry::minimizeFile() { - assert(OriginalContents && "minimizing missing contents"); +EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( + const CachedFileSystemEntry &Entry, StringRef Filename) { + if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename)) + return EntryRef(/*Minimized=*/false, Filename, Entry); + + CachedFileContents *Contents = Entry.getContents(); + assert(Contents && "contents not initialized"); + + // Double-checked locking. + if (Contents->MinimizedAccess.load()) + return EntryRef(/*Minimized=*/true, Filename, Entry); + + std::lock_guard GuardLock(Contents->ValueLock); + + // Double-checked locking. + if (Contents->MinimizedAccess.load()) + return EntryRef(/*Minimized=*/true, Filename, Entry); llvm::SmallString<1024> MinimizedFileContents; // Minimize the file down to directives that might affect the dependencies. SmallVector Tokens; - if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(), + if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(), MinimizedFileContents, Tokens)) { // FIXME: Propagate the diagnostic if desired by the client. // Use the original file if the minimization failed. 
- MinimizedContentsStorage = - llvm::MemoryBuffer::getMemBuffer(*OriginalContents); - MinimizedContentsAccess.store(MinimizedContentsStorage.get()); - return; + Contents->MinimizedStorage = + llvm::MemoryBuffer::getMemBuffer(*Contents->Original); + Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); + return EntryRef(/*Minimized=*/true, Filename, Entry); } // The contents produced by the minimizer must be null terminated. @@ -74,16 +92,17 @@ void CachedFileSystemEntry::minimizeFile() { } Mapping[Range.Offset] = Range.Length; } - PPSkippedRangeMapping = std::move(Mapping); + Contents->PPSkippedRangeMapping = std::move(Mapping); - MinimizedContentsStorage = std::make_unique( + Contents->MinimizedStorage = std::make_unique( std::move(MinimizedFileContents)); - // The algorithm in `getOrCreateFileSystemEntry` uses the presence of - // minimized contents to decide whether an entry is up-to-date or not. - // If it is up-to-date, the skipped range mappings must be already computed. - // This is why we need to store the minimized contents **after** storing the - // skipped range mappings. Failing to do so would lead to a data race. - MinimizedContentsAccess.store(MinimizedContentsStorage.get()); + // This function performed double-checked locking using `MinimizedAccess`. + // Assigning it must be the last thing this function does. If we were to + // assign it before `PPSkippedRangeMapping`, other threads may skip the + // critical section (`MinimizedAccess != nullptr`) and access the mappings + // that are about to be initialized, leading to a data race. 
+ Contents->MinimizedAccess.store(Contents->MinimizedStorage.get()); + return EntryRef(/*Minimized=*/true, Filename, Entry); } DependencyScanningFilesystemSharedCache:: @@ -98,12 +117,70 @@ DependencyScanningFilesystemSharedCache:: CacheShards = std::make_unique(NumShards); } -DependencyScanningFilesystemSharedCache::SharedFileSystemEntry & -DependencyScanningFilesystemSharedCache::get(StringRef Key) { - CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards]; - std::lock_guard LockGuard(Shard.CacheLock); - auto It = Shard.Cache.try_emplace(Key); - return It.first->getValue(); +DependencyScanningFilesystemSharedCache::CacheShard & +DependencyScanningFilesystemSharedCache::getShardForFilename( + StringRef Filename) const { + return CacheShards[llvm::hash_value(Filename) % NumShards]; +} + +DependencyScanningFilesystemSharedCache::CacheShard & +DependencyScanningFilesystemSharedCache::getShardForUID( + llvm::sys::fs::UniqueID UID) const { + auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile()); + return CacheShards[Hash % NumShards]; +} + +const CachedFileSystemEntry * +DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename( + StringRef Filename) const { + std::lock_guard LockGuard(CacheLock); + auto It = EntriesByFilename.find(Filename); + return It == EntriesByFilename.end() ? nullptr : It->getValue(); +} + +const CachedFileSystemEntry * +DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID( + llvm::sys::fs::UniqueID UID) const { + std::lock_guard LockGuard(CacheLock); + auto It = EntriesByUID.find(UID); + return It == EntriesByUID.end() ? 
nullptr : It->getSecond(); +} + +const CachedFileSystemEntry & +DependencyScanningFilesystemSharedCache::CacheShard:: + getOrEmplaceEntryForFilename(StringRef Filename, + llvm::ErrorOr Stat) { + std::lock_guard LockGuard(CacheLock); + auto Insertion = EntriesByFilename.insert({Filename, nullptr}); + if (Insertion.second) + Insertion.first->second = + new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat)); + return *Insertion.first->second; +} + +const CachedFileSystemEntry & +DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID( + llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, + std::unique_ptr Contents) { + std::lock_guard LockGuard(CacheLock); + auto Insertion = EntriesByUID.insert({UID, nullptr}); + if (Insertion.second) { + CachedFileContents *StoredContents = nullptr; + if (Contents) + StoredContents = new (ContentsStorage.Allocate()) + CachedFileContents(std::move(Contents)); + Insertion.first->second = new (EntryStorage.Allocate()) + CachedFileSystemEntry(std::move(Stat), StoredContents); + } + return *Insertion.first->second; +} + +const CachedFileSystemEntry & +DependencyScanningFilesystemSharedCache::CacheShard:: + getOrInsertEntryForFilename(StringRef Filename, + const CachedFileSystemEntry &Entry) { + std::lock_guard LockGuard(CacheLock); + return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); } /// Whitelist file extensions that should be minimized, treating no extension as @@ -148,53 +225,63 @@ bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { return !NotToBeMinimized.contains(Filename); } -void CachedFileSystemEntry::init(llvm::ErrorOr &&MaybeStatus, - StringRef Filename, - llvm::vfs::FileSystem &FS) { - if (!MaybeStatus || MaybeStatus->isDirectory()) - MaybeStat = std::move(MaybeStatus); - else - MaybeStat = initFile(Filename, FS); +const CachedFileSystemEntry & +DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID( + TentativeEntry TEntry) { + 
auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID()); + return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(), + std::move(TEntry.Status), + std::move(TEntry.Contents)); } -llvm::ErrorOr -DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( +const CachedFileSystemEntry * +DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( StringRef Filename) { - bool ShouldBeMinimized = shouldMinimize(Filename); - - const auto *Entry = LocalCache.getCachedEntry(Filename); - if (Entry && !Entry->needsUpdate(ShouldBeMinimized)) - return EntryRef(ShouldBeMinimized, *Entry).unwrapError(); - - // FIXME: Handle PCM/PCH files. - // FIXME: Handle module map files. - - auto &SharedCacheEntry = SharedCache.get(Filename); - { - std::lock_guard LockGuard(SharedCacheEntry.ValueLock); - CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; - - if (!CacheEntry.isInitialized()) { - auto MaybeStatus = getUnderlyingFS().status(Filename); - if (!MaybeStatus && !shouldCacheStatFailures(Filename)) - // HACK: We need to always restat non source files if the stat fails. - // This is because Clang first looks up the module cache and module - // files before building them, and then looks for them again. If we - // cache the stat failure, it won't see them the second time. - return MaybeStatus.getError(); - CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS()); - } + if (const auto *Entry = LocalCache.findEntryByFilename(Filename)) + return Entry; + auto &Shard = SharedCache.getShardForFilename(Filename); + if (const auto *Entry = Shard.findEntryByFilename(Filename)) + return &LocalCache.insertEntryForFilename(Filename, *Entry); + return nullptr; +} - // Checking `needsUpdate` verifies the entry represents an opened file. - // Only checking `needsMinimization` could lead to minimization of files - // that we failed to load (such files don't have `OriginalContents`). 
- if (CacheEntry.needsUpdate(ShouldBeMinimized)) - CacheEntry.minimizeFile(); +llvm::ErrorOr +DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { + llvm::ErrorOr Stat = getUnderlyingFS().status(Filename); + if (!Stat) { + if (!shouldCacheStatFailures(Filename)) + return Stat.getError(); + const auto &Entry = + getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); + return insertLocalEntryForFilename(Filename, Entry); } - // Store the result in the local cache. - Entry = &SharedCacheEntry.Value; - return EntryRef(ShouldBeMinimized, *Entry).unwrapError(); + if (const auto *Entry = findSharedEntryByUID(*Stat)) + return insertLocalEntryForFilename(Filename, *Entry); + + auto TEntry = + Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename); + + const CachedFileSystemEntry *SharedEntry = [&]() { + if (TEntry) { + const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry)); + return &getOrInsertSharedEntryForFilename(Filename, UIDEntry); + } + return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError()); + }(); + + return insertLocalEntryForFilename(Filename, *SharedEntry); +} + +llvm::ErrorOr +DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( + StringRef Filename) { + if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) + return minimizeIfNecessary(*Entry, Filename).unwrapError(); + auto MaybeEntry = computeAndStoreResult(Filename); + if (!MaybeEntry) + return MaybeEntry.getError(); + return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError(); } llvm::ErrorOr @@ -247,7 +334,8 @@ llvm::ErrorOr> MinimizedVFSFile::create( return std::make_error_code(std::errc::is_a_directory); auto Result = std::make_unique( - llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(), + llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), + Entry.getStatus().getName(), /*RequiresNullTerminator=*/false), Entry.getStatus()); diff --git 
a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp index 90ab1df267530..784d759986375 100644 --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -224,6 +224,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) { EXPECT_TRUE(StatusFull1); EXPECT_EQ(StatusMinimized0->getSize(), 17u); EXPECT_EQ(StatusFull1->getSize(), 30u); + EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h")); + EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h")); } TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { @@ -245,6 +247,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) { EXPECT_TRUE(StatusMinimized1); EXPECT_EQ(StatusFull0->getSize(), 30u); EXPECT_EQ(StatusMinimized1->getSize(), 17u); + EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h")); + EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h")); } } // end namespace dependencies From 8cc2a137270462bc191377dbab97c739583814dd Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 21 Jan 2022 11:18:22 +0100 Subject: [PATCH 169/946] [clang][deps] Handle symlinks in minimizing FS The minimizing and caching filesystem used by the dependency scanner can be configured to **not** minimize some files. That's necessary when scanning a TU with prebuilt inputs (i.e. PCH) that refer to the original (non-minimized) files. Minimizing such files in the dependency scanner would cause discrepancy between the current perceived state of the filesystem and the file sizes stored in the AST file. By not minimizing such files, we avoid creating the discrepancy. The problem with the current approach is that files that should not be minimized are identified by their path. This breaks down when the prebuilt input (PCH) and the current TU refer to the same file via different paths (i.e. symlinks). 
This patch switches from paths to `llvm::sys::fs::UniqueID` when identifying ignored files. This is consistent with how the rest of Clang treats files. Depends on D114966. Reviewed By: dexonsmith, arphaman Differential Revision: https://reviews.llvm.org/D114971 --- .../DependencyScanningFilesystem.h | 12 +++-- .../DependencyScanningFilesystem.cpp | 34 ++++++------ clang/test/ClangScanDeps/modules-symlink.c | 54 +++++++++++++++++++ 3 files changed, 79 insertions(+), 21 deletions(-) create mode 100644 clang/test/ClangScanDeps/modules-symlink.c diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 70e9c4a3ffea2..7c830d3f27333 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -11,8 +11,8 @@ #include "clang/Basic/LLVM.h" #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/VirtualFileSystem.h" @@ -308,13 +308,15 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { private: /// Check whether the file should be minimized. - bool shouldMinimize(StringRef Filename); + bool shouldMinimize(StringRef Filename, llvm::sys::fs::UniqueID UID); /// Returns entry for the given filename. /// /// Attempts to use the local and shared caches first, then falls back to /// using the underlying filesystem. 
- llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename); + llvm::ErrorOr + getOrCreateFileSystemEntry(StringRef Filename, + bool DisableMinimization = false); /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the @@ -325,7 +327,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// Minimizes the given entry if necessary and returns a wrapper object with /// reference semantics. EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry, - StringRef Filename); + StringRef Filename, bool Disable); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. @@ -401,7 +403,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// currently active preprocessor. ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings; /// The set of files that should not be minimized. 
- llvm::StringSet<> NotToBeMinimized; + llvm::DenseSet NotToBeMinimized; }; } // end namespace dependencies diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index cc8968a7b680f..80a70252721d8 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -42,8 +42,9 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { } EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary( - const CachedFileSystemEntry &Entry, StringRef Filename) { - if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename)) + const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { + if (Entry.isError() || Entry.isDirectory() || Disable || + !shouldMinimize(Filename, Entry.getUniqueID())) return EntryRef(/*Minimized=*/false, Filename, Entry); CachedFileContents *Contents = Entry.getContents(); @@ -210,19 +211,18 @@ static bool shouldCacheStatFailures(StringRef Filename) { } void DependencyScanningWorkerFilesystem::disableMinimization( - StringRef RawFilename) { - llvm::SmallString<256> Filename; - llvm::sys::path::native(RawFilename, Filename); - NotToBeMinimized.insert(Filename); + StringRef Filename) { + // Since we're not done setting up `NotToBeMinimized` yet, we need to disable + // minimization explicitly. 
+ if (llvm::ErrorOr Result = + getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true)) + NotToBeMinimized.insert(Result->getStatus().getUniqueID()); } -bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { - if (!shouldMinimizeBasedOnExtension(RawFilename)) - return false; - - llvm::SmallString<256> Filename; - llvm::sys::path::native(RawFilename, Filename); - return !NotToBeMinimized.contains(Filename); +bool DependencyScanningWorkerFilesystem::shouldMinimize( + StringRef Filename, llvm::sys::fs::UniqueID UID) { + return shouldMinimizeBasedOnExtension(Filename) && + !NotToBeMinimized.contains(UID); } const CachedFileSystemEntry & @@ -275,13 +275,15 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - StringRef Filename) { + StringRef Filename, bool DisableMinimization) { if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) - return minimizeIfNecessary(*Entry, Filename).unwrapError(); + return minimizeIfNecessary(*Entry, Filename, DisableMinimization) + .unwrapError(); auto MaybeEntry = computeAndStoreResult(Filename); if (!MaybeEntry) return MaybeEntry.getError(); - return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError(); + return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization) + .unwrapError(); } llvm::ErrorOr diff --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c new file mode 100644 index 0000000000000..1a2fe2d9f5123 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-symlink.c @@ -0,0 +1,54 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +//--- cdb_pch.json +[ + { + "directory": "DIR", + "command": "clang -x c-header DIR/pch.h -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -o DIR/pch.h.gch", + "file": "DIR/pch.h" + } +] + +//--- cdb_tu.json +[ + { + "directory": "DIR", + "command": "clang -c 
DIR/tu.c -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -include DIR/pch.h -o DIR/tu.o", + "file": "DIR/tu.c" + } +] + +//--- module.modulemap +module mod { header "symlink.h" } + +//--- pch.h +#include "symlink.h" + +//--- original.h +// Comment that will be stripped by the minimizer. +#define MACRO 1 + +//--- tu.c +#include "original.h" +static int foo = MACRO; // Macro usage that will trigger + // input file consistency checks. + +// RUN: ln -s %t/original.h %t/symlink.h + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb_pch.json > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \ +// RUN: -generate-modules-path-args -module-files-dir %t/build > %t/result_pch.json +// +// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \ +// RUN: --module-name=mod > %t/mod.cc1.rsp +// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \ +// RUN: --tu-index=0 > %t/pch.rsp +// +// RUN: %clang @%t/mod.cc1.rsp +// RUN: %clang -x c-header %t/pch.h -fmodules -gmodules -fimplicit-module-maps \ +// RUN: -fmodules-cache-path=%t/cache -o %t/pch.h.gch -I %t @%t/pch.rsp + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb_tu.json > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \ +// RUN: -generate-modules-path-args -module-files-dir %t/build > %t/result_tu.json From 68db0e25df4b1edaa2c6080eb88453ab01ea01d3 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Fri, 21 Jan 2022 13:14:39 +0100 Subject: [PATCH 170/946] [flang] Update tco tool pipline and add translation to LLVM IR tco is a tool to test the FIR to LLVM IR pipeline of the Flang compiler. This patch updates the tco pipelines and adds the translation to LLVM IR. A simple test is added to make sure the tool is working with a simple FIR program. More tests will be upstreamed in follow-up patches from the fir-dev branch. This patch is part of the upstreaming effort from the fir-dev branch.
Reviewed By: schweitz, mehdi_amini Differential Revision: https://reviews.llvm.org/D117781 Co-authored-by: Eric Schweitz Co-authored-by: Jean Perier Co-authored-by: Andrzej Warzynski --- .../include/flang/Optimizer/CodeGen/CodeGen.h | 10 +- .../include/flang/Optimizer/Support/InitFIR.h | 12 +- flang/include/flang/Tools/CLOptions.inc | 160 ++++++++++++++++++ flang/lib/Optimizer/CodeGen/CodeGen.cpp | 37 ++++ flang/lib/Optimizer/Support/CMakeLists.txt | 1 + flang/lib/Optimizer/Support/InitFIR.cpp | 20 +++ flang/test/Fir/basic-program.fir | 11 ++ flang/tools/tco/CMakeLists.txt | 20 ++- flang/tools/tco/tco.cpp | 45 ++++- 9 files changed, 301 insertions(+), 15 deletions(-) create mode 100644 flang/include/flang/Tools/CLOptions.inc create mode 100644 flang/lib/Optimizer/Support/InitFIR.cpp create mode 100644 flang/test/Fir/basic-program.fir diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h index 1bd31b207859a..939d6aebb524d 100644 --- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h +++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h @@ -12,6 +12,8 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassRegistry.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" #include namespace fir { @@ -36,9 +38,13 @@ std::unique_ptr> createFirTargetRewritePass( /// Convert FIR to the LLVM IR dialect std::unique_ptr createFIRToLLVMPass(); +using LLVMIRLoweringPrinter = + std::function; /// Convert the LLVM IR dialect to LLVM-IR proper -std::unique_ptr -createLLVMDialectToLLVMPass(llvm::raw_ostream &output); +std::unique_ptr createLLVMDialectToLLVMPass( + llvm::raw_ostream &output, + LLVMIRLoweringPrinter printer = + [](llvm::Module &m, llvm::raw_ostream &out) { m.print(out, nullptr); }); // declarative passes #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index 
e78967de2a383..2e8c1685a06f7 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -13,7 +13,6 @@ #ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H -#include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "mlir/Conversion/Passes.h" #include "mlir/Dialect/Affine/Passes.h" @@ -35,11 +34,19 @@ namespace fir::support { #define FLANG_DIALECT_LIST \ FLANG_NONCODEGEN_DIALECT_LIST, FIRCodeGenDialect, mlir::LLVM::LLVMDialect +inline void registerNonCodegenDialects(mlir::DialectRegistry ®istry) { + registry.insert(); +} + /// Register all the dialects used by flang. inline void registerDialects(mlir::DialectRegistry ®istry) { registry.insert(); } +inline void loadNonCodegenDialects(mlir::MLIRContext &context) { + context.loadDialect(); +} + /// Forced load of all the dialects used by flang. Lowering is not an MLIR /// pass, but a producer of FIR and MLIR. It is therefore a requirement that the /// dialects be preloaded to be able to build the IR. @@ -75,6 +82,9 @@ inline void registerMLIRPassesForFortranTools() { mlir::registerConvertAffineToStandardPass(); } +/// Register the interfaces needed to lower to LLVM IR. +void registerLLVMTranslation(mlir::MLIRContext &context); + } // namespace fir::support #endif // FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc new file mode 100644 index 0000000000000..1c85075d5cc17 --- /dev/null +++ b/flang/include/flang/Tools/CLOptions.inc @@ -0,0 +1,160 @@ +//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// This file defines some shared command-line options that can be used when +/// debugging the test tools. This file must be included into the tool. + +#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include "flang/Optimizer/CodeGen/CodeGen.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "llvm/Support/CommandLine.h" + +#define DisableOption(DOName, DOOption, DODescription) \ + static llvm::cl::opt disable##DOName("disable-" DOOption, \ + llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \ + llvm::cl::Hidden) + +/// Shared option in tools to control whether dynamically sized array +/// allocations should always be on the heap. +static llvm::cl::opt dynamicArrayStackToHeapAllocation( + "fdynamic-heap-array", + llvm::cl::desc("place all array allocations of dynamic size on the heap"), + llvm::cl::init(false), llvm::cl::Hidden); + +/// Shared option in tools to set a maximum value for the number of elements in +/// a compile-time sized array that can be allocated on the stack. 
+static llvm::cl::opt arrayStackAllocationThreshold( + "fstack-array-size", + llvm::cl::desc( + "place all array allocations more than elements on the heap"), + llvm::cl::init(~static_cast(0)), llvm::cl::Hidden); + +namespace { +/// Optimizer Passes +DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass"); +DisableOption(FirAvc, "avc", "array value copy analysis and transformation"); +DisableOption( + FirMao, "memory-allocation-opt", "memory allocation optimization"); + +/// CodeGen Passes +#if !defined(FLANG_EXCLUDE_CODEGEN) +DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen"); +DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target"); +DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect"); +DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM"); +#endif + +/// Generic for adding a pass to the pass manager if it is not disabled. +template +void addPassConditionally( + mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { + if (!disabled) + pm.addPass(ctor()); +} + +template +void addNestedPassConditionally( + mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { + if (!disabled) + pm.addNestedPass(ctor()); +} + +} // namespace + +namespace fir { + +static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) { + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = false; + pm.addPass(mlir::createCanonicalizerPass(config)); +} + +inline void addCfgConversionPass(mlir::PassManager &pm) { + addNestedPassConditionally( + pm, disableCfgConversion, fir::createFirToCfgPass); +} + +inline void addAVC(mlir::PassManager &pm) { + addNestedPassConditionally( + pm, disableFirAvc, fir::createArrayValueCopyPass); +} + +#if !defined(FLANG_EXCLUDE_CODEGEN) +inline void addCodeGenRewritePass(mlir::PassManager &pm) { + addPassConditionally( + pm, disableCodeGenRewrite, fir::createFirCodeGenRewritePass); +} + +inline void addTargetRewritePass(mlir::PassManager &pm) { + 
addPassConditionally(pm, disableTargetRewrite, []() { + return fir::createFirTargetRewritePass(fir::TargetRewriteOptions{}); + }); +} + +inline void addFIRToLLVMPass(mlir::PassManager &pm) { + addPassConditionally(pm, disableFirToLlvmIr, fir::createFIRToLLVMPass); +} + +inline void addLLVMDialectToLLVMPass( + mlir::PassManager &pm, llvm::raw_ostream &output) { + addPassConditionally(pm, disableLlvmIrToLlvm, + [&]() { return fir::createLLVMDialectToLLVMPass(output); }); +} +#endif + +/// Create a pass pipeline for running default optimization passes for +/// incremental conversion of FIR. +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) { + // simplify the IR + mlir::GreedyRewriteConfig config; + config.enableRegionSimplification = false; + fir::addAVC(pm); + pm.addNestedPass(fir::createCharacterConversionPass()); + pm.addPass(mlir::createCanonicalizerPass(config)); + + // The default inliner pass adds the canonicalizer pass with the default + // configuration. Create the inliner pass with tco config. + llvm::StringMap pipelines; + pm.addPass( + mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline)); + pm.addPass(mlir::createCSEPass()); + + // convert control flow to CFG form + fir::addCfgConversionPass(pm); + pm.addPass(mlir::createLowerToCFGPass()); + + pm.addPass(mlir::createCanonicalizerPass(config)); +} + +#if !defined(FLANG_EXCLUDE_CODEGEN) +inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm) { + pm.addNestedPass(fir::createAbstractResultOptPass()); + fir::addCodeGenRewritePass(pm); + fir::addTargetRewritePass(pm); + fir::addFIRToLLVMPass(pm); +} + +/// Create a pass pipeline for lowering from MLIR to LLVM IR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm) { + // Add default optimizer pass pipeline. 
+ fir::createDefaultFIROptimizerPassPipeline(pm); + + // Add codegen pass pipeline. + fir::createDefaultFIRCodeGenPassPipeline(pm); +} +#undef FLANG_EXCLUDE_CODEGEN +#endif + +} // namespace fir diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index be2e7cde916df..40d6d2017b2fa 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -23,6 +23,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" #include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/ArrayRef.h" #define DEBUG_TYPE "flang-codegen" @@ -3305,8 +3306,44 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase { } } }; + +/// Lower from LLVM IR dialect to proper LLVM-IR and dump the module +struct LLVMIRLoweringPass + : public mlir::PassWrapper> { + using Printer = fir::LLVMIRLoweringPrinter; + LLVMIRLoweringPass(raw_ostream &output, Printer p) + : output{output}, printer{p} {} + + mlir::ModuleOp getModule() { return getOperation(); } + + void runOnOperation() override final { + auto *ctx = getModule().getContext(); + auto optName = getModule().getName(); + llvm::LLVMContext llvmCtx; + if (auto llvmModule = mlir::translateModuleToLLVMIR( + getModule(), llvmCtx, optName ? 
*optName : "FIRModule")) { + printer(*llvmModule, output); + return; + } + + mlir::emitError(mlir::UnknownLoc::get(ctx), "could not emit LLVM-IR\n"); + signalPassFailure(); + } + +private: + raw_ostream &output; + Printer printer; +}; + } // namespace std::unique_ptr fir::createFIRToLLVMPass() { return std::make_unique(); } + +std::unique_ptr +fir::createLLVMDialectToLLVMPass(raw_ostream &output, + fir::LLVMIRLoweringPrinter printer) { + return std::make_unique(output, printer); +} diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt index 30a163de9ccaf..779e20711513e 100644 --- a/flang/lib/Optimizer/Support/CMakeLists.txt +++ b/flang/lib/Optimizer/Support/CMakeLists.txt @@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_flang_library(FIRSupport FIRContext.cpp + InitFIR.cpp InternalNames.cpp KindMapping.cpp diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp new file mode 100644 index 0000000000000..baa1336d9ca02 --- /dev/null +++ b/flang/lib/Optimizer/Support/InitFIR.cpp @@ -0,0 +1,20 @@ +//===-- Optimizer/Support/InitFIR.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Support/InitFIR.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" + +void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) { + mlir::DialectRegistry registry; + // Register OpenMP dialect interface here as well. + mlir::registerOpenMPDialectTranslation(registry); + // Register LLVM-IR dialect interface. 
+ registerLLVMDialectTranslation(registry); + context.appendDialectRegistry(registry); +} diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir new file mode 100644 index 0000000000000..02463bef99496 --- /dev/null +++ b/flang/test/Fir/basic-program.fir @@ -0,0 +1,11 @@ +// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s + +// Check that tco is working with a basic test. + +func @_QQmain() { + return +} + +// CHECK: ; ModuleID = 'FIRModule' +// CHECK-LABEL: define void @_QQmain() +// CHECK: ret void diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 1a9c5ac72f153..a64b9c59bd02a 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -1,13 +1,25 @@ -get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +set(LLVM_LINK_COMPONENTS + AllTargetsAsmParsers + AllTargetsCodeGens + AllTargetsDescs + AllTargetsInfos +) +llvm_map_components_to_libnames(llvm_libs ${LLVM_LINK_COMPONENTS}) -set(LIBS +add_flang_tool(tco tco.cpp) +llvm_update_compile_flags(tco) +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +target_link_libraries(tco PRIVATE FIRCodeGen FIRDialect FIRSupport FIRTransforms + FIRBuilder ${dialect_libs} MLIRIR MLIRLLVMIR + MLIRLLVMToLLVMIRTranslation + MLIRTargetLLVMIRExport MLIRPass MLIRStandardToLLVM MLIRTransforms @@ -18,7 +30,5 @@ set(LIBS MLIRStandardToLLVM MLIRSupport MLIRVectorToLLVM + ${llvm_libs} ) - -add_flang_tool(tco tco.cpp) -target_link_libraries(tco PRIVATE ${LIBS}) diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index 8f2c283bc82f9..2bb3b27e7eb63 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -11,8 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/CodeGen/CodeGen.h" +#include "flang/Optimizer/Support/FIRContext.h" #include "flang/Optimizer/Support/InitFIR.h" +#include "flang/Optimizer/Support/InternalNames.h" #include 
"flang/Optimizer/Support/KindMapping.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" +#include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Parser.h" @@ -25,11 +31,13 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +/// list of program return codes static cl::opt inputFilename(cl::Positional, cl::desc(""), cl::init("-")); @@ -42,8 +50,14 @@ static cl::opt emitFir("emit-fir", cl::desc("Parse and pretty-print the input"), cl::init(false)); +static cl::opt targetTriple("target", + cl::desc("specify a target triple"), + cl::init("native")); + +#include "flang/Tools/CLOptions.inc" + static void printModuleBody(mlir::ModuleOp mod, raw_ostream &output) { - for (auto &op : mod.getBody()->without_terminator()) + for (auto &op : *mod.getBody()) output << op << '\n'; } @@ -65,6 +79,8 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { mlir::DialectRegistry registry; fir::support::registerDialects(registry); mlir::MLIRContext context(registry); + fir::support::loadDialects(context); + fir::support::registerLLVMTranslation(context); auto owningRef = mlir::parseSourceFile(sourceMgr, &context); if (!owningRef) { @@ -80,21 +96,31 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { ToolOutputFile out(outputFilename, ec, sys::fs::OF_None); // run passes - mlir::PassManager pm{&context}; + fir::KindMapping kindMap{&context}; + fir::setTargetTriple(*owningRef, targetTriple); + fir::setKindMapping(*owningRef, kindMap); + mlir::PassManager pm(&context, mlir::OpPassManager::Nesting::Implicit); + pm.enableVerifier(/*verifyPasses=*/true); mlir::applyPassManagerCLOptions(pm); if (emitFir) { // parse the input and pretty-print it back out // -emit-fir 
intentionally disables all the passes + } else if (passPipeline.hasAnyOccurrences()) { + auto errorHandler = [&](const Twine &msg) { + mlir::emitError(mlir::UnknownLoc::get(pm.getContext())) << msg; + return mlir::failure(); + }; + if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler))) + return mlir::failure(); } else { - // TODO: Actually add passes when added to FIR code base - // add all the passes - // the user can disable them individually + fir::createMLIRToLLVMPassPipeline(pm); + fir::addLLVMDialectToLLVMPass(pm, out.os()); } // run the pass manager if (mlir::succeeded(pm.run(*owningRef))) { // passes ran successfully, so keep the output - if (emitFir) + if (emitFir || passPipeline.hasAnyOccurrences()) printModuleBody(*owningRef, out.os()); out.keep(); return mlir::success(); @@ -107,8 +133,13 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { } int main(int argc, char **argv) { - fir::support::registerMLIRPassesForFortranTools(); [[maybe_unused]] InitLLVM y(argc, argv); + fir::support::registerMLIRPassesForFortranTools(); + fir::registerOptCodeGenPasses(); + fir::registerOptTransformPasses(); + InitializeAllTargets(); + mlir::registerAsmPrinterCLOptions(); + mlir::registerMLIRContextCLOptions(); mlir::registerPassManagerCLOptions(); mlir::PassPipelineCLParser passPipe("", "Compiler passes to run"); cl::ParseCommandLineOptions(argc, argv, "Tilikum Crossing Optimizer\n"); From 8ee135dcf8ff060656ad481c3e980fe8763576f5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 12:24:32 +0000 Subject: [PATCH 171/946] [X86] Remove `__builtin_ia32_pmax/min` intrinsics and use generic `__builtin_elementwise_max/min` D111985 added the generic `__builtin_elementwise_max` and `__builtin_elementwise_min` intrinsics with the same integer behaviour as the SSE/AVX instructions This patch removes the `__builtin_ia32_pmax/min` intrinsics and just uses `__builtin_elementwise_max/min` - the existing tests see no changes: ``` __m256i 
test_mm256_max_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu32 // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epu32(a, b); } ``` This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`). Sibling patch to D117791 Differential Revision: https://reviews.llvm.org/D117798 --- clang/include/clang/Basic/BuiltinsX86.def | 48 --------------------- clang/lib/CodeGen/CGBuiltin.cpp | 52 ----------------------- clang/lib/Headers/avx2intrin.h | 24 +++++------ clang/lib/Headers/avx512bwintrin.h | 16 +++---- clang/lib/Headers/avx512fintrin.h | 16 +++---- clang/lib/Headers/avx512vlintrin.h | 16 +++---- clang/lib/Headers/emmintrin.h | 8 ++-- clang/lib/Headers/smmintrin.h | 16 +++---- clang/test/CodeGen/builtins-x86.c | 12 ------ 9 files changed, 48 insertions(+), 160 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 9b7c763b0c6c7..a8f5567248624 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -265,10 +265,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") @@ 
-377,14 +373,6 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1") @@ -580,18 +568,6 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2") 
-TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2") @@ -921,14 +897,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f") @@ -1047,14 +1015,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw" 
TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") @@ -1188,14 +1148,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") 
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 49f054ec1a982..4c68b20067b99 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,58 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } - case X86::BI__builtin_ia32_pmaxsb128: - case X86::BI__builtin_ia32_pmaxsw128: - case X86::BI__builtin_ia32_pmaxsd128: - case X86::BI__builtin_ia32_pmaxsq128: - case X86::BI__builtin_ia32_pmaxsb256: - case X86::BI__builtin_ia32_pmaxsw256: - case X86::BI__builtin_ia32_pmaxsd256: - case X86::BI__builtin_ia32_pmaxsq256: - case X86::BI__builtin_ia32_pmaxsb512: - case X86::BI__builtin_ia32_pmaxsw512: - case X86::BI__builtin_ia32_pmaxsd512: - case X86::BI__builtin_ia32_pmaxsq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax); - case X86::BI__builtin_ia32_pmaxub128: - case X86::BI__builtin_ia32_pmaxuw128: - case X86::BI__builtin_ia32_pmaxud128: - case X86::BI__builtin_ia32_pmaxuq128: - case X86::BI__builtin_ia32_pmaxub256: - case X86::BI__builtin_ia32_pmaxuw256: - case X86::BI__builtin_ia32_pmaxud256: - case X86::BI__builtin_ia32_pmaxuq256: - case X86::BI__builtin_ia32_pmaxub512: - case X86::BI__builtin_ia32_pmaxuw512: - case X86::BI__builtin_ia32_pmaxud512: - case X86::BI__builtin_ia32_pmaxuq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax); - case X86::BI__builtin_ia32_pminsb128: - case X86::BI__builtin_ia32_pminsw128: - case X86::BI__builtin_ia32_pminsd128: - case 
X86::BI__builtin_ia32_pminsq128: - case X86::BI__builtin_ia32_pminsb256: - case X86::BI__builtin_ia32_pminsw256: - case X86::BI__builtin_ia32_pminsd256: - case X86::BI__builtin_ia32_pminsq256: - case X86::BI__builtin_ia32_pminsb512: - case X86::BI__builtin_ia32_pminsw512: - case X86::BI__builtin_ia32_pminsd512: - case X86::BI__builtin_ia32_pminsq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin); - case X86::BI__builtin_ia32_pminub128: - case X86::BI__builtin_ia32_pminuw128: - case X86::BI__builtin_ia32_pminud128: - case X86::BI__builtin_ia32_pminuq128: - case X86::BI__builtin_ia32_pminub256: - case X86::BI__builtin_ia32_pminuw256: - case X86::BI__builtin_ia32_pminud256: - case X86::BI__builtin_ia32_pminuq256: - case X86::BI__builtin_ia32_pminub512: - case X86::BI__builtin_ia32_pminuw512: - case X86::BI__builtin_ia32_pminud512: - case X86::BI__builtin_ia32_pminuq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin); case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c9ad74ce3fa42..e33514a60ff3e 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_max((__v8si)__a, 
(__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i 
__a, __m256i __b) { - return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } static __inline__ int __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 53319eb23011d..522ef100bab1a 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, 
__mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9b02a7cffc64d..8695aeb94de24 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1090,7 +1090,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 @@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1325,7 +1325,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B) { - return 
(__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index eddb99902e3d5..178c9dbc0e6ea 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 6e9c3032c21f7..4618b808efc48 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2375,7 +2375,7 @@ 
_mm_madd_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); + return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] @@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); + return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 710e55aaa1203..0df59c5fcc592 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i const *__V) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); + return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ 
-687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); + return (__m128i) __builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); + return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); + return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_max((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32 (__m128i __V1, 
__m128i __V2) { - return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -801,7 +801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2); } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index bfcd30072fc1f..9eb5f2f5d149e 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -221,10 +221,6 @@ void f0() { tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c); tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c); - tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c); - tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s); tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i); tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s); @@ -455,14 +451,6 @@ void f0() { tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d); tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f); tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i); - tmp_V16c = __builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c); - tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i); - tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i); - tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c); - tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i); - tmp_V4i = 
__builtin_ia32_pminud128(tmp_V4i, tmp_V4i); - tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s); tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i); tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16); tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, imm_i_0_16); From 3ef88b31843e040c95f23ff2c3c206f1fa399c05 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 12:34:19 +0000 Subject: [PATCH 172/946] Revert rG8ee135dcf8ff060656ad481c3e980fe8763576f5 "[X86] Remove `__builtin_ia32_pmax/min` intrinsics and use generic `__builtin_elementwise_max/min`" Some build bots are referencing the `__builtin_ia32_pmax/min` intrinsics via alternative headers --- clang/include/clang/Basic/BuiltinsX86.def | 48 +++++++++++++++++++++ clang/lib/CodeGen/CGBuiltin.cpp | 52 +++++++++++++++++++++++ clang/lib/Headers/avx2intrin.h | 24 +++++------ clang/lib/Headers/avx512bwintrin.h | 16 +++---- clang/lib/Headers/avx512fintrin.h | 16 +++---- clang/lib/Headers/avx512vlintrin.h | 16 +++---- clang/lib/Headers/emmintrin.h | 8 ++-- clang/lib/Headers/smmintrin.h | 16 +++---- clang/test/CodeGen/builtins-x86.c | 12 ++++++ 9 files changed, 160 insertions(+), 48 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index a8f5567248624..9b7c763b0c6c7 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -265,6 +265,10 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2") 
+TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") @@ -373,6 +377,14 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1") @@ -568,6 +580,18 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2") 
+TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2") @@ -897,6 +921,14 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", 
"ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f") @@ -1015,6 +1047,14 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw" TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") @@ -1148,6 +1188,14 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") 
+TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4c68b20067b99..49f054ec1a982 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,6 +14285,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } + case X86::BI__builtin_ia32_pmaxsb128: + case X86::BI__builtin_ia32_pmaxsw128: + case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsq128: + case X86::BI__builtin_ia32_pmaxsb256: + case X86::BI__builtin_ia32_pmaxsw256: + case X86::BI__builtin_ia32_pmaxsd256: + case X86::BI__builtin_ia32_pmaxsq256: + case X86::BI__builtin_ia32_pmaxsb512: + case X86::BI__builtin_ia32_pmaxsw512: + case X86::BI__builtin_ia32_pmaxsd512: + case X86::BI__builtin_ia32_pmaxsq512: + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax); + case X86::BI__builtin_ia32_pmaxub128: + case X86::BI__builtin_ia32_pmaxuw128: + case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxuq128: + case X86::BI__builtin_ia32_pmaxub256: + case X86::BI__builtin_ia32_pmaxuw256: + case X86::BI__builtin_ia32_pmaxud256: + case X86::BI__builtin_ia32_pmaxuq256: + case X86::BI__builtin_ia32_pmaxub512: + 
case X86::BI__builtin_ia32_pmaxuw512: + case X86::BI__builtin_ia32_pmaxud512: + case X86::BI__builtin_ia32_pmaxuq512: + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax); + case X86::BI__builtin_ia32_pminsb128: + case X86::BI__builtin_ia32_pminsw128: + case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsq128: + case X86::BI__builtin_ia32_pminsb256: + case X86::BI__builtin_ia32_pminsw256: + case X86::BI__builtin_ia32_pminsd256: + case X86::BI__builtin_ia32_pminsq256: + case X86::BI__builtin_ia32_pminsb512: + case X86::BI__builtin_ia32_pminsw512: + case X86::BI__builtin_ia32_pminsd512: + case X86::BI__builtin_ia32_pminsq512: + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin); + case X86::BI__builtin_ia32_pminub128: + case X86::BI__builtin_ia32_pminuw128: + case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminuq128: + case X86::BI__builtin_ia32_pminub256: + case X86::BI__builtin_ia32_pminuw256: + case X86::BI__builtin_ia32_pminud256: + case X86::BI__builtin_ia32_pminuq256: + case X86::BI__builtin_ia32_pminub512: + case X86::BI__builtin_ia32_pminuw512: + case X86::BI__builtin_ia32_pminud512: + case X86::BI__builtin_ia32_pminuq512: + return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin); case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index e33514a60ff3e..c9ad74ce3fa42 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); + return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v16hi)__a, 
(__v16hi)__b); + return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); + return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); + return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); + return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); + return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); + return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); } 
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); + return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); + return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); } static __inline__ int __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 522ef100bab1a..53319eb23011d 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); + return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); + return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); + return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); + return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); + return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); + return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 8695aeb94de24..9b02a7cffc64d 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ 
b/clang/lib/Headers/avx512fintrin.h @@ -1090,7 +1090,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); + return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); + return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1325,7 +1325,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v16su)__A, 
(__v16su)__B); + return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); + return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 178c9dbc0e6ea..eddb99902e3d5 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_max_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); + return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); + return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); + return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); + 
return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 4618b808efc48..6e9c3032c21f7 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2375,7 +2375,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); + return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] @@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); + return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 0df59c5fcc592..710e55aaa1203 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i 
const *__V) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2); + return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2); + return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2); + return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2); + return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) 
__builtin_elementwise_max((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2); + return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -801,7 +801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2); + return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 9eb5f2f5d149e..bfcd30072fc1f 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -221,6 +221,10 @@ void f0() { tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c); tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s); + tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c); + tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s); + tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c); + tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s); tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i); tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s); @@ -451,6 +455,14 @@ void f0() { tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d); tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f); tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i); + tmp_V16c = 
__builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c); + tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i); + tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s); + tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c); + tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i); + tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i); + tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s); tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i); tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16); tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, imm_i_0_16); From 0abaf64580921e31983e355972b91c83fd7521f2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 21 Jan 2022 12:35:36 +0000 Subject: [PATCH 173/946] Revert rG4727d29d908f9dd608dd97a58c0af1ad579fd3ca "[X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs" Some build bots are referencing the `__builtin_ia32_pabs` intrinsics via alternative headers --- clang/include/clang/Basic/BuiltinsX86.def | 12 ++++++++++++ clang/lib/CodeGen/CGBuiltin.cpp | 15 +++++++++++++++ clang/lib/Headers/avx2intrin.h | 6 +++--- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/avx512fintrin.h | 8 ++------ clang/lib/Headers/avx512vlintrin.h | 4 ++-- clang/lib/Headers/tmmintrin.h | 6 +++--- clang/test/CodeGen/builtins-x86.c | 3 +++ 8 files changed, 42 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 9b7c763b0c6c7..bc6208be45606 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -296,6 +296,9 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") 
+TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") @@ -555,6 +558,9 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") // AVX2 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") @@ -921,6 +927,8 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") @@ -1037,6 +1045,8 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, 
"UiV32sV32sIiUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") @@ -1188,6 +1198,8 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 49f054ec1a982..a49c035002786 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,6 +14285,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } + case X86::BI__builtin_ia32_pabsb128: + case X86::BI__builtin_ia32_pabsw128: + case X86::BI__builtin_ia32_pabsd128: + case X86::BI__builtin_ia32_pabsb256: + case X86::BI__builtin_ia32_pabsw256: + case X86::BI__builtin_ia32_pabsd256: + case X86::BI__builtin_ia32_pabsq128: + case X86::BI__builtin_ia32_pabsq256: + case X86::BI__builtin_ia32_pabsb512: + case X86::BI__builtin_ia32_pabsw512: + case X86::BI__builtin_ia32_pabsd512: + case 
X86::BI__builtin_ia32_pabsq512: { + Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c9ad74ce3fa42..5064c87c2bb19 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -26,19 +26,19 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v32qs)__a); + return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v16hi)__a); + return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { - return (__m256i)__builtin_elementwise_abs((__v8si)__a); + return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 53319eb23011d..6aee8aed84871 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v64qs)__A); + return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v32hi)__A); + return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9b02a7cffc64d..df298640523b7 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -26,10 +26,6 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); -/* We need an explicitly signed variant for char. Note that this shouldn't - * appear in the interface though. */ -typedef signed char __v64qs __attribute__((__vector_size__(64))); - typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); @@ -1850,7 +1846,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v8di)__A); + return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1872,7 +1868,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { - return (__m512i)__builtin_elementwise_abs((__v16si) __A); + return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index eddb99902e3d5..0519dba59081a 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { - return (__m128i)__builtin_elementwise_abs((__v2di)__A); + return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 @@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { - return (__m256i)__builtin_elementwise_abs((__v4di)__A); + return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index cb9be2349de5a..bcffa8187801c 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v16qs)__a); + return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v8hi)__a); + return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { - return (__m128i)__builtin_elementwise_abs((__v4si)__a); + return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index bfcd30072fc1f..61b9d53c74f9d 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -259,8 +259,11 @@ void f0() { tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s); tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i); tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i); + tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c); tmp_V8c = __builtin_ia32_pabsb(tmp_V8c); + tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s); tmp_V4s = __builtin_ia32_pabsw(tmp_V4s); + 
tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i); tmp_V2i = __builtin_ia32_pabsd(tmp_V2i); tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi); tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi); From 75e164f61d391979b4829bf2746a5d74b94e95f2 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 20 Jan 2022 12:55:14 +0100 Subject: [PATCH 174/946] [llvm] Cleanup header dependencies in ADT and Support The cleanup was manual, but assisted by "include-what-you-use". It consists in 1. Removing unused forward declaration. No impact expected. 2. Removing unused headers in .cpp files. No impact expected. 3. Removing unused headers in .h files. This removes implicit dependencies and is generally considered a good thing, but this may break downstream builds. I've updated llvm, clang, lld, lldb and mlir deps, and included a list of the modification in the second part of the commit. 4. Replacing header inclusion by forward declaration. This has the same impact as 3. Notable changes: - llvm/Support/TargetParser.h no longer includes llvm/Support/AArch64TargetParser.h nor llvm/Support/ARMTargetParser.h - llvm/Support/TypeSize.h no longer includes llvm/Support/WithColor.h - llvm/Support/YAMLTraits.h no longer includes llvm/Support/Regex.h - llvm/ADT/SmallVector.h no longer includes llvm/Support/MemAlloc.h nor llvm/Support/ErrorHandling.h You may need to add some of these headers in your compilation units, if needs be. 
As a hint of the impact of the cleanup, running clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 8000919 lines after: 7917500 lines Reduced dependencies also help incremental rebuilds and are more ccache-friendly, something not shown by the above metric :-) Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831 --- clang/lib/Basic/Targets/AArch64.h | 1 + clang/lib/Basic/Targets/ARM.h | 1 + clang/lib/Driver/SanitizerArgs.cpp | 1 + clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 1 + clang/lib/Driver/ToolChains/Arch/ARM.cpp | 1 + clang/lib/Driver/ToolChains/Arch/ARM.h | 1 + clang/tools/libclang/BuildSystem.cpp | 1 + lldb/source/Host/common/Socket.cpp | 1 + llvm/include/llvm/ADT/Optional.h | 1 - llvm/include/llvm/ADT/SmallVector.h | 2 -- llvm/include/llvm/MC/MCStreamer.h | 1 + llvm/include/llvm/Object/ELFObjectFile.h | 1 + llvm/include/llvm/Support/AArch64TargetParser.h | 1 - llvm/include/llvm/Support/ARMAttributeParser.h | 7 +++---- llvm/include/llvm/Support/Allocator.h | 4 ---- llvm/include/llvm/Support/BinaryStreamReader.h | 2 -- llvm/include/llvm/Support/BinaryStreamRef.h | 1 - llvm/include/llvm/Support/BinaryStreamWriter.h | 1 - llvm/include/llvm/Support/BlockFrequency.h | 5 ++--- llvm/include/llvm/Support/BranchProbability.h | 1 - llvm/include/llvm/Support/ConvertUTF.h | 1 - llvm/include/llvm/Support/ELFAttributeParser.h | 3 ++- llvm/include/llvm/Support/Error.h | 1 - llvm/include/llvm/Support/ExtensibleRTTI.h | 2 -- llvm/include/llvm/Support/FileCollector.h | 1 - llvm/include/llvm/Support/FileUtilities.h | 6 +++--- llvm/include/llvm/Support/FormatVariadic.h | 3 ++- llvm/include/llvm/Support/GraphWriter.h | 2 -- llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h | 1 - llvm/include/llvm/Support/RISCVISAInfo.h | 2 -- llvm/include/llvm/Support/SymbolRemappingReader.h | 3 ++- llvm/include/llvm/Support/TargetParser.h | 2 -- 
llvm/include/llvm/Support/TimeProfiler.h | 3 ++- llvm/include/llvm/Support/Timer.h | 2 -- llvm/include/llvm/Support/TrigramIndex.h | 1 - llvm/include/llvm/Support/TypeSize.h | 2 +- llvm/include/llvm/Support/YAMLTraits.h | 4 ---- llvm/lib/Debuginfod/Debuginfod.cpp | 2 ++ llvm/lib/Object/Object.cpp | 1 + llvm/lib/Support/APInt.cpp | 2 -- llvm/lib/Support/ARMAttributeParser.cpp | 2 -- llvm/lib/Support/ARMWinEH.cpp | 1 - llvm/lib/Support/BlockFrequency.cpp | 1 + llvm/lib/Support/DAGDeltaAlgorithm.cpp | 1 - llvm/lib/Support/DataExtractor.cpp | 1 - llvm/lib/Support/ELFAttributeParser.cpp | 2 -- llvm/lib/Support/FileOutputBuffer.cpp | 2 -- llvm/lib/Support/FileUtilities.cpp | 3 --- llvm/lib/Support/GraphWriter.cpp | 2 -- llvm/lib/Support/InitLLVM.cpp | 6 +++--- llvm/lib/Support/JSON.cpp | 1 + llvm/lib/Support/MSP430AttributeParser.cpp | 3 ++- llvm/lib/Support/MemoryBuffer.cpp | 5 ++--- llvm/lib/Support/NativeFormatting.cpp | 1 - llvm/lib/Support/PrettyStackTrace.cpp | 1 - llvm/lib/Support/ScopedPrinter.cpp | 1 - llvm/lib/Support/Signals.cpp | 1 + llvm/lib/Support/Signposts.cpp | 1 - llvm/lib/Support/SmallPtrSet.cpp | 1 - llvm/lib/Support/SmallVector.cpp | 1 + llvm/lib/Support/SpecialCaseList.cpp | 1 - llvm/lib/Support/StringMap.cpp | 1 - llvm/lib/Support/SymbolRemappingReader.cpp | 1 + llvm/lib/Support/TargetParser.cpp | 2 -- llvm/lib/Support/ThreadPool.cpp | 1 - llvm/lib/Support/TimeProfiler.cpp | 2 +- llvm/lib/Support/ToolOutputFile.cpp | 1 - llvm/lib/Support/Triple.cpp | 2 +- llvm/lib/Support/TypeSize.cpp | 1 + llvm/lib/Support/VirtualFileSystem.cpp | 3 --- llvm/lib/Support/X86TargetParser.cpp | 1 - llvm/lib/Support/YAMLParser.cpp | 1 - llvm/lib/Support/YAMLTraits.cpp | 2 -- llvm/lib/Support/raw_ostream.cpp | 2 -- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 1 + llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 1 + llvm/lib/Target/ARM/ARMSubtarget.cpp | 1 + llvm/lib/Target/ARM/ARMTargetMachine.cpp | 1 + llvm/tools/llvm-diff/llvm-diff.cpp | 1 + 
llvm/tools/llvm-lto/llvm-lto.cpp | 1 + llvm/tools/llvm-modextract/llvm-modextract.cpp | 1 + llvm/tools/llvm-reduce/llvm-reduce.cpp | 1 + llvm/tools/llvm-split/llvm-split.cpp | 1 + llvm/tools/llvm-stress/llvm-stress.cpp | 1 + llvm/unittests/Support/TargetParserTest.cpp | 3 ++- llvm/unittests/Support/raw_ostream_test.cpp | 1 + mlir/lib/TableGen/Format.cpp | 2 ++ mlir/lib/TableGen/Predicate.cpp | 1 + mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 1 + mlir/tools/mlir-tblgen/OpFormatGen.cpp | 1 + 90 files changed, 63 insertions(+), 93 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index b9e6e3214c44d..ebddce0c1c73e 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -15,6 +15,7 @@ #include "OSTargets.h" #include "clang/Basic/TargetBuiltins.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/TargetParser.h" namespace clang { diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 40c658f3f40e2..f074dac57f9b3 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -18,6 +18,7 @@ #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" namespace clang { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 34505319af1bc..403fac76f0602 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Path.h" #include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 
89a77a368ef02..ca0ca4bf4eeac 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -11,6 +11,7 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/Host.h" diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 1055d7800b63e..16af9f6d71295 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -12,6 +12,7 @@ #include "clang/Driver/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/Host.h" diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.h b/clang/lib/Driver/ToolChains/Arch/ARM.h index 881b63bd36b9c..862a2f2796be5 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.h +++ b/clang/lib/Driver/ToolChains/Arch/ARM.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Option/Option.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include #include diff --git a/clang/tools/libclang/BuildSystem.cpp b/clang/tools/libclang/BuildSystem.cpp index 0d69dcf1725e1..2f638ee8700d9 100644 --- a/clang/tools/libclang/BuildSystem.cpp +++ b/clang/tools/libclang/BuildSystem.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemAlloc.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp index 1c74a8fb59029..d8b8f54a6468d 100644 --- a/lldb/source/Host/common/Socket.cpp +++ b/lldb/source/Host/common/Socket.cpp @@ 
-18,6 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Errno.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/WindowsError.h" #if LLDB_ENABLE_POSIX diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h index 2af59865b8ffe..7d6b3e92f6b27 100644 --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -21,7 +21,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/type_traits.h" #include -#include #include #include diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 321546fec0130..9347f01a4191f 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -15,8 +15,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemAlloc.h" #include "llvm/Support/type_traits.h" #include #include diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index e19d03705acdd..3d6c512bfe73d 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -27,6 +27,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/MD5.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/VersionTuple.h" #include diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 716b94d92d032..e2d2784d4f238 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -34,6 +34,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ScopedPrinter.h" #include #include #include diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index e53f16a8dd425..d094c704d291c 100644 
--- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_AARCH64TARGETPARSER_H #define LLVM_SUPPORT_AARCH64TARGETPARSER_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ARMTargetParser.h" #include diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h index 1cc30af5eaf03..cbb5701540e14 100644 --- a/llvm/include/llvm/Support/ARMAttributeParser.h +++ b/llvm/include/llvm/Support/ARMAttributeParser.h @@ -11,14 +11,13 @@ #include "ARMBuildAttributes.h" #include "ELFAttributeParser.h" -#include "ScopedPrinter.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Endian.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" namespace llvm { +class ScopedPrinter; + class ARMAttributeParser : public ELFAttributeParser { struct DisplayHandler { ARMBuildAttrs::AttrType attribute; diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index 3b1d11e3f75f8..ec5ed06b7fa4b 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -22,16 +22,12 @@ #include "llvm/Support/Alignment.h" #include "llvm/Support/AllocatorBase.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MemAlloc.h" #include #include #include #include -#include #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h index 29b4b09b848c3..c664ac48daad1 100644 --- a/llvm/include/llvm/Support/BinaryStreamReader.h +++ b/llvm/include/llvm/Support/BinaryStreamReader.h @@ -10,7 +10,6 @@ #define LLVM_SUPPORT_BINARYSTREAMREADER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" 
#include "llvm/Support/Alignment.h" #include "llvm/Support/BinaryStreamArray.h" @@ -18,7 +17,6 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/type_traits.h" #include namespace llvm { diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h index e0aaab82ffab5..bc8c6a496ecf7 100644 --- a/llvm/include/llvm/Support/BinaryStreamRef.h +++ b/llvm/include/llvm/Support/BinaryStreamRef.h @@ -14,7 +14,6 @@ #include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamError.h" #include "llvm/Support/Error.h" -#include #include #include diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h index 3054f4ac7ef00..c05b0420aaa33 100644 --- a/llvm/include/llvm/Support/BinaryStreamWriter.h +++ b/llvm/include/llvm/Support/BinaryStreamWriter.h @@ -10,7 +10,6 @@ #define LLVM_SUPPORT_BINARYSTREAMWRITER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamError.h" diff --git a/llvm/include/llvm/Support/BlockFrequency.h b/llvm/include/llvm/Support/BlockFrequency.h index 18fb60e1904b3..bf0ad46ab4994 100644 --- a/llvm/include/llvm/Support/BlockFrequency.h +++ b/llvm/include/llvm/Support/BlockFrequency.h @@ -13,12 +13,11 @@ #ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H #define LLVM_SUPPORT_BLOCKFREQUENCY_H -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/DataTypes.h" +#include namespace llvm { -class raw_ostream; +class BranchProbability; // This class represents Block Frequency as a 64-bit value. 
class BlockFrequency { diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h index 6c7ad1fe2a52c..6f071c15421f1 100644 --- a/llvm/include/llvm/Support/BranchProbability.h +++ b/llvm/include/llvm/Support/BranchProbability.h @@ -16,7 +16,6 @@ #include "llvm/Support/DataTypes.h" #include #include -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h index 1add185330fa0..a23aad6884d03 100644 --- a/llvm/include/llvm/Support/ConvertUTF.h +++ b/llvm/include/llvm/Support/ConvertUTF.h @@ -91,7 +91,6 @@ #include #include -#include // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. diff --git a/llvm/include/llvm/Support/ELFAttributeParser.h b/llvm/include/llvm/Support/ELFAttributeParser.h index 8bf87b2d84f05..3062dfffff68a 100644 --- a/llvm/include/llvm/Support/ELFAttributeParser.h +++ b/llvm/include/llvm/Support/ELFAttributeParser.h @@ -10,15 +10,16 @@ #define LLVM_SUPPORT_ELFATTRIBUTEPARSER_H #include "ELFAttributes.h" -#include "ScopedPrinter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include namespace llvm { class StringRef; +class ScopedPrinter; class ELFAttributeParser { StringRef vendor; diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index e2002b89ada26..3997f0ea6db79 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -14,7 +14,6 @@ #define LLVM_SUPPORT_ERROR_H #include "llvm-c/Error.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/llvm/include/llvm/Support/ExtensibleRTTI.h b/llvm/include/llvm/Support/ExtensibleRTTI.h index 21055247e9327..d3193be6f529e 100644 --- a/llvm/include/llvm/Support/ExtensibleRTTI.h 
+++ b/llvm/include/llvm/Support/ExtensibleRTTI.h @@ -62,8 +62,6 @@ namespace llvm { -template class RTTIExtends; - /// Base class for the extensible RTTI hierarchy. /// /// This class defines virtual methods, dynamicClassID and isA, that enable diff --git a/llvm/include/llvm/Support/FileCollector.h b/llvm/include/llvm/Support/FileCollector.h index 264fb55c9dba7..232dc8658aa38 100644 --- a/llvm/include/llvm/Support/FileCollector.h +++ b/llvm/include/llvm/Support/FileCollector.h @@ -9,7 +9,6 @@ #ifndef LLVM_SUPPORT_FILECOLLECTOR_H #define LLVM_SUPPORT_FILECOLLECTOR_H -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h index 04efdced32a4a..f8a37fe1177d9 100644 --- a/llvm/include/llvm/Support/FileUtilities.h +++ b/llvm/include/llvm/Support/FileUtilities.h @@ -15,10 +15,10 @@ #define LLVM_SUPPORT_FILEUTILITIES_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" + +#include namespace llvm { diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h index 89575f01b7171..a872afb5e45e5 100644 --- a/llvm/include/llvm/Support/FormatVariadic.h +++ b/llvm/include/llvm/Support/FormatVariadic.h @@ -29,16 +29,17 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatCommon.h" #include "llvm/Support/FormatProviders.h" #include "llvm/Support/FormatVariadicDetails.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include #include -#include namespace llvm { diff --git a/llvm/include/llvm/Support/GraphWriter.h 
b/llvm/include/llvm/Support/GraphWriter.h index 1c0f5f702c6d9..515057e7e312f 100644 --- a/llvm/include/llvm/Support/GraphWriter.h +++ b/llvm/include/llvm/Support/GraphWriter.h @@ -28,8 +28,6 @@ #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" -#include -#include #include #include #include diff --git a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h b/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h index 8e1b3d631983d..aa7997a0228ba 100644 --- a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h +++ b/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H #define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H -#include #include namespace llvm { diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h index 93aaa82f4c0b1..b450c1df3558d 100644 --- a/llvm/include/llvm/Support/RISCVISAInfo.h +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -9,8 +9,6 @@ #ifndef LLVM_SUPPORT_RISCVISAINFO_H #define LLVM_SUPPORT_RISCVISAINFO_H -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/Support/SymbolRemappingReader.h b/llvm/include/llvm/Support/SymbolRemappingReader.h index 820cf9e021920..4fdaf87be082a 100644 --- a/llvm/include/llvm/Support/SymbolRemappingReader.h +++ b/llvm/include/llvm/Support/SymbolRemappingReader.h @@ -62,10 +62,11 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Support/ItaniumManglingCanonicalizer.h" -#include "llvm/Support/MemoryBuffer.h" namespace llvm { +class MemoryBuffer; + class SymbolRemappingParseError : public ErrorInfo { public: SymbolRemappingParseError(StringRef File, int64_t Line, const Twine &Message) diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index 
01e25a0ea857c..1d7594ebedc22 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -17,8 +17,6 @@ // FIXME: vector is used because that's what clang uses for subtarget feature // lists, but SmallVector would probably be better #include "llvm/ADT/Triple.h" -#include "llvm/Support/AArch64TargetParser.h" -#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/RISCVISAInfo.h" #include diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h index 84794a25f78e5..6141acc99db28 100644 --- a/llvm/include/llvm/Support/TimeProfiler.h +++ b/llvm/include/llvm/Support/TimeProfiler.h @@ -10,10 +10,11 @@ #define LLVM_SUPPORT_TIMEPROFILER_H #include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { +class raw_pwrite_stream; + struct TimeTraceProfiler; TimeTraceProfiler *getTimeTraceProfilerInstance(); diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h index c5874ed356988..5a55491b3276a 100644 --- a/llvm/include/llvm/Support/Timer.h +++ b/llvm/include/llvm/Support/Timer.h @@ -14,12 +14,10 @@ #include "llvm/Support/DataTypes.h" #include #include -#include #include namespace llvm { -class Timer; class TimerGroup; class raw_ostream; diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h index 0be6a1012718b..f772deca03014 100644 --- a/llvm/include/llvm/Support/TrigramIndex.h +++ b/llvm/include/llvm/Support/TrigramIndex.h @@ -33,7 +33,6 @@ #include namespace llvm { -class StringRef; class TrigramIndex { public: diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 7d1274735a373..6bddb602e8c19 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -17,7 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" 
#include #include diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index bea232e6e0000..66529075e2e71 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -18,7 +18,6 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/VersionTuple.h" @@ -26,9 +25,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include -#include -#include #include #include #include diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index debee6e52b720..27614572766d4 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -21,8 +21,10 @@ #include "llvm/Debuginfod/HTTPClient.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Caching.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" namespace llvm { diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp index 0659cf6a2d41e..576eb8d069d62 100644 --- a/llvm/lib/Object/Object.cpp +++ b/llvm/lib/Object/Object.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/MachOUniversal.h" +#include "llvm/Support/MemAlloc.h" using namespace llvm; using namespace object; diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 4940b61602d19..b536e9a9a6d02 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -24,9 +24,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Support/ARMAttributeParser.cpp 
b/llvm/lib/Support/ARMAttributeParser.cpp index 4b07fb7c87140..908e56319025d 100644 --- a/llvm/lib/Support/ARMAttributeParser.cpp +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -9,8 +9,6 @@ #include "llvm/Support/ARMAttributeParser.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" using namespace llvm; diff --git a/llvm/lib/Support/ARMWinEH.cpp b/llvm/lib/Support/ARMWinEH.cpp index 2e2fcf28451ff..8e7fa1149082f 100644 --- a/llvm/lib/Support/ARMWinEH.cpp +++ b/llvm/lib/Support/ARMWinEH.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ARMWinEH.h" -#include "llvm/Support/raw_ostream.h" namespace llvm { namespace ARM { diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp index 2b63294f3789e..702165ac480b4 100644 --- a/llvm/lib/Support/BlockFrequency.cpp +++ b/llvm/lib/Support/BlockFrequency.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include using namespace llvm; diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp index a6daee00bd431..f1b730e2b58c4 100644 --- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp +++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include #include using namespace llvm; diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp index 133d674275e8c..8cf3121911539 100644 --- a/llvm/lib/Support/DataExtractor.cpp +++ b/llvm/lib/Support/DataExtractor.cpp @@ -9,7 +9,6 @@ #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" 
#include "llvm/Support/SwapByteOrder.h" diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp index 1206553343efe..cf8a666e92bc0 100644 --- a/llvm/lib/Support/ELFAttributeParser.cpp +++ b/llvm/lib/Support/ELFAttributeParser.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ELFAttributeParser.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" using namespace llvm; diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp index 4b4406c4c9f4b..c11ee59da0dda 100644 --- a/llvm/lib/Support/FileOutputBuffer.cpp +++ b/llvm/lib/Support/FileOutputBuffer.cpp @@ -11,11 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/Path.h" #include #if !defined(_MSC_VER) && !defined(__MINGW32__) diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index dbe28e56b2c37..1a14ed2673606 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -12,15 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileUtilities.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp index 696e6b7a99d8c..6e6d79b225ac8 100644 --- 
a/llvm/lib/Support/GraphWriter.cpp +++ b/llvm/lib/Support/GraphWriter.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" @@ -26,7 +25,6 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp index 152de6ebae0ac..8c6f86f68fa24 100644 --- a/llvm/lib/Support/InitLLVM.cpp +++ b/llvm/lib/Support/InitLLVM.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/InitLLVM.h" -#include "llvm/Support/Error.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" -#include +#include "llvm/Support/SwapByteOrder.h" #ifdef _WIN32 #include "llvm/Support/Windows/WindowsSupport.h" diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index 17b36ed51850d..20babbe56d861 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -12,6 +12,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/NativeFormatting.h" #include namespace llvm { diff --git a/llvm/lib/Support/MSP430AttributeParser.cpp b/llvm/lib/Support/MSP430AttributeParser.cpp index a9948a158fc01..a230a3a70adb6 100644 --- a/llvm/lib/Support/MSP430AttributeParser.cpp +++ b/llvm/lib/Support/MSP430AttributeParser.cpp @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/MSP430AttributeParser.h" -#include "llvm/ADT/StringExtras.h" +#include 
"llvm/ADT/ArrayRef.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::MSP430Attrs; diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 1bbdafd082a45..7192fb1321cb6 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -13,9 +13,9 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" -#include "llvm/Support/AutoConvert.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Errc.h" -#include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -23,7 +23,6 @@ #include "llvm/Support/Program.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" #include -#include #include #include #include diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp index 254d18d797b3e..0a797046bb684 100644 --- a/llvm/lib/Support/NativeFormatting.cpp +++ b/llvm/lib/Support/NativeFormatting.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index 0d07057f1df05..5d3335d001f31 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm-c/ErrorHandling.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SaveAndRestore.h" diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp index ea90a24eaceda..a434e50e8c1fb 100644 --- a/llvm/lib/Support/ScopedPrinter.cpp +++ b/llvm/lib/Support/ScopedPrinter.cpp @@ -1,7 +1,6 @@ #include "llvm/Support/ScopedPrinter.h" #include 
"llvm/Support/Format.h" -#include using namespace llvm::support; diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index c018dc92bf408..5ce41c9870299 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp index 58fafb26cdf3a..074dddc81c808 100644 --- a/llvm/lib/Support/Signposts.cpp +++ b/llvm/lib/Support/Signposts.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Signposts.h" -#include "llvm/Support/Timer.h" #include "llvm/Config/config.h" #if LLVM_SUPPORT_XCODE_SIGNPOSTS diff --git a/llvm/lib/Support/SmallPtrSet.cpp b/llvm/lib/Support/SmallPtrSet.cpp index f6e2dfb8a6c91..cbb87ea8717cf 100644 --- a/llvm/lib/Support/SmallPtrSet.cpp +++ b/llvm/lib/Support/SmallPtrSet.cpp @@ -13,7 +13,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemAlloc.h" #include diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp index 2d7721e4e1fb6..8cafbc7fad0de 100644 --- a/llvm/lib/Support/SmallVector.cpp +++ b/llvm/lib/Support/SmallVector.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/MemAlloc.h" #include #ifdef LLVM_ENABLE_EXCEPTIONS #include diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 1939ed9e9547b..137b37f2b1c3c 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -15,7 +15,6 @@ #include "llvm/Support/SpecialCaseList.h" 
#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp index f65d3846623c8..012c785b4351d 100644 --- a/llvm/lib/Support/StringMap.cpp +++ b/llvm/lib/Support/StringMap.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/DJB.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp index 1caf0947216ea..90997ab0a6cea 100644 --- a/llvm/lib/Support/SymbolRemappingReader.cpp +++ b/llvm/lib/Support/SymbolRemappingReader.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" using namespace llvm; diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index bc60bdea5f62e..c14ddbeea5c39 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -13,9 +13,7 @@ #include "llvm/Support/TargetParser.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" #include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 6eec368e626ff..bf2584950c4ac 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -14,7 +14,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp index 2b094a4983a08..a727bfa51731d 100644 --- 
a/llvm/lib/Support/TimeProfiler.cpp +++ b/llvm/lib/Support/TimeProfiler.cpp @@ -13,8 +13,8 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Threading.h" diff --git a/llvm/lib/Support/ToolOutputFile.cpp b/llvm/lib/Support/ToolOutputFile.cpp index c192ce60f31c9..c2ca97a59c620 100644 --- a/llvm/lib/Support/ToolOutputFile.cpp +++ b/llvm/lib/Support/ToolOutputFile.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ToolOutputFile.h" -#include "llvm/ADT/Triple.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Signals.h" using namespace llvm; diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 1452fa62f5fdc..20dea8c302a5e 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -14,7 +14,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" -#include "llvm/Support/TargetParser.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/VersionTuple.h" #include #include diff --git a/llvm/lib/Support/TypeSize.cpp b/llvm/lib/Support/TypeSize.cpp index abb81016a0bad..a80fde83e3bc7 100644 --- a/llvm/lib/Support/TypeSize.cpp +++ b/llvm/lib/Support/TypeSize.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/TypeSize.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/WithColor.h" #include "DebugOptions.h" diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index a963beb180bae..f15e301874c44 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -35,7 +35,6 @@ #include "llvm/Support/FileSystem/UniqueID.h" #include 
"llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" @@ -46,9 +45,7 @@ #include #include #include -#include #include -#include #include #include #include diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index ab49ac548f89a..10f9692d217e9 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -12,7 +12,6 @@ #include "llvm/Support/X86TargetParser.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include using namespace llvm; diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index 2adf37a511d15..200261d3ed5c0 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Unicode.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index 416423298bc6d..b79d520900d2c 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -18,13 +18,11 @@ #include "llvm/Support/Format.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Unicode.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include #include #include -#include #include #include #include diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 073f370ba34e3..1b1b0af79ae8d 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -24,10 +24,8 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include -#include #include #include -#include #include // may provide O_BINARY. 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index df8f1091bc452..f4d046078d68e 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/TargetParser.h" using namespace llvm; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2928b6c299168..33ed7ae9780e6 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 36c4bbaafcbf8..16befa4fff040 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -35,6 +35,7 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include "llvm/Target/TargetOptions.h" diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 0b314ac2a41e1..c38970f8e3414 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -43,6 +43,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ARMTargetParser.h" #include "llvm/Support/TargetParser.h" #include 
"llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp index d9d19f35ffee8..7349469c80d6d 100644 --- a/llvm/tools/llvm-diff/llvm-diff.cpp +++ b/llvm/tools/llvm-diff/llvm-diff.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" #include #include diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index c9021b135c6cf..d78c4dff7db48 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" #include "llvm/Target/TargetOptions.h" #include #include diff --git a/llvm/tools/llvm-modextract/llvm-modextract.cpp b/llvm/tools/llvm-modextract/llvm-modextract.cpp index 9a44cbf68d0de..b1d6bfb790ec0 100644 --- a/llvm/tools/llvm-modextract/llvm-modextract.cpp +++ b/llvm/tools/llvm-modextract/llvm-modextract.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" using namespace llvm; diff --git a/llvm/tools/llvm-reduce/llvm-reduce.cpp b/llvm/tools/llvm-reduce/llvm-reduce.cpp index e07351aaa385b..59cc055a0870e 100644 --- a/llvm/tools/llvm-reduce/llvm-reduce.cpp +++ b/llvm/tools/llvm-reduce/llvm-reduce.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" #include "llvm/Target/TargetMachine.h" #include #include diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp index 6de28dc611ecb..c6e20e0373c71 100644 --- a/llvm/tools/llvm-split/llvm-split.cpp +++ 
b/llvm/tools/llvm-split/llvm-split.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" #include "llvm/Transforms/Utils/SplitModule.h" using namespace llvm; diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp index f2be4e7d0712c..bb11c18b57fa4 100644 --- a/llvm/tools/llvm-stress/llvm-stress.cpp +++ b/llvm/tools/llvm-stress/llvm-stress.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/WithColor.h" #include #include #include diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 8fe13caab0ca7..768ec83a0e126 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/TargetParser.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/AArch64TargetParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/TargetParser.h" #include "gtest/gtest.h" #include diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index 74587c76236a6..5125af8ce8a4b 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Format.h" diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp index 917d1f5b50fff..7209cafcab7db 100644 --- 
a/mlir/lib/TableGen/Format.cpp +++ b/mlir/lib/TableGen/Format.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "mlir/TableGen/Format.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include using namespace mlir; diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp index f9b6f6719efef..3492c2f89077f 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index e5b75561e7f8a..0dc1ef426d02a 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -14,6 +14,7 @@ #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 8a61e3795b5b4..7a722305d0c31 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Signals.h" #include "llvm/TableGen/Error.h" From 2b8e4c6e5fbd5ec3bf7b75fd6b1e11d66fde78a9 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 14:01:51 +0100 Subject: [PATCH 175/946] Add missing header in Support/ConvertUTF.h --- 
llvm/include/llvm/Support/ConvertUTF.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h index a23aad6884d03..1add185330fa0 100644 --- a/llvm/include/llvm/Support/ConvertUTF.h +++ b/llvm/include/llvm/Support/ConvertUTF.h @@ -91,6 +91,7 @@ #include #include +#include // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. From 38ac4093d9d2ae28d631ca1cc5802533989165c5 Mon Sep 17 00:00:00 2001 From: Archibald Elliott Date: Fri, 21 Jan 2022 13:14:58 +0000 Subject: [PATCH 176/946] [NFCI][Support] Avoid ASSERT_/EXPECT_TRUE(A <op> B) The error messages in tests are far better when a test fails if the test is written using ASSERT_/EXPECT_<OP>(A, B) rather than ASSERT_/EXPECT_TRUE(A <op> B). This commit updates all of llvm/unittests/Support to use these macros where possible. This change has not been possible in: - llvm/unittests/Support/FSUniqueIDTest.cpp - due to not overloading operators beyond ==, != and <. - llvm/unittests/Support/BranchProbabilityTest.cpp - where the unchanged tests are of the operator overloads themselves. There are other possibilities of this conversion not being valid, which have not applied in these tests, as they do not use NULL (they use nullptr), and they do not use const char* (they use std::string or StringRef). 
Reviewed By: mubashar_ Differential Revision: https://reviews.llvm.org/D117319 --- llvm/unittests/Support/Casting.cpp | 34 +++---- llvm/unittests/Support/CommandLineTest.cpp | 89 +++++++++---------- .../DynamicLibrary/DynamicLibraryTest.cpp | 42 +++++---- llvm/unittests/Support/ErrorTest.cpp | 23 +++-- llvm/unittests/Support/FSUniqueIDTest.cpp | 6 +- .../unittests/Support/IndexedAccessorTest.cpp | 2 +- llvm/unittests/Support/JSONTest.cpp | 4 +- llvm/unittests/Support/MemoryBufferTest.cpp | 20 ++--- llvm/unittests/Support/Path.cpp | 2 +- llvm/unittests/Support/ProgramTest.cpp | 4 +- llvm/unittests/Support/TarWriterTest.cpp | 4 +- llvm/unittests/Support/TargetParserTest.cpp | 26 +++--- llvm/unittests/Support/TimerTest.cpp | 2 +- llvm/unittests/Support/UnicodeTest.cpp | 6 +- .../Support/VirtualFileSystemTest.cpp | 76 ++++++++-------- llvm/unittests/Support/YAMLIOTest.cpp | 86 +++++++++--------- llvm/unittests/Support/YAMLParserTest.cpp | 24 ++--- llvm/unittests/Support/raw_ostream_test.cpp | 2 +- 18 files changed, 235 insertions(+), 217 deletions(-) diff --git a/llvm/unittests/Support/Casting.cpp b/llvm/unittests/Support/Casting.cpp index a196fc2ec5ed3..e99c0d4860315 100644 --- a/llvm/unittests/Support/Casting.cpp +++ b/llvm/unittests/Support/Casting.cpp @@ -283,7 +283,7 @@ TEST(CastingTest, UpcastIsInferred) { Derived D; EXPECT_TRUE(isa(D)); Base *BP = dyn_cast(&D); - EXPECT_TRUE(BP != nullptr); + EXPECT_NE(BP, nullptr); } @@ -379,31 +379,31 @@ TEST(CastingTest, smart_isa) { } TEST(CastingTest, smart_cast) { - EXPECT_TRUE(cast(MD) == &D); - EXPECT_TRUE(cast(CD) == &D); + EXPECT_EQ(cast(MD), &D); + EXPECT_EQ(cast(CD), &D); } TEST(CastingTest, smart_cast_or_null) { - EXPECT_TRUE(cast_or_null(MN) == nullptr); - EXPECT_TRUE(cast_or_null(CN) == nullptr); - EXPECT_TRUE(cast_or_null(MD) == &D); - EXPECT_TRUE(cast_or_null(CD) == &D); + EXPECT_EQ(cast_or_null(MN), nullptr); + EXPECT_EQ(cast_or_null(CN), nullptr); + EXPECT_EQ(cast_or_null(MD), &D); + 
EXPECT_EQ(cast_or_null(CD), &D); } TEST(CastingTest, smart_dyn_cast) { - EXPECT_TRUE(dyn_cast(MB) == nullptr); - EXPECT_TRUE(dyn_cast(CB) == nullptr); - EXPECT_TRUE(dyn_cast(MD) == &D); - EXPECT_TRUE(dyn_cast(CD) == &D); + EXPECT_EQ(dyn_cast(MB), nullptr); + EXPECT_EQ(dyn_cast(CB), nullptr); + EXPECT_EQ(dyn_cast(MD), &D); + EXPECT_EQ(dyn_cast(CD), &D); } TEST(CastingTest, smart_dyn_cast_or_null) { - EXPECT_TRUE(dyn_cast_or_null(MN) == nullptr); - EXPECT_TRUE(dyn_cast_or_null(CN) == nullptr); - EXPECT_TRUE(dyn_cast_or_null(MB) == nullptr); - EXPECT_TRUE(dyn_cast_or_null(CB) == nullptr); - EXPECT_TRUE(dyn_cast_or_null(MD) == &D); - EXPECT_TRUE(dyn_cast_or_null(CD) == &D); + EXPECT_EQ(dyn_cast_or_null(MN), nullptr); + EXPECT_EQ(dyn_cast_or_null(CN), nullptr); + EXPECT_EQ(dyn_cast_or_null(MB), nullptr); + EXPECT_EQ(dyn_cast_or_null(CB), nullptr); + EXPECT_EQ(dyn_cast_or_null(MD), &D); + EXPECT_EQ(dyn_cast_or_null(CD), &D); } } // end namespace pointer_wrappers diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index 4e1160fe2dbc5..8032d0e7bf403 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -101,8 +101,7 @@ TEST(CommandLineTest, ModifyExisitingOption) { StringMap &Map = cl::getRegisteredOptions(*cl::TopLevelSubCommand); - ASSERT_TRUE(Map.count("test-option") == 1) << - "Could not find option in map."; + ASSERT_EQ(Map.count("test-option"), 1u) << "Could not find option in map."; cl::Option *Retrieved = Map["test-option"]; ASSERT_EQ(&TestOption, Retrieved) << "Retrieved wrong option."; @@ -701,7 +700,7 @@ TEST(CommandLineTest, DefaultOptions) { const char *args0[] = {"prog", "-b", "args0 bar string", "-f"}; EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args0) / sizeof(char *), args0, StringRef(), &llvm::nulls())); - EXPECT_TRUE(Bar == "args0 bar string"); + EXPECT_EQ(Bar, "args0 bar string"); EXPECT_TRUE(Foo); EXPECT_FALSE(SC1_B); EXPECT_TRUE(SC2_Foo.empty()); 
@@ -711,7 +710,7 @@ TEST(CommandLineTest, DefaultOptions) { const char *args1[] = {"prog", "sc1", "-b", "-bar", "args1 bar string", "-f"}; EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args1) / sizeof(char *), args1, StringRef(), &llvm::nulls())); - EXPECT_TRUE(Bar == "args1 bar string"); + EXPECT_EQ(Bar, "args1 bar string"); EXPECT_TRUE(Foo); EXPECT_TRUE(SC1_B); EXPECT_TRUE(SC2_Foo.empty()); @@ -727,10 +726,10 @@ TEST(CommandLineTest, DefaultOptions) { "-f", "-foo", "foo string"}; EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args2) / sizeof(char *), args2, StringRef(), &llvm::nulls())); - EXPECT_TRUE(Bar == "args2 bar string"); + EXPECT_EQ(Bar, "args2 bar string"); EXPECT_TRUE(Foo); EXPECT_FALSE(SC1_B); - EXPECT_TRUE(SC2_Foo == "foo string"); + EXPECT_EQ(SC2_Foo, "foo string"); for (auto *S : cl::getRegisteredSubcommands()) { if (*S) { EXPECT_EQ("sc2", S->getName()); @@ -777,8 +776,8 @@ TEST(CommandLineTest, ResponseFileWindows) { EXPECT_TRUE( cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls())); EXPECT_TRUE(TopLevelOpt); - EXPECT_TRUE(InputFilenames[0] == "path\\dir\\file1"); - EXPECT_TRUE(InputFilenames[1] == "path/dir/file2"); + EXPECT_EQ(InputFilenames[0], "path\\dir\\file1"); + EXPECT_EQ(InputFilenames[1], "path/dir/file2"); } TEST(CommandLineTest, ResponseFiles) { @@ -1011,9 +1010,9 @@ TEST(CommandLineTest, SetDefautValue) { EXPECT_TRUE( cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls())); - EXPECT_TRUE(Opt1 == "false"); + EXPECT_EQ(Opt1, "false"); EXPECT_TRUE(Opt2); - EXPECT_TRUE(Opt3 == 3); + EXPECT_EQ(Opt3, 3); Opt2 = false; Opt3 = 1; @@ -1028,9 +1027,9 @@ TEST(CommandLineTest, SetDefautValue) { O->setDefault(); } - EXPECT_TRUE(Opt1 == "true"); + EXPECT_EQ(Opt1, "true"); EXPECT_TRUE(Opt2); - EXPECT_TRUE(Opt3 == 3); + EXPECT_EQ(Opt3, 3); Alias.removeArgument(); } @@ -1135,8 +1134,8 @@ TEST(CommandLineTest, PositionalEatArgsError) { cl::ResetAllOptionOccurrences(); EXPECT_TRUE(cl::ParseCommandLineOptions(6, args4, 
StringRef(), &OS)); OS.flush(); - EXPECT_TRUE(PosEatArgs.size() == 1); - EXPECT_TRUE(PosEatArgs2.size() == 2); + EXPECT_EQ(PosEatArgs.size(), 1u); + EXPECT_EQ(PosEatArgs2.size(), 2u); EXPECT_TRUE(Errs.empty()); } @@ -1412,8 +1411,8 @@ TEST(CommandLineTest, PrefixOptions) { const char *args[] = {"prog", "-I=/usr/include"}; EXPECT_TRUE( cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls())); - EXPECT_TRUE(IncludeDirs.size() == 1); - EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0); + EXPECT_EQ(IncludeDirs.size(), 1u); + EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0); IncludeDirs.erase(IncludeDirs.begin()); cl::ResetAllOptionOccurrences(); @@ -1424,8 +1423,8 @@ TEST(CommandLineTest, PrefixOptions) { const char *args2[] = {"prog", "-I", "/usr/include"}; EXPECT_TRUE( cl::ParseCommandLineOptions(3, args2, StringRef(), &llvm::nulls())); - EXPECT_TRUE(IncludeDirs.size() == 1); - EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0); + EXPECT_EQ(IncludeDirs.size(), 1u); + EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0); IncludeDirs.erase(IncludeDirs.begin()); cl::ResetAllOptionOccurrences(); @@ -1435,8 +1434,8 @@ TEST(CommandLineTest, PrefixOptions) { const char *args3[] = {"prog", "-I/usr/include"}; EXPECT_TRUE( cl::ParseCommandLineOptions(2, args3, StringRef(), &llvm::nulls())); - EXPECT_TRUE(IncludeDirs.size() == 1); - EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0); + EXPECT_EQ(IncludeDirs.size(), 1u); + EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0); StackOption> MacroDefs( "D", cl::AlwaysPrefix, cl::desc("Define a macro"), @@ -1450,8 +1449,8 @@ TEST(CommandLineTest, PrefixOptions) { const char *args4[] = {"prog", "-D=HAVE_FOO"}; EXPECT_TRUE( cl::ParseCommandLineOptions(2, args4, StringRef(), &llvm::nulls())); - EXPECT_TRUE(MacroDefs.size() == 1); - EXPECT_TRUE(MacroDefs.front().compare("=HAVE_FOO") == 0); + EXPECT_EQ(MacroDefs.size(), 1u); + EXPECT_EQ(MacroDefs.front().compare("=HAVE_FOO"), 
0); MacroDefs.erase(MacroDefs.begin()); cl::ResetAllOptionOccurrences(); @@ -1471,8 +1470,8 @@ TEST(CommandLineTest, PrefixOptions) { const char *args6[] = {"prog", "-DHAVE_FOO"}; EXPECT_TRUE( cl::ParseCommandLineOptions(2, args6, StringRef(), &llvm::nulls())); - EXPECT_TRUE(MacroDefs.size() == 1); - EXPECT_TRUE(MacroDefs.front().compare("HAVE_FOO") == 0); + EXPECT_EQ(MacroDefs.size(), 1u); + EXPECT_EQ(MacroDefs.front().compare("HAVE_FOO"), 0); } TEST(CommandLineTest, GroupingWithValue) { @@ -1757,12 +1756,12 @@ TEST(CommandLineTest, OptionErrorMessage) { OptA.error("custom error", OS); OS.flush(); - EXPECT_FALSE(Errs.find("for the -a option:") == std::string::npos); + EXPECT_NE(Errs.find("for the -a option:"), std::string::npos); Errs.clear(); OptLong.error("custom error", OS); OS.flush(); - EXPECT_FALSE(Errs.find("for the --long option:") == std::string::npos); + EXPECT_NE(Errs.find("for the --long option:"), std::string::npos); Errs.clear(); cl::ResetAllOptionOccurrences(); @@ -1785,8 +1784,8 @@ TEST(CommandLineTest, OptionErrorMessageSuggest) { EXPECT_FALSE(cl::ParseCommandLineOptions(2, args, StringRef(), &OS)); OS.flush(); - EXPECT_FALSE(Errs.find("prog: Did you mean '--aluminium'?\n") == - std::string::npos); + EXPECT_NE(Errs.find("prog: Did you mean '--aluminium'?\n"), + std::string::npos); Errs.clear(); cl::ResetAllOptionOccurrences(); @@ -1808,8 +1807,8 @@ TEST(CommandLineTest, OptionErrorMessageSuggestNoHidden) { EXPECT_FALSE(cl::ParseCommandLineOptions(2, args, StringRef(), &OS)); OS.flush(); - EXPECT_FALSE(Errs.find("prog: Did you mean '--aluminium'?\n") == - std::string::npos); + EXPECT_NE(Errs.find("prog: Did you mean '--aluminium'?\n"), + std::string::npos); Errs.clear(); cl::ResetAllOptionOccurrences(); @@ -1840,7 +1839,7 @@ TEST(CommandLineTest, Callback) { EXPECT_TRUE(OptA); EXPECT_FALSE(OptB); EXPECT_FALSE(OptC); - EXPECT_TRUE(List.size() == 0); + EXPECT_EQ(List.size(), 0u); cl::ResetAllOptionOccurrences(); const char *args2[] = {"prog", "-b"}; 
@@ -1848,7 +1847,7 @@ TEST(CommandLineTest, Callback) { EXPECT_TRUE(OptA); EXPECT_TRUE(OptB); EXPECT_FALSE(OptC); - EXPECT_TRUE(List.size() == 0); + EXPECT_EQ(List.size(), 0u); cl::ResetAllOptionOccurrences(); const char *args3[] = {"prog", "-c"}; @@ -1856,7 +1855,7 @@ TEST(CommandLineTest, Callback) { EXPECT_TRUE(OptA); EXPECT_TRUE(OptB); EXPECT_TRUE(OptC); - EXPECT_TRUE(List.size() == 0); + EXPECT_EQ(List.size(), 0u); cl::ResetAllOptionOccurrences(); const char *args4[] = {"prog", "--list=foo,bar"}; @@ -1864,7 +1863,7 @@ TEST(CommandLineTest, Callback) { EXPECT_TRUE(OptA); EXPECT_TRUE(OptB); EXPECT_TRUE(OptC); - EXPECT_TRUE(List.size() == 2); + EXPECT_EQ(List.size(), 2u); cl::ResetAllOptionOccurrences(); const char *args5[] = {"prog", "--list=bar"}; @@ -1872,7 +1871,7 @@ TEST(CommandLineTest, Callback) { EXPECT_FALSE(OptA); EXPECT_FALSE(OptB); EXPECT_FALSE(OptC); - EXPECT_TRUE(List.size() == 1); + EXPECT_EQ(List.size(), 1u); cl::ResetAllOptionOccurrences(); } @@ -1899,9 +1898,9 @@ TEST(CommandLineTest, ConsumeAfterOnePositional) { EXPECT_TRUE(cl::ParseCommandLineOptions(4, Args, StringRef(), &OS)); OS.flush(); EXPECT_EQ("input", Input); - EXPECT_TRUE(ExtraArgs.size() == 2); - EXPECT_TRUE(ExtraArgs[0] == "arg1"); - EXPECT_TRUE(ExtraArgs[1] == "arg2"); + EXPECT_EQ(ExtraArgs.size(), 2u); + EXPECT_EQ(ExtraArgs[0], "arg1"); + EXPECT_EQ(ExtraArgs[1], "arg2"); EXPECT_TRUE(Errs.empty()); } @@ -1923,9 +1922,9 @@ TEST(CommandLineTest, ConsumeAfterTwoPositionals) { OS.flush(); EXPECT_EQ("input1", Input1); EXPECT_EQ("input2", Input2); - EXPECT_TRUE(ExtraArgs.size() == 2); - EXPECT_TRUE(ExtraArgs[0] == "arg1"); - EXPECT_TRUE(ExtraArgs[1] == "arg2"); + EXPECT_EQ(ExtraArgs.size(), 2u); + EXPECT_EQ(ExtraArgs[0], "arg1"); + EXPECT_EQ(ExtraArgs[1], "arg2"); EXPECT_TRUE(Errs.empty()); } @@ -1946,17 +1945,17 @@ TEST(CommandLineTest, ResetAllOptionOccurrences) { EXPECT_TRUE(OS.str().empty()); EXPECT_TRUE(Option); - EXPECT_EQ(1, (int)Sink.size()); + EXPECT_EQ(1u, Sink.size()); 
EXPECT_EQ("-unknown", Sink[0]); EXPECT_EQ("input", Input); - EXPECT_EQ(1, (int)ExtraArgs.size()); + EXPECT_EQ(1u, ExtraArgs.size()); EXPECT_EQ("-arg", ExtraArgs[0]); cl::ResetAllOptionOccurrences(); EXPECT_FALSE(Option); - EXPECT_EQ(0, (int)Sink.size()); + EXPECT_EQ(0u, Sink.size()); EXPECT_EQ(0, Input.getNumOccurrences()); - EXPECT_EQ(0, (int)ExtraArgs.size()); + EXPECT_EQ(0u, ExtraArgs.size()); } } // anonymous namespace diff --git a/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp b/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp index 6c9063775a59d..784b9c1bb2d27 100644 --- a/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp +++ b/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp @@ -66,11 +66,13 @@ TEST(DynamicLibrary, Overload) { EXPECT_TRUE(Err.empty()); GetString GS = FuncPtr(DL.getAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS != &TestA); + EXPECT_NE(GS, nullptr); + EXPECT_NE(GS, &TestA); EXPECT_EQ(StdString(GS()), "LibCall"); GS = FuncPtr(DynamicLibrary::SearchForAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS != &TestA); + EXPECT_NE(GS, nullptr); + EXPECT_NE(GS, &TestA); EXPECT_EQ(StdString(GS()), "LibCall"); DL = DynamicLibrary::getPermanentLibrary(nullptr, &Err); @@ -79,32 +81,37 @@ TEST(DynamicLibrary, Overload) { // Test overloading local symbols does not occur by default GS = FuncPtr(DynamicLibrary::SearchForAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS == &TestA); + EXPECT_NE(GS, nullptr); + EXPECT_EQ(GS, &TestA); EXPECT_EQ(StdString(GS()), "ProcessCall"); GS = FuncPtr(DL.getAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS == &TestA); + EXPECT_NE(GS, nullptr); + EXPECT_EQ(GS, &TestA); EXPECT_EQ(StdString(GS()), "ProcessCall"); // Test overloading by forcing library priority when searching for a symbol DynamicLibrary::SearchOrder = DynamicLibrary::SO_LoadedFirst; GS = FuncPtr(DynamicLibrary::SearchForAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != 
nullptr && GS != &TestA); + EXPECT_NE(GS, nullptr); + EXPECT_NE(GS, &TestA); EXPECT_EQ(StdString(GS()), "LibCall"); DynamicLibrary::AddSymbol("TestA", PtrFunc(&OverloadTestA)); GS = FuncPtr(DL.getAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS != &OverloadTestA); + EXPECT_NE(GS, nullptr); + EXPECT_NE(GS, &OverloadTestA); GS = FuncPtr(DynamicLibrary::SearchForAddressOfSymbol("TestA")); - EXPECT_TRUE(GS != nullptr && GS == &OverloadTestA); + EXPECT_NE(GS, nullptr); + EXPECT_EQ(GS, &OverloadTestA); EXPECT_EQ(StdString(GS()), "OverloadCall"); } EXPECT_TRUE(FuncPtr(DynamicLibrary::SearchForAddressOfSymbol( "TestA")) == nullptr); // Check serach ordering is reset to default after call to llvm_shutdown - EXPECT_TRUE(DynamicLibrary::SearchOrder == DynamicLibrary::SO_Linker); + EXPECT_EQ(DynamicLibrary::SearchOrder, DynamicLibrary::SO_Linker); } TEST(DynamicLibrary, Shutdown) { @@ -120,15 +127,15 @@ TEST(DynamicLibrary, Shutdown) { SetStrings SS_0 = FuncPtr( DynamicLibrary::SearchForAddressOfSymbol("SetStrings")); - EXPECT_TRUE(SS_0 != nullptr); + EXPECT_NE(SS_0, nullptr); SS_0(A, B); EXPECT_EQ(B, "Local::Local(PipSqueak)"); TestOrder TO_0 = FuncPtr( DynamicLibrary::SearchForAddressOfSymbol("TestOrder")); - EXPECT_TRUE(TO_0 != nullptr); - + EXPECT_NE(TO_0, nullptr); + DynamicLibrary DL2 = DynamicLibrary::getPermanentLibrary(LibPath(C).c_str(), &Err); EXPECT_TRUE(DL2.isValid()); @@ -137,13 +144,13 @@ TEST(DynamicLibrary, Shutdown) { // Should find latest version of symbols in SecondLib SetStrings SS_1 = FuncPtr( DynamicLibrary::SearchForAddressOfSymbol("SetStrings")); - EXPECT_TRUE(SS_1 != nullptr); - EXPECT_TRUE(SS_0 != SS_1); + EXPECT_NE(SS_1, nullptr); + EXPECT_NE(SS_0, SS_1); TestOrder TO_1 = FuncPtr( DynamicLibrary::SearchForAddressOfSymbol("TestOrder")); - EXPECT_TRUE(TO_1 != nullptr); - EXPECT_TRUE(TO_0 != TO_1); + EXPECT_NE(TO_1, nullptr); + EXPECT_NE(TO_0, TO_1); B.clear(); SS_1(C, B); @@ -154,8 +161,9 @@ TEST(DynamicLibrary, Shutdown) { } EXPECT_EQ(A, 
"Global::~Global"); EXPECT_EQ(B, "Local::~Local"); - EXPECT_TRUE(FuncPtr(DynamicLibrary::SearchForAddressOfSymbol( - "SetStrings")) == nullptr); + EXPECT_EQ(FuncPtr( + DynamicLibrary::SearchForAddressOfSymbol("SetStrings")), + nullptr); // Test unload/destruction ordering EXPECT_EQ(Order.size(), 2UL); diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp index 3ca94b2a2aef9..d4daceda2b14a 100644 --- a/llvm/unittests/Support/ErrorTest.cpp +++ b/llvm/unittests/Support/ErrorTest.cpp @@ -179,7 +179,7 @@ TEST(Error, HandleCustomError) { CaughtErrorInfo = CE.getInfo(); }); - EXPECT_TRUE(CaughtErrorInfo == 42) << "Wrong result from CustomError handler"; + EXPECT_EQ(CaughtErrorInfo, 42) << "Wrong result from CustomError handler"; } // Check that handler type deduction also works for handlers @@ -253,7 +253,8 @@ TEST(Error, HandleCustomErrorWithCustomBaseClass) { CaughtErrorExtraInfo = SE.getExtraInfo(); }); - EXPECT_TRUE(CaughtErrorInfo == 42 && CaughtErrorExtraInfo == 7) + EXPECT_EQ(CaughtErrorInfo, 42) << "Wrong result from CustomSubError handler"; + EXPECT_EQ(CaughtErrorExtraInfo, 7) << "Wrong result from CustomSubError handler"; } @@ -270,9 +271,9 @@ TEST(Error, FirstHandlerOnly) { }, [&](const CustomError &CE) { DummyInfo = CE.getInfo(); }); - EXPECT_TRUE(CaughtErrorInfo == 42 && CaughtErrorExtraInfo == 7 && - DummyInfo == 0) - << "Activated the wrong Error handler(s)"; + EXPECT_EQ(CaughtErrorInfo, 42) << "Activated the wrong Error handler(s)"; + EXPECT_EQ(CaughtErrorExtraInfo, 7) << "Activated the wrong Error handler(s)"; + EXPECT_EQ(DummyInfo, 0) << "Activated the wrong Error handler(s)"; } // Check that general handlers shadow specific ones. 
@@ -289,7 +290,11 @@ TEST(Error, HandlerShadowing) { DummyExtraInfo = SE.getExtraInfo(); }); - EXPECT_TRUE(CaughtErrorInfo == 42 && DummyInfo == 0 && DummyExtraInfo == 0) + EXPECT_EQ(CaughtErrorInfo, 42) + << "General Error handler did not shadow specific handler"; + EXPECT_EQ(DummyInfo, 0) + << "General Error handler did not shadow specific handler"; + EXPECT_EQ(DummyExtraInfo, 0) << "General Error handler did not shadow specific handler"; } @@ -317,9 +322,9 @@ TEST(Error, CheckJoinErrors) { CustomErrorInfo1 = CE.getInfo(); }); - EXPECT_TRUE(CustomErrorInfo1 == 7 && CustomErrorInfo2 == 42 && - CustomErrorExtraInfo == 7) - << "Failed handling compound Error."; + EXPECT_EQ(CustomErrorInfo1, 7) << "Failed handling compound Error."; + EXPECT_EQ(CustomErrorInfo2, 42) << "Failed handling compound Error."; + EXPECT_EQ(CustomErrorExtraInfo, 7) << "Failed handling compound Error."; // Test appending a single item to a list. { diff --git a/llvm/unittests/Support/FSUniqueIDTest.cpp b/llvm/unittests/Support/FSUniqueIDTest.cpp index 6c794730e39dc..81e5088bf24cb 100644 --- a/llvm/unittests/Support/FSUniqueIDTest.cpp +++ b/llvm/unittests/Support/FSUniqueIDTest.cpp @@ -20,9 +20,9 @@ TEST(FSUniqueIDTest, construct) { } TEST(FSUniqueIDTest, equals) { - EXPECT_TRUE(UniqueID(20, 10) == UniqueID(20, 10)); - EXPECT_FALSE(UniqueID(20, 20) == UniqueID(20, 10)); - EXPECT_FALSE(UniqueID(10, 10) == UniqueID(20, 10)); + EXPECT_EQ(UniqueID(20, 10), UniqueID(20, 10)); + EXPECT_NE(UniqueID(20, 20), UniqueID(20, 10)); + EXPECT_NE(UniqueID(10, 10), UniqueID(20, 10)); } TEST(FSUniqueIDTest, less) { diff --git a/llvm/unittests/Support/IndexedAccessorTest.cpp b/llvm/unittests/Support/IndexedAccessorTest.cpp index 9981e91df100e..501d7a6ea2ec0 100644 --- a/llvm/unittests/Support/IndexedAccessorTest.cpp +++ b/llvm/unittests/Support/IndexedAccessorTest.cpp @@ -32,7 +32,7 @@ struct ArrayIndexedAccessorRange template static void compareData(ArrayIndexedAccessorRange range, ArrayRef referenceData) { - 
ASSERT_TRUE(referenceData.size() == range.size()); + ASSERT_EQ(referenceData.size(), range.size()); ASSERT_TRUE(std::equal(range.begin(), range.end(), referenceData.begin())); } diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp index f28c99819d053..ecfd2a5fe1a09 100644 --- a/llvm/unittests/Support/JSONTest.cpp +++ b/llvm/unittests/Support/JSONTest.cpp @@ -138,9 +138,9 @@ TEST(JSONTest, Object) { EXPECT_FALSE(O.try_emplace("a", 4).second); auto D = O.find("d"); - EXPECT_FALSE(D == O.end()); + EXPECT_NE(D, O.end()); auto E = O.find("e"); - EXPECT_TRUE(E == O.end()); + EXPECT_EQ(E, O.end()); O.erase("b"); O.erase(D); diff --git a/llvm/unittests/Support/MemoryBufferTest.cpp b/llvm/unittests/Support/MemoryBufferTest.cpp index c3e7b3c926a6c..bcd25021b5635 100644 --- a/llvm/unittests/Support/MemoryBufferTest.cpp +++ b/llvm/unittests/Support/MemoryBufferTest.cpp @@ -75,15 +75,15 @@ class MemoryBufferTest : public testing::Test { TEST_F(MemoryBufferTest, get) { // Default name and null-terminator flag OwningBuffer MB1(MemoryBuffer::getMemBuffer(data)); - EXPECT_TRUE(nullptr != MB1.get()); + EXPECT_NE(nullptr, MB1.get()); // RequiresNullTerminator = false OwningBuffer MB2(MemoryBuffer::getMemBuffer(data, "one", false)); - EXPECT_TRUE(nullptr != MB2.get()); + EXPECT_NE(nullptr, MB2.get()); // RequiresNullTerminator = true OwningBuffer MB3(MemoryBuffer::getMemBuffer(data, "two", true)); - EXPECT_TRUE(nullptr != MB3.get()); + EXPECT_NE(nullptr, MB3.get()); // verify all 3 buffers point to the same address EXPECT_EQ(MB1->getBufferStart(), MB2->getBufferStart()); @@ -153,11 +153,11 @@ TEST_F(MemoryBufferTest, NullTerminator4K) { TEST_F(MemoryBufferTest, copy) { // copy with no name OwningBuffer MBC1(MemoryBuffer::getMemBufferCopy(data)); - EXPECT_TRUE(nullptr != MBC1.get()); + EXPECT_NE(nullptr, MBC1.get()); // copy with a name OwningBuffer MBC2(MemoryBuffer::getMemBufferCopy(data, "copy")); - EXPECT_TRUE(nullptr != MBC2.get()); + 
EXPECT_NE(nullptr, MBC2.get()); // verify the two copies do not point to the same place EXPECT_NE(MBC1->getBufferStart(), MBC2->getBufferStart()); @@ -198,25 +198,25 @@ TEST_F(MemoryBufferTest, createFromPipe) { TEST_F(MemoryBufferTest, make_new) { // 0-sized buffer OwningBuffer Zero(WritableMemoryBuffer::getNewUninitMemBuffer(0)); - EXPECT_TRUE(nullptr != Zero.get()); + EXPECT_NE(nullptr, Zero.get()); // uninitialized buffer with no name OwningBuffer One(WritableMemoryBuffer::getNewUninitMemBuffer(321)); - EXPECT_TRUE(nullptr != One.get()); + EXPECT_NE(nullptr, One.get()); // uninitialized buffer with name OwningBuffer Two(WritableMemoryBuffer::getNewUninitMemBuffer(123, "bla")); - EXPECT_TRUE(nullptr != Two.get()); + EXPECT_NE(nullptr, Two.get()); // 0-initialized buffer with no name OwningBuffer Three(WritableMemoryBuffer::getNewMemBuffer(321, data)); - EXPECT_TRUE(nullptr != Three.get()); + EXPECT_NE(nullptr, Three.get()); for (size_t i = 0; i < 321; ++i) EXPECT_EQ(0, Three->getBufferStart()[0]); // 0-initialized buffer with name OwningBuffer Four(WritableMemoryBuffer::getNewMemBuffer(123, "zeros")); - EXPECT_TRUE(nullptr != Four.get()); + EXPECT_NE(nullptr, Four.get()); for (size_t i = 0; i < 123; ++i) EXPECT_EQ(0, Four->getBufferStart()[0]); } diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index e3fa5d0b2c491..b749448141f79 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -2315,7 +2315,7 @@ TEST_F(FileSystemTest, widenPath) { for (size_t i = 0; i < NumChars; ++i) Input += Pi; // Check that UTF-8 length already exceeds MAX_PATH. - EXPECT_TRUE(Input.size() > MAX_PATH); + EXPECT_GT(Input.size(), MAX_PATH); SmallVector Result; ASSERT_NO_ERROR(windows::widenPath(Input, Result)); // Result should not start with the long path prefix. 
diff --git a/llvm/unittests/Support/ProgramTest.cpp b/llvm/unittests/Support/ProgramTest.cpp index d899026a358a0..fbbcd847e3c7a 100644 --- a/llvm/unittests/Support/ProgramTest.cpp +++ b/llvm/unittests/Support/ProgramTest.cpp @@ -287,8 +287,8 @@ TEST(ProgramTest, TestExecuteNegative) { bool ExecutionFailed; int RetCode = ExecuteAndWait(Executable, argv, llvm::None, {}, 0, 0, &Error, &ExecutionFailed); - ASSERT_TRUE(RetCode < 0) << "On error ExecuteAndWait should return 0 or " - "positive value indicating the result code"; + ASSERT_LT(RetCode, 0) << "On error ExecuteAndWait should return 0 or " + "positive value indicating the result code"; ASSERT_TRUE(ExecutionFailed); ASSERT_FALSE(Error.empty()); } diff --git a/llvm/unittests/Support/TarWriterTest.cpp b/llvm/unittests/Support/TarWriterTest.cpp index 5a7d901a2e9d0..b5f072431fd15 100644 --- a/llvm/unittests/Support/TarWriterTest.cpp +++ b/llvm/unittests/Support/TarWriterTest.cpp @@ -68,7 +68,7 @@ static std::vector createTar(StringRef Base, StringRef Filename) { static UstarHeader createUstar(StringRef Base, StringRef Filename) { std::vector Buf = createTar(Base, Filename); - EXPECT_TRUE(Buf.size() >= sizeof(UstarHeader)); + EXPECT_GE(Buf.size(), sizeof(UstarHeader)); return *reinterpret_cast(Buf.data()); } @@ -112,7 +112,7 @@ TEST_F(TarWriterTest, LongFilename) { TEST_F(TarWriterTest, Pax) { std::vector Buf = createTar("", std::string(200, 'x')); - EXPECT_TRUE(Buf.size() >= 1024); + EXPECT_GE(Buf.size(), 1024u); auto *Hdr = reinterpret_cast(Buf.data()); EXPECT_EQ("", StringRef(Hdr->Prefix)); diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 768ec83a0e126..a9a8de6d291a3 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -687,13 +687,13 @@ TEST(TargetParserTest, ARMExtensionFeatures) { Features.clear(); ARM::getExtensionFeatures(E.first, Features); EXPECT_TRUE(llvm::is_contained(Features, 
E.second.at(0))); - EXPECT_TRUE(Extensions.size() == Features.size()); + EXPECT_EQ(Extensions.size(), Features.size()); // test -extension Features.clear(); ARM::getExtensionFeatures(~E.first, Features); EXPECT_TRUE(llvm::is_contained(Features, E.second.at(1))); - EXPECT_TRUE(Extensions.size() == Features.size()); + EXPECT_EQ(Extensions.size(), Features.size()); } } @@ -701,10 +701,12 @@ TEST(TargetParserTest, ARMFPUFeatures) { std::vector Features; for (ARM::FPUKind FK = static_cast(0); FK <= ARM::FPUKind::FK_LAST; - FK = static_cast(static_cast(FK) + 1)) - EXPECT_TRUE((FK == ARM::FK_INVALID || FK >= ARM::FK_LAST) - ? !ARM::getFPUFeatures(FK, Features) - : ARM::getFPUFeatures(FK, Features)); + FK = static_cast(static_cast(FK) + 1)) { + if (FK == ARM::FK_INVALID || FK >= ARM::FK_LAST) + EXPECT_FALSE(ARM::getFPUFeatures(FK, Features)); + else + EXPECT_TRUE(ARM::getFPUFeatures(FK, Features)); + } } TEST(TargetParserTest, ARMArchExtFeature) { @@ -1448,7 +1450,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(!Features.size()); AArch64::getExtensionFeatures(ExtVal, Features); - EXPECT_TRUE(Extensions.size() == Features.size()); + EXPECT_EQ(Extensions.size(), Features.size()); EXPECT_TRUE(llvm::is_contained(Features, "+crc")); EXPECT_TRUE(llvm::is_contained(Features, "+crypto")); @@ -1477,10 +1479,12 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { TEST(TargetParserTest, AArch64ArchFeatures) { std::vector Features; - for (auto AK : AArch64::ArchKinds) - EXPECT_TRUE((AK == AArch64::ArchKind::INVALID) - ? 
!AArch64::getArchFeatures(AK, Features) - : AArch64::getArchFeatures(AK, Features)); + for (auto AK : AArch64::ArchKinds) { + if (AK == AArch64::ArchKind::INVALID) + EXPECT_FALSE(AArch64::getArchFeatures(AK, Features)); + else + EXPECT_TRUE(AArch64::getArchFeatures(AK, Features)); + } } TEST(TargetParserTest, AArch64ArchExtFeature) { diff --git a/llvm/unittests/Support/TimerTest.cpp b/llvm/unittests/Support/TimerTest.cpp index 9a9b67eaa9f78..09545eb6939ae 100644 --- a/llvm/unittests/Support/TimerTest.cpp +++ b/llvm/unittests/Support/TimerTest.cpp @@ -45,7 +45,7 @@ TEST(Timer, Additivity) { T1.stopTimer(); auto TR2 = T1.getTotalTime(); - EXPECT_TRUE(TR1 < TR2); + EXPECT_LT(TR1, TR2); } TEST(Timer, CheckIfTriggered) { diff --git a/llvm/unittests/Support/UnicodeTest.cpp b/llvm/unittests/Support/UnicodeTest.cpp index 6ce323dc8f380..09f1cb3e1ff66 100644 --- a/llvm/unittests/Support/UnicodeTest.cpp +++ b/llvm/unittests/Support/UnicodeTest.cpp @@ -95,9 +95,9 @@ TEST(Unicode, isPrintable) { UTF32 *Target32 = &buf32[0]; auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32, Target32 + 1, strictConversion); - EXPECT_TRUE(status == conversionOK); - EXPECT_TRUE((columnWidthUTF8(reinterpret_cast(buf8)) == 1) == - (bool)isPrintable(buf32[0])); + EXPECT_EQ(status, conversionOK); + EXPECT_EQ((columnWidthUTF8(reinterpret_cast(buf8)) == 1), + (bool)isPrintable(buf32[0])); } } diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp index caae58f74f636..6b191c6401685 100644 --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -567,7 +567,8 @@ TEST(VirtualFileSystemTest, BasicRealFSRecursiveIteration) { for (const std::string &Name : Contents) { ASSERT_FALSE(Name.empty()); int Index = Name[Name.size() - 1] - 'a'; - ASSERT_TRUE(Index >= 0 && Index < 4); + ASSERT_GE(Index, 0); + ASSERT_LT(Index, 4); Counts[Index]++; } EXPECT_EQ(1, Counts[0]); // a @@ -644,7 
+645,8 @@ TEST(VirtualFileSystemTest, BasicRealFSRecursiveIterationNoPush) { for (const std::string &Name : Contents) { ASSERT_FALSE(Name.empty()); int Index = Name[Name.size() - 1] - 'a'; - ASSERT_TRUE(Index >= 0 && Index < 7); + ASSERT_GE(Index, 0); + ASSERT_LT(Index, 7); Counts[Index]++; } EXPECT_EQ(1, Counts[0]); // a @@ -1183,9 +1185,9 @@ TEST_F(InMemoryFileSystemTest, AddHardLinkToFile) { FS.addFile(Target, 0, MemoryBuffer::getMemBuffer("content of target")); EXPECT_TRUE(FS.addHardLink(FromLink, Target)); EXPECT_THAT(FromLink, IsHardLinkTo(&FS, Target)); - EXPECT_TRUE(FS.status(FromLink)->getSize() == FS.status(Target)->getSize()); - EXPECT_TRUE(FS.getBufferForFile(FromLink)->get()->getBuffer() == - FS.getBufferForFile(Target)->get()->getBuffer()); + EXPECT_EQ(FS.status(FromLink)->getSize(), FS.status(Target)->getSize()); + EXPECT_EQ(FS.getBufferForFile(FromLink)->get()->getBuffer(), + FS.getBufferForFile(Target)->get()->getBuffer()); } TEST_F(InMemoryFileSystemTest, AddHardLinkInChainPattern) { @@ -1381,7 +1383,7 @@ TEST_F(VFSFromYAMLTest, MappedFiles) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -1474,7 +1476,7 @@ TEST_F(VFSFromYAMLTest, MappedRoot) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -1522,7 +1524,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlay) { " ]\n" "}]}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -1567,7 +1569,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlayNoExternalNames) { " ]\n" "}]}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); ErrorOr S = FS->status("//root/foo"); ASSERT_FALSE(S.getError()); @@ -1608,7 +1610,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlayNoFallthrough) { " ]\n" "}]}", 
Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); ErrorOr S = Lower->status("//root/foo"); ASSERT_FALSE(S.getError()); @@ -1752,7 +1754,7 @@ TEST_F(VFSFromYAMLTest, CaseInsensitive) { " ]\n" "}]}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -1788,7 +1790,7 @@ TEST_F(VFSFromYAMLTest, CaseSensitive) { " ]\n" "}]}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -1930,7 +1932,7 @@ TEST_F(VFSFromYAMLTest, UseExternalName) { " }\n" "] }", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); // default true EXPECT_EQ("//root/external/file", FS->status("//root/A")->getName()); @@ -1954,7 +1956,7 @@ TEST_F(VFSFromYAMLTest, UseExternalName) { " }\n" "] }", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); // default EXPECT_EQ("//root/A", FS->status("//root/A")->getName()); @@ -1974,7 +1976,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) { " 'external-contents': '//root/other' }]\n" "}", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); EXPECT_FALSE(FS->status("//root/path/to/file").getError()); EXPECT_FALSE(FS->status("//root/path/to").getError()); EXPECT_FALSE(FS->status("//root/path").getError()); @@ -1988,7 +1990,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) { " 'external-contents': '//root/other' }]}]\n" "}", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); EXPECT_FALSE(FS->status("//root/path/to/file").getError()); EXPECT_FALSE(FS->status("//root/path/to").getError()); EXPECT_FALSE(FS->status("//root/path").getError()); @@ -2002,7 +2004,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) { " 'external-contents': '//root/other' }]}]\n" "}", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); 
EXPECT_FALSE(FS->status("//root/path/to/file").getError()); EXPECT_FALSE(FS->status("//root/path/to").getError()); EXPECT_FALSE(FS->status("//root/path").getError()); @@ -2021,7 +2023,7 @@ TEST_F(VFSFromYAMLTest, TrailingSlashes) { " 'external-contents': '//root/other' }]}]\n" "}", Lower); - ASSERT_TRUE(nullptr != FS.get()); + ASSERT_NE(nullptr, FS.get()); EXPECT_FALSE(FS->status("//root/path/to/file").getError()); EXPECT_FALSE(FS->status("//root/path/to").getError()); EXPECT_FALSE(FS->status("//root/path").getError()); @@ -2057,7 +2059,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIteration) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -2107,7 +2109,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationSameDirMultipleEntries) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -2142,7 +2144,7 @@ TEST_F(VFSFromYAMLTest, RecursiveDirectoryIterationLevel) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); IntrusiveRefCntPtr O( new vfs::OverlayFileSystem(Lower)); @@ -2238,7 +2240,7 @@ TEST_F(VFSFromYAMLTest, NonFallthroughDirectoryIteration) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC; checkContents(FS->dir_begin("//root/", EC), @@ -2266,7 +2268,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationWithDuplicates) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC; checkContents(FS->dir_begin("//root/", EC), @@ -2295,7 +2297,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationErrorInVFSLayer) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC; checkContents(FS->dir_begin("//root/foo", EC), @@ -2328,7 +2330,7 @@ TEST_F(VFSFromYAMLTest, GetRealPath) { "]\n" "}", Lower); - 
ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); // Regular file present in underlying file system. SmallString<16> RealPath; @@ -2370,7 +2372,7 @@ TEST_F(VFSFromYAMLTest, WorkingDirectory) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC = FS->setCurrentWorkingDirectory("//root/bar"); ASSERT_FALSE(EC); @@ -2439,10 +2441,10 @@ TEST_F(VFSFromYAMLTest, WorkingDirectoryFallthrough) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC = FS->setCurrentWorkingDirectory("//root/"); ASSERT_FALSE(EC); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); llvm::ErrorOr Status = FS->status("bar/a"); ASSERT_FALSE(Status.getError()); @@ -2511,10 +2513,10 @@ TEST_F(VFSFromYAMLTest, WorkingDirectoryFallthroughInvalid) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC = FS->setCurrentWorkingDirectory("//root/"); ASSERT_FALSE(EC); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); llvm::ErrorOr Status = FS->status("bar/a"); ASSERT_FALSE(Status.getError()); @@ -2548,10 +2550,10 @@ TEST_F(VFSFromYAMLTest, VirtualWorkingDirectory) { "]\n" "}", Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); std::error_code EC = FS->setCurrentWorkingDirectory("//root/bar"); ASSERT_FALSE(EC); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); llvm::ErrorOr Status = FS->status("a"); ASSERT_FALSE(Status.getError()); @@ -2596,7 +2598,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest) { Lower->addDirectory("//root/h"); IntrusiveRefCntPtr FS = getFromYAMLRawString(Buffer, Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); EXPECT_TRUE(FS->exists(_a.path())); EXPECT_TRUE(FS->exists(_ab.path())); @@ -2636,7 +2638,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest2) { IntrusiveRefCntPtr Lower(new ErrorDummyFileSystem()); 
IntrusiveRefCntPtr FS = getFromYAMLRawString(Buffer, Lower); - EXPECT_TRUE(FS.get() != nullptr); + EXPECT_NE(FS.get(), nullptr); } TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest3) { @@ -2669,7 +2671,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest3) { IntrusiveRefCntPtr Lower(new ErrorDummyFileSystem()); IntrusiveRefCntPtr FS = getFromYAMLRawString(Buffer, Lower); - EXPECT_TRUE(FS.get() != nullptr); + EXPECT_NE(FS.get(), nullptr); } TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) { @@ -2689,7 +2691,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) { OS.flush(); // We didn't add a single file - only directories. - EXPECT_TRUE(Buffer.find("'type': 'file'") == std::string::npos); + EXPECT_EQ(Buffer.find("'type': 'file'"), std::string::npos); IntrusiveRefCntPtr Lower(new ErrorDummyFileSystem()); Lower->addDirectory("//root/a"); @@ -2701,7 +2703,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) { Lower->addRegularFile("//root/c/c"); IntrusiveRefCntPtr FS = getFromYAMLRawString(Buffer, Lower); - ASSERT_TRUE(FS.get() != nullptr); + ASSERT_NE(FS.get(), nullptr); EXPECT_FALSE(FS->exists(_a.path("a"))); EXPECT_FALSE(FS->exists(_b.path("b"))); diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp index 3e1efbc755011..a9230c2ca4767 100644 --- a/llvm/unittests/Support/YAMLIOTest.cpp +++ b/llvm/unittests/Support/YAMLIOTest.cpp @@ -398,8 +398,8 @@ TEST(YAMLIO, TestReadBuiltInTypes) { yin >> map; EXPECT_FALSE(yin.error()); - EXPECT_TRUE(map.str.equals("hello there")); - EXPECT_TRUE(map.stdstr == "hello where?"); + EXPECT_EQ(map.str, "hello there"); + EXPECT_EQ(map.stdstr, "hello where?"); EXPECT_EQ(map.u64, 5000000000ULL); EXPECT_EQ(map.u32, 4000000000U); EXPECT_EQ(map.u16, 65000); @@ -454,23 +454,23 @@ TEST(YAMLIO, TestReadWriteBuiltInTypes) { yin >> map; EXPECT_FALSE(yin.error()); - EXPECT_TRUE(map.str.equals("one two")); - EXPECT_TRUE(map.stdstr == "three four"); - EXPECT_EQ(map.u64, 6000000000ULL); - EXPECT_EQ(map.u32, 
3000000000U); - EXPECT_EQ(map.u16, 50000); - EXPECT_EQ(map.u8, 254); - EXPECT_EQ(map.b, true); - EXPECT_EQ(map.s64, -6000000000LL); - EXPECT_EQ(map.s32, -2000000000L); - EXPECT_EQ(map.s16, -32000); - EXPECT_EQ(map.s8, -128); - EXPECT_EQ(map.f, 3.25); - EXPECT_EQ(map.d, -2.8625); - EXPECT_EQ(map.h8, Hex8(254)); - EXPECT_EQ(map.h16, Hex16(50000)); - EXPECT_EQ(map.h32, Hex32(3000000000U)); - EXPECT_EQ(map.h64, Hex64(6000000000LL)); + EXPECT_EQ(map.str, "one two"); + EXPECT_EQ(map.stdstr, "three four"); + EXPECT_EQ(map.u64, 6000000000ULL); + EXPECT_EQ(map.u32, 3000000000U); + EXPECT_EQ(map.u16, 50000); + EXPECT_EQ(map.u8, 254); + EXPECT_EQ(map.b, true); + EXPECT_EQ(map.s64, -6000000000LL); + EXPECT_EQ(map.s32, -2000000000L); + EXPECT_EQ(map.s16, -32000); + EXPECT_EQ(map.s8, -128); + EXPECT_EQ(map.f, 3.25); + EXPECT_EQ(map.d, -2.8625); + EXPECT_EQ(map.h8, Hex8(254)); + EXPECT_EQ(map.h16, Hex16(50000)); + EXPECT_EQ(map.h32, Hex32(3000000000U)); + EXPECT_EQ(map.h64, Hex64(6000000000LL)); } } @@ -785,18 +785,18 @@ TEST(YAMLIO, TestReadWriteStringTypes) { yin >> map; EXPECT_FALSE(yin.error()); - EXPECT_TRUE(map.str1.equals("'aaa")); - EXPECT_TRUE(map.str2.equals("\"bbb")); - EXPECT_TRUE(map.str3.equals("`ccc")); - EXPECT_TRUE(map.str4.equals("@ddd")); - EXPECT_TRUE(map.str5.equals("")); - EXPECT_TRUE(map.str6.equals("0000000004000000")); - EXPECT_TRUE(map.stdstr1 == "'eee"); - EXPECT_TRUE(map.stdstr2 == "\"fff"); - EXPECT_TRUE(map.stdstr3 == "`ggg"); - EXPECT_TRUE(map.stdstr4 == "@hhh"); - EXPECT_TRUE(map.stdstr5 == ""); - EXPECT_TRUE(map.stdstr6 == "0000000004000000"); + EXPECT_EQ(map.str1, "'aaa"); + EXPECT_EQ(map.str2, "\"bbb"); + EXPECT_EQ(map.str3, "`ccc"); + EXPECT_EQ(map.str4, "@ddd"); + EXPECT_EQ(map.str5, ""); + EXPECT_EQ(map.str6, "0000000004000000"); + EXPECT_EQ(map.stdstr1, "'eee"); + EXPECT_EQ(map.stdstr2, "\"fff"); + EXPECT_EQ(map.stdstr3, "`ggg"); + EXPECT_EQ(map.stdstr4, "@hhh"); + EXPECT_EQ(map.stdstr5, ""); + EXPECT_EQ(map.stdstr6, "0000000004000000"); 
EXPECT_EQ(std::string("\0a\0b\0", 5), map.stdstr13); } } @@ -2208,10 +2208,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex8Error) { yin2 >> seq2; EXPECT_TRUE(!!yin2.error()); - EXPECT_TRUE(seq.size() == 3); - EXPECT_TRUE(seq.size() == seq2.size()); + EXPECT_EQ(seq.size(), 3u); + EXPECT_EQ(seq.size(), seq2.size()); for (size_t i = 0; i < seq.size(); ++i) - EXPECT_TRUE(seq[i] == seq2[i]); + EXPECT_EQ(seq[i], seq2[i]); } @@ -2238,10 +2238,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex16Error) { yin2 >> seq2; EXPECT_TRUE(!!yin2.error()); - EXPECT_TRUE(seq.size() == 3); - EXPECT_TRUE(seq.size() == seq2.size()); + EXPECT_EQ(seq.size(), 3u); + EXPECT_EQ(seq.size(), seq2.size()); for (size_t i = 0; i < seq.size(); ++i) - EXPECT_TRUE(seq[i] == seq2[i]); + EXPECT_EQ(seq[i], seq2[i]); } // @@ -2268,10 +2268,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex32Error) { yin2 >> seq2; EXPECT_TRUE(!!yin2.error()); - EXPECT_TRUE(seq.size() == 3); - EXPECT_TRUE(seq.size() == seq2.size()); + EXPECT_EQ(seq.size(), 3u); + EXPECT_EQ(seq.size(), seq2.size()); for (size_t i = 0; i < seq.size(); ++i) - EXPECT_TRUE(seq[i] == seq2[i]); + EXPECT_EQ(seq[i], seq2[i]); } // @@ -2297,10 +2297,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex64Error) { yin2 >> seq2; EXPECT_TRUE(!!yin2.error()); - EXPECT_TRUE(seq.size() == 3); - EXPECT_TRUE(seq.size() == seq2.size()); + EXPECT_EQ(seq.size(), 3u); + EXPECT_EQ(seq.size(), seq2.size()); for (size_t i = 0; i < seq.size(); ++i) - EXPECT_TRUE(seq[i] == seq2[i]); + EXPECT_EQ(seq[i], seq2[i]); } TEST(YAMLIO, TestMalformedMapFailsGracefully) { diff --git a/llvm/unittests/Support/YAMLParserTest.cpp b/llvm/unittests/Support/YAMLParserTest.cpp index adda6ce9f5c30..692d963828318 100644 --- a/llvm/unittests/Support/YAMLParserTest.cpp +++ b/llvm/unittests/Support/YAMLParserTest.cpp @@ -269,9 +269,9 @@ TEST(YAMLParser, SameNodeIteratorOperatorNotEquals) { auto Begin = Node->begin(); auto End = Node->end(); - EXPECT_TRUE(Begin != End); - EXPECT_FALSE(Begin != Begin); - EXPECT_FALSE(End != 
End); + EXPECT_NE(Begin, End); + EXPECT_EQ(Begin, Begin); + EXPECT_EQ(End, End); } TEST(YAMLParser, SameNodeIteratorOperatorEquals) { @@ -284,9 +284,9 @@ TEST(YAMLParser, SameNodeIteratorOperatorEquals) { auto Begin = Node->begin(); auto End = Node->end(); - EXPECT_FALSE(Begin == End); - EXPECT_TRUE(Begin == Begin); - EXPECT_TRUE(End == End); + EXPECT_NE(Begin, End); + EXPECT_EQ(Begin, Begin); + EXPECT_EQ(End, End); } TEST(YAMLParser, DifferentNodesIteratorOperatorNotEquals) { @@ -305,9 +305,9 @@ TEST(YAMLParser, DifferentNodesIteratorOperatorNotEquals) { auto AnotherBegin = AnotherNode->begin(); auto AnotherEnd = AnotherNode->end(); - EXPECT_TRUE(Begin != AnotherBegin); - EXPECT_TRUE(Begin != AnotherEnd); - EXPECT_FALSE(End != AnotherEnd); + EXPECT_NE(Begin, AnotherBegin); + EXPECT_NE(Begin, AnotherEnd); + EXPECT_EQ(End, AnotherEnd); } TEST(YAMLParser, DifferentNodesIteratorOperatorEquals) { @@ -326,9 +326,9 @@ TEST(YAMLParser, DifferentNodesIteratorOperatorEquals) { auto AnotherBegin = AnotherNode->begin(); auto AnotherEnd = AnotherNode->end(); - EXPECT_FALSE(Begin == AnotherBegin); - EXPECT_FALSE(Begin == AnotherEnd); - EXPECT_TRUE(End == AnotherEnd); + EXPECT_NE(Begin, AnotherBegin); + EXPECT_NE(Begin, AnotherEnd); + EXPECT_EQ(End, AnotherEnd); } TEST(YAMLParser, FlowSequenceTokensOutsideFlowSequence) { diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp index 5125af8ce8a4b..8c695300b745d 100644 --- a/llvm/unittests/Support/raw_ostream_test.cpp +++ b/llvm/unittests/Support/raw_ostream_test.cpp @@ -469,7 +469,7 @@ TEST(raw_ostreamTest, reserve_stream) { OS << "11111111111111111111"; uint64_t CurrentPos = OS.tell(); OS.reserveExtraSpace(1000); - EXPECT_TRUE(Str.capacity() >= CurrentPos + 1000); + EXPECT_GE(Str.capacity(), CurrentPos + 1000); OS << "hello"; OS << 1; OS << 'w' << 'o' << 'r' << 'l' << 'd'; From 51c53a0791cd2794365cab9917922ce1e324b379 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 
Jan 2022 14:18:17 +0100 Subject: [PATCH 177/946] Add apple-specific missing include --- llvm/lib/Support/PrettyStackTrace.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index 5d3335d001f31..fa91405fee10a 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -20,6 +20,10 @@ #include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" +#ifdef __APPLE__ +#include "llvm/ADT/SmallString.h" +#endif + #include #include #include From 2a9e33db4f0a558572309b29a7d247185b4c21d1 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 14:28:47 +0100 Subject: [PATCH 178/946] Add ms-specific missing header in Support/InitLLVM.cpp --- llvm/lib/Support/InitLLVM.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp index 8c6f86f68fa24..2b7173b289403 100644 --- a/llvm/lib/Support/InitLLVM.cpp +++ b/llvm/lib/Support/InitLLVM.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/SwapByteOrder.h" #ifdef _WIN32 +#include "llvm/Support/Error.h" #include "llvm/Support/Windows/WindowsSupport.h" #endif From 622354a522073b0a048a88c957b161fb376a40eb Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 12 Jan 2022 16:09:58 +0100 Subject: [PATCH 179/946] [llvm][ADT] Implement `BitVector::{pop_,}back` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM Programmer’s Manual strongly discourages the use of `std::vector<bool>` and suggests `llvm::BitVector` as a possible replacement. Currently, some users of `std::vector<bool>` cannot switch to `llvm::BitVector` because it doesn't implement the `pop_back()` and `back()` functions. To enable easy transition of `std::vector<bool>` users, this patch implements `llvm::BitVector::pop_back()` and `llvm::BitVector::back()`.
Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D117115 --- .../readability/FunctionSizeCheck.cpp | 3 ++- clang-tools-extra/clangd/SourceCode.cpp | 3 ++- clang/lib/Format/UnwrappedLineParser.cpp | 4 +-- clang/lib/Format/UnwrappedLineParser.h | 3 ++- llvm/include/llvm/ADT/BitVector.h | 12 +++++++++ llvm/include/llvm/ADT/SmallBitVector.h | 12 +++++++++ llvm/lib/MC/MCParser/MasmParser.cpp | 3 ++- llvm/unittests/ADT/BitVectorTest.cpp | 26 +++++++++++++++++++ 8 files changed, 60 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp index d98bec2ebdf1f..9d3300d2787ca 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp @@ -9,6 +9,7 @@ #include "FunctionSizeCheck.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "llvm/ADT/BitVector.h" using namespace clang::ast_matchers; @@ -118,7 +119,7 @@ class FunctionASTVisitor : public RecursiveASTVisitor { std::vector NestingThresholders; }; FunctionInfo Info; - std::vector TrackedParent; + llvm::BitVector TrackedParent; unsigned StructNesting = 0; unsigned CurrentNestingLevel = 0; }; diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index 6f6d936ac3a7e..e005fe4b37361 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -27,6 +27,7 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -663,7 +664,7 @@ void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts, // Stack of enclosing namespaces, e.g. 
{"clang", "clangd"} std::vector Enclosing; // Contains e.g. "clang", "clangd" // Stack counts open braces. true if the brace opened a namespace. - std::vector BraceStack; + llvm::BitVector BraceStack; enum { Default, diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index f466111260962..67b7b3937b07e 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -58,7 +58,7 @@ namespace { class ScopedDeclarationState { public: - ScopedDeclarationState(UnwrappedLine &Line, std::vector &Stack, + ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, bool MustBeDeclaration) : Line(Line), Stack(Stack) { Line.MustBeDeclaration = MustBeDeclaration; @@ -74,7 +74,7 @@ class ScopedDeclarationState { private: UnwrappedLine &Line; - std::vector &Stack; + llvm::BitVector &Stack; }; static bool isLineComment(const FormatToken &FormatTok) { diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 8d4d4dca7633f..3f64d57c7bff7 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -18,6 +18,7 @@ #include "FormatToken.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/ADT/BitVector.h" #include "llvm/Support/Regex.h" #include #include @@ -231,7 +232,7 @@ class UnwrappedLineParser { // We store for each line whether it must be a declaration depending on // whether we are in a compound statement or not. 
- std::vector DeclarationScopeStack; + llvm::BitVector DeclarationScopeStack; const FormatStyle &Style; const AdditionalKeywords &Keywords; diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index cd1964cbdd98d..fff4a8f578d2a 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -444,6 +444,12 @@ class BitVector { return (Bits[Idx / BITWORD_SIZE] & Mask) != 0; } + /// Return the last element in the vector. + bool back() const { + assert(!empty() && "Getting last element of empty vector."); + return (*this)[size() - 1]; + } + bool test(unsigned Idx) const { return (*this)[Idx]; } @@ -465,6 +471,12 @@ class BitVector { set(OldSize); } + /// Pop one bit from the end of the vector. + void pop_back() { + assert(!empty() && "Empty vector has no element to pop."); + resize(size() - 1); + } + /// Test if any common bits are set. bool anyCommon(const BitVector &RHS) const { unsigned ThisWords = Bits.size(); diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h index 51ee5dbbce05a..17be317a10d72 100644 --- a/llvm/include/llvm/ADT/SmallBitVector.h +++ b/llvm/include/llvm/ADT/SmallBitVector.h @@ -462,6 +462,12 @@ class SmallBitVector { return getPointer()->operator[](Idx); } + /// Return the last element in the vector. + bool back() const { + assert(!empty() && "Getting last element of empty vector."); + return (*this)[size() - 1]; + } + bool test(unsigned Idx) const { return (*this)[Idx]; } @@ -471,6 +477,12 @@ class SmallBitVector { resize(size() + 1, Val); } + /// Pop one bit from the end of the vector. + void pop_back() { + assert(!empty() && "Empty vector has no element to pop."); + resize(size() - 1); + } + /// Test if any common bits are set. 
bool anyCommon(const SmallBitVector &RHS) const { if (isSmall() && RHS.isSmall()) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index e2dfd339e93e2..a888e830182fb 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -379,7 +380,7 @@ class MasmParser : public MCAsmParser { /// time of assembly struct tm TM; - std::vector EndStatementAtEOFStack; + BitVector EndStatementAtEOFStack; AsmCond TheCondState; std::vector TheCondStack; diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp index 7ab4ab9a9d06e..ce043d47b97f1 100644 --- a/llvm/unittests/ADT/BitVectorTest.cpp +++ b/llvm/unittests/ADT/BitVectorTest.cpp @@ -1171,21 +1171,25 @@ TYPED_TEST(BitVectorTest, PushBack) { EXPECT_EQ(-1, Vec.find_first()); EXPECT_EQ(10U, Vec.size()); EXPECT_EQ(0U, Vec.count()); + EXPECT_EQ(false, Vec.back()); Vec.push_back(true); EXPECT_EQ(10, Vec.find_first()); EXPECT_EQ(11U, Vec.size()); EXPECT_EQ(1U, Vec.count()); + EXPECT_EQ(true, Vec.back()); Vec.push_back(false); EXPECT_EQ(10, Vec.find_first()); EXPECT_EQ(12U, Vec.size()); EXPECT_EQ(1U, Vec.count()); + EXPECT_EQ(false, Vec.back()); Vec.push_back(true); EXPECT_EQ(10, Vec.find_first()); EXPECT_EQ(13U, Vec.size()); EXPECT_EQ(2U, Vec.count()); + EXPECT_EQ(true, Vec.back()); // Add a lot of values to cause reallocation. 
for (int i = 0; i != 100; ++i) { @@ -1197,6 +1201,28 @@ TYPED_TEST(BitVectorTest, PushBack) { EXPECT_EQ(102U, Vec.count()); } +TYPED_TEST(BitVectorTest, PopBack) { + TypeParam Vec(10, true); + EXPECT_EQ(10U, Vec.size()); + EXPECT_EQ(10U, Vec.count()); + EXPECT_EQ(true, Vec.back()); + + Vec.pop_back(); + EXPECT_EQ(9U, Vec.size()); + EXPECT_EQ(9U, Vec.count()); + EXPECT_EQ(true, Vec.back()); + + Vec.push_back(false); + EXPECT_EQ(10U, Vec.size()); + EXPECT_EQ(9U, Vec.count()); + EXPECT_EQ(false, Vec.back()); + + Vec.pop_back(); + EXPECT_EQ(9U, Vec.size()); + EXPECT_EQ(9U, Vec.count()); + EXPECT_EQ(true, Vec.back()); +} + TYPED_TEST(BitVectorTest, DenseSet) { DenseSet Set; TypeParam A(10, true); From 7e3bcae5069fdb13ba6241b726d3a3912287784e Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 14:59:11 +0100 Subject: [PATCH 180/946] Add apple-specific missing header in Support/GraphWriter.cpp --- llvm/lib/Support/GraphWriter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp index 6e6d79b225ac8..e875e18a7e92e 100644 --- a/llvm/lib/Support/GraphWriter.cpp +++ b/llvm/lib/Support/GraphWriter.cpp @@ -25,6 +25,11 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" + +#ifdef __APPLE__ +#include "llvm/Support/CommandLine.h" +#endif + #include #include #include From d5ae039ed7b84bf767d15417a3e9bf61f982257b Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Thu, 20 Jan 2022 14:28:18 -0500 Subject: [PATCH 181/946] [SystemZ] Properly register machine passes. Registering the passes enables use of -stop-before=/-stop-after options. 
Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D117823 --- llvm/lib/Target/SystemZ/SystemZ.h | 10 ++++++++++ llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp | 10 +--------- llvm/lib/Target/SystemZ/SystemZElimCompare.cpp | 12 ++++++------ llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp | 12 ++++++------ llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 12 +++++++----- llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp | 10 +--------- llvm/lib/Target/SystemZ/SystemZShortenInst.cpp | 17 +++++++++-------- llvm/lib/Target/SystemZ/SystemZTDC.cpp | 4 ---- .../lib/Target/SystemZ/SystemZTargetMachine.cpp | 8 ++++++++ 9 files changed, 48 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h index bedbd061ea5c1..5be19f0e3b467 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.h +++ b/llvm/lib/Target/SystemZ/SystemZ.h @@ -20,6 +20,7 @@ namespace llvm { class SystemZTargetMachine; class FunctionPass; +class PassRegistry; namespace SystemZ { // Condition-code mask values. 
@@ -196,6 +197,15 @@ FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM); FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); + +void initializeSystemZElimComparePass(PassRegistry &); +void initializeSystemZShortenInstPass(PassRegistry &); +void initializeSystemZLongBranchPass(PassRegistry &); +void initializeSystemZLDCleanupPass(PassRegistry &); +void initializeSystemZCopyPhysRegsPass(PassRegistry &); +void initializeSystemZPostRewritePass(PassRegistry &); +void initializeSystemZTDCPassPass(PassRegistry &); + } // end namespace llvm #endif diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp index 7d21d29d270e3..763aa8c0e41fd 100644 --- a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp +++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp @@ -25,12 +25,6 @@ using namespace llvm; -#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs" - -namespace llvm { - void initializeSystemZCopyPhysRegsPass(PassRegistry&); -} - namespace { class SystemZCopyPhysRegs : public MachineFunctionPass { @@ -41,8 +35,6 @@ class SystemZCopyPhysRegs : public MachineFunctionPass { initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry()); } - StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; } - bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -59,7 +51,7 @@ char SystemZCopyPhysRegs::ID = 0; } // end anonymous namespace INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs", - SYSTEMZ_COPYPHYSREGS_NAME, false, false) + "SystemZ Copy Physregs", false, false) FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) { return new SystemZCopyPhysRegs(); diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index 
631cbff303e83..4893acc813352 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -65,11 +65,8 @@ class SystemZElimCompare : public MachineFunctionPass { public: static char ID; - SystemZElimCompare(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { - return "SystemZ Comparison Elimination"; + SystemZElimCompare() : MachineFunctionPass(ID) { + initializeSystemZElimComparePass(*PassRegistry::getPassRegistry()); } bool processBlock(MachineBasicBlock &MBB); @@ -106,6 +103,9 @@ char SystemZElimCompare::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE, + "SystemZ Comparison Elimination", false, false) + // Returns true if MI is an instruction whose output equals the value in Reg. static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { switch (MI.getOpcode()) { @@ -746,5 +746,5 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { } FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { - return new SystemZElimCompare(TM); + return new SystemZElimCompare(); } diff --git a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp index 06d893d043e9d..d6c7959854485 100644 --- a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -29,11 +29,8 @@ namespace { class SystemZLDCleanup : public MachineFunctionPass { public: static char ID; - SystemZLDCleanup(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {} - - StringRef getPassName() const override { - return "SystemZ Local Dynamic TLS Access Clean-up"; + SystemZLDCleanup() : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) { + initializeSystemZLDCleanupPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -52,8 +49,11 @@ char SystemZLDCleanup::ID = 0; } // end 
anonymous namespace +INITIALIZE_PASS(SystemZLDCleanup, "systemz-ld-cleanup", + "SystemZ Local Dynamic TLS Access Clean-up", false, false) + FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) { - return new SystemZLDCleanup(TM); + return new SystemZLDCleanup(); } void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 9c985c16f0829..d53693154d404 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -135,10 +135,9 @@ class SystemZLongBranch : public MachineFunctionPass { public: static char ID; - SystemZLongBranch(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "SystemZ Long Branch"; } + SystemZLongBranch() : MachineFunctionPass(ID) { + initializeSystemZLongBranchPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &F) override; @@ -174,6 +173,9 @@ const uint64_t MaxForwardRange = 0xfffe; } // end anonymous namespace +INITIALIZE_PASS(SystemZLongBranch, DEBUG_TYPE, "SystemZ Long Branch", false, + false) + // Position describes the state immediately before Block. Update Block // accordingly and move Position to the end of the block's non-terminator // instructions. 
@@ -481,5 +483,5 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) { } FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) { - return new SystemZLongBranch(TM); + return new SystemZLongBranch(); } diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp index aaa7f8fc88f50..4b95d0d67389d 100644 --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -21,16 +21,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; -#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass" - #define DEBUG_TYPE "systemz-postrewrite" STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)"); -namespace llvm { - void initializeSystemZPostRewritePass(PassRegistry&); -} - namespace { class SystemZPostRewrite : public MachineFunctionPass { @@ -44,8 +38,6 @@ class SystemZPostRewrite : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; - StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } - private: void selectLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -70,7 +62,7 @@ char SystemZPostRewrite::ID = 0; } // end anonymous namespace INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite", - SYSTEMZ_POSTREWRITE_NAME, false, false) + "SystemZ Post Rewrite pass", false, false) /// Returns an instance of the Post Rewrite pass. 
FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 254e5e92449b2..92930dad80ef8 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -26,11 +26,7 @@ namespace { class SystemZShortenInst : public MachineFunctionPass { public: static char ID; - SystemZShortenInst(const SystemZTargetMachine &tm); - - StringRef getPassName() const override { - return "SystemZ Instruction Shortening"; - } + SystemZShortenInst(); bool processBlock(MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &F) override; @@ -56,12 +52,17 @@ class SystemZShortenInst : public MachineFunctionPass { char SystemZShortenInst::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(SystemZShortenInst, DEBUG_TYPE, + "SystemZ Instruction Shortening", false, false) + FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { - return new SystemZShortenInst(TM); + return new SystemZShortenInst(); } -SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr) {} +SystemZShortenInst::SystemZShortenInst() + : MachineFunctionPass(ID), TII(nullptr) { + initializeSystemZShortenInstPass(*PassRegistry::getPassRegistry()); +} // Tie operands if MI has become a two-address instruction. 
static void tieOpsIfNeeded(MachineInstr &MI) { diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp index 7cb7dca2ea28b..f62afb8ddfcfa 100644 --- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp @@ -61,10 +61,6 @@ using namespace llvm; -namespace llvm { - void initializeSystemZTDCPassPass(PassRegistry&); -} - namespace { class SystemZTDCPass : public FunctionPass { diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index deb3358102ede..f1469fe8f56b7 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -32,6 +32,14 @@ using namespace llvm; extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() { // Register the target. RegisterTargetMachine X(getTheSystemZTarget()); + auto &PR = *PassRegistry::getPassRegistry(); + initializeSystemZElimComparePass(PR); + initializeSystemZShortenInstPass(PR); + initializeSystemZLongBranchPass(PR); + initializeSystemZLDCleanupPass(PR); + initializeSystemZShortenInstPass(PR); + initializeSystemZPostRewritePass(PR); + initializeSystemZTDCPassPass(PR); } // Determine whether we use the vector ABI. From e9211e03937751ab75bbb34e38acc330b85fb0d8 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 20 Jan 2022 12:04:42 +0100 Subject: [PATCH 182/946] Remove dependency from raw_ostream on The tryLockFor method from raw_fd_sotreamis the sole user of that header, and it's not referenced in the mono repo. I still chose to keep it (may be useful for downstream user) but added a transient type that's forward declared to hold the duration parameter. Notable changes: - "llvm/Support/Duration.h" must be included in order to use tryLockFor. 
- "llvm/Support/raw_ostream.h" no longer includes This sole change has an interesting impact on the number of processed line, as measured by: clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 7917500 after: 7835142 Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831 --- lldb/tools/lldb-vscode/FifoFiles.h | 2 ++ llvm/include/llvm/Debuginfod/Debuginfod.h | 2 ++ llvm/include/llvm/Debuginfod/HTTPClient.h | 2 ++ llvm/include/llvm/Support/Duration.h | 28 +++++++++++++++++++++++ llvm/include/llvm/Support/raw_ostream.h | 4 ++-- llvm/lib/Support/raw_ostream.cpp | 5 ++-- llvm/unittests/Support/Path.cpp | 1 + 7 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 llvm/include/llvm/Support/Duration.h diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-vscode/FifoFiles.h index f186f65e86c43..a0c4562b5a6b7 100644 --- a/lldb/tools/lldb-vscode/FifoFiles.h +++ b/lldb/tools/lldb-vscode/FifoFiles.h @@ -14,6 +14,8 @@ #include "JSONUtils.h" +#include + namespace lldb_vscode { /// Struct that controls the life of a fifo file in the filesystem. 
diff --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h index fcb8ed3a9222b..064cfa75b1a1b 100644 --- a/llvm/include/llvm/Debuginfod/Debuginfod.h +++ b/llvm/include/llvm/Debuginfod/Debuginfod.h @@ -23,6 +23,8 @@ #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include + namespace llvm { typedef ArrayRef BuildIDRef; diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h index e8f0e7ef8f786..ca3b76ca9f3f4 100644 --- a/llvm/include/llvm/Debuginfod/HTTPClient.h +++ b/llvm/include/llvm/Debuginfod/HTTPClient.h @@ -19,6 +19,8 @@ #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include + namespace llvm { enum class HTTPMethod { GET }; diff --git a/llvm/include/llvm/Support/Duration.h b/llvm/include/llvm/Support/Duration.h new file mode 100644 index 0000000000000..a5a0e2a3357aa --- /dev/null +++ b/llvm/include/llvm/Support/Duration.h @@ -0,0 +1,28 @@ +//===--- Duration.h - wrapper around std::chrono::Duration ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The sole purpose of this file is to avoid the dependency on in +// raw_ostream. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_DURATION_H +#define LLVM_SUPPORT_DURATION_H + +#include + +namespace llvm { +class Duration { + std::chrono::milliseconds Value; + public: + Duration(std::chrono::milliseconds Value) : Value(Value) {} + std::chrono::milliseconds getDuration() const { return Value; } +}; +} + +#endif diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index fc46ec0d74564..e288ac27e804d 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -17,7 +17,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" #include -#include #include #include #include @@ -30,6 +29,7 @@ namespace llvm { +class Duration; class formatv_object_base; class format_object_base; class FormattedString; @@ -574,7 +574,7 @@ class raw_fd_ostream : public raw_pwrite_stream { /// /// It is used as @ref lock. LLVM_NODISCARD - Expected tryLockFor(std::chrono::milliseconds Timeout); + Expected tryLockFor(Duration const& Timeout); }; /// This returns a reference to a raw_fd_ostream for standard output. 
Use it diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 1b1b0af79ae8d..e4b747b68beaa 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Duration.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" @@ -868,8 +869,8 @@ Expected raw_fd_ostream::lock() { } Expected -raw_fd_ostream::tryLockFor(std::chrono::milliseconds Timeout) { - std::error_code EC = sys::fs::tryLockFile(FD, Timeout); +raw_fd_ostream::tryLockFor(Duration const& Timeout) { + std::error_code EC = sys::fs::tryLockFile(FD, Timeout.getDuration()); if (!EC) return sys::fs::FileLocker(FD); return errorCodeToError(EC); diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index b749448141f79..c3ba0b51f06f7 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Duration.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" From ad43217a046634be24174299beec3a28018ec3c0 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Mon, 13 Dec 2021 17:20:16 +0000 Subject: [PATCH 183/946] [InstCombine] Fold for masked gather when loading the same value each time. This patch checks in the masked gather when the first operand value is a splat and the mask is all one, because the masked gather is reloading the same value each time. This patch replaces this pattern of masked gather by a scalar load of the value and splats it in a vector. 
Differential Revision: https://reviews.llvm.org/D115726 --- .../InstCombine/InstCombineCalls.cpp | 20 +++++- .../InstCombine/masked_intrinsics.ll | 62 +++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e3a9e806abdba..f63a186166ecc 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -352,9 +352,27 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) { // * Dereferenceable address & few lanes -> scalarize speculative load/selects // * Adjacent vector addresses -> masked.load // * Narrow width by halfs excluding zero/undef lanes -// * Vector splat address w/known mask -> scalar load // * Vector incrementing address -> vector masked load Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) { + auto *ConstMask = dyn_cast(II.getArgOperand(2)); + if (!ConstMask) + return nullptr; + + // Vector splat address w/known mask -> scalar load + // Fold the gather to load the source vector first lane + // because it is reloading the same value each time + if (ConstMask->isAllOnesValue()) + if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) { + auto *VecTy = cast(II.getType()); + const Align Alignment = + cast(II.getArgOperand(1))->getAlignValue(); + LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr, + Alignment, "load.scalar"); + Value *Shuf = + Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast"); + return replaceInstUsesWith(II, cast(Shuf)); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index a82aebd738fea..5ba559fd35f9c 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -376,3 +376,65 @@ define void 
@negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_m ; Function Attrs: declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32 immarg, <4 x i1>) declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(, , i32 immarg, ) + +; Test gathers that can be simplified to scalar load + splat + +;; Splat address and all active mask +define @gather_nxv2i64_uniform_ptrs_all_active_mask(i64* %src) { +; CHECK-LABEL: @gather_nxv2i64_uniform_ptrs_all_active_mask( +; CHECK-NEXT: [[LOAD_SCALAR:%.*]] = load i64, i64* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[LOAD_SCALAR]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: ret [[BROADCAST_SPLAT2]] +; + %broadcast.splatinsert = insertelement poison, i64 *%src, i32 0 + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer + %res = call @llvm.masked.gather.nxv2i64( %broadcast.splat, i32 8, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), undef) + ret %res +} + +define <2 x i64> @gather_v2i64_uniform_ptrs_all_active_mask(i64* %src) { +; CHECK-LABEL: @gather_v2i64_uniform_ptrs_all_active_mask( +; CHECK-NEXT: [[LOAD_SCALAR:%.*]] = load i64, i64* [[SRC:%.*]], align 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[LOAD_SCALAR]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x i64> [[BROADCAST_SPLAT2]] +; + %broadcast.splatinsert = insertelement <2 x i64*> poison, i64 *%src, i32 0 + %broadcast.splat = shufflevector <2 x i64*> %broadcast.splatinsert, <2 x i64*> poison, <2 x i32> zeroinitializer + %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %broadcast.splat, i32 8, <2 x i1> , <2 x i64> undef) + ret <2 x i64> %res +} + +; Negative gather tests + +;; 
Vector of pointers is not a splat. +define <2 x i64> @negative_gather_v2i64_non_uniform_ptrs_all_active_mask(<2 x i64*> %inVal, i64* %src ) { +; CHECK-LABEL: @negative_gather_v2i64_non_uniform_ptrs_all_active_mask( +; CHECK-NEXT: [[INSERT_VALUE:%.*]] = insertelement <2 x i64*> [[INVAL:%.*]], i64* [[SRC:%.*]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> [[INSERT_VALUE]], i32 8, <2 x i1> , <2 x i64> undef) +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %insert.value = insertelement <2 x i64*> %inVal, i64 *%src, i32 1 + %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %insert.value, i32 8, <2 x i1>, <2 x i64> undef) + ret <2 x i64> %res +} + +;; Unknown mask value +define <2 x i64> @negative_gather_v2i64_uniform_ptrs_no_all_active_mask(i64* %src, <2 x i1> %mask) { +; CHECK-LABEL: @negative_gather_v2i64_uniform_ptrs_no_all_active_mask( +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64*> poison, i64* [[SRC:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT]], <2 x i64*> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[MASK:%.*]], <2 x i64> undef) +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %broadcast.splatinsert = insertelement <2 x i64*> poison, i64 *%src, i32 0 + %broadcast.splat = shufflevector <2 x i64*> %broadcast.splatinsert, <2 x i64*> poison, <2 x i32> zeroinitializer + %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %broadcast.splat, i32 8, <2 x i1> %mask, <2 x i64> undef) + ret <2 x i64> %res +} + +; Function Attrs: +declare @llvm.masked.gather.nxv2i64(, i32, , ) +declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) + From 4d82ae67b20826d97471c1ea76e8db3b054398f9 Mon Sep 17 00:00:00 2001 From: Kristof Beyls Date: Wed, 12 Jan 2022 14:40:14 +0100 Subject: [PATCH 184/946] Add security 
group 2021 transparency report. Differential Revision: https://reviews.llvm.org/D117872 --- llvm/docs/Reference.rst | 1 + llvm/docs/Security.rst | 2 ++ llvm/docs/SecurityTransparencyReports.rst | 44 +++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 llvm/docs/SecurityTransparencyReports.rst diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst index d10fc8f23f735..0a2a84b31c4e2 100644 --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -38,6 +38,7 @@ LLVM and API reference documentation. ScudoHardenedAllocator MemTagSanitizer Security + SecurityTransparencyReports SegmentedStacks StackMaps SpeculativeLoadHardening diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst index 19ce13b04babc..04cf5cabf8793 100644 --- a/llvm/docs/Security.rst +++ b/llvm/docs/Security.rst @@ -116,6 +116,8 @@ Transparency Report Every year, the LLVM Security Group must publish a transparency report. The intent of this report is to keep the community informed by summarizing the disclosures that have been made public in the last year. It shall contain a list of all public disclosures, as well as statistics on time to fix issues, length of embargo periods, and so on. +The transparency reports are published at :doc:`SecurityTransparencyReports`. + Privileges and Responsibilities of LLVM Security Group Members ============================================================== diff --git a/llvm/docs/SecurityTransparencyReports.rst b/llvm/docs/SecurityTransparencyReports.rst new file mode 100644 index 0000000000000..bcc28d8a9624f --- /dev/null +++ b/llvm/docs/SecurityTransparencyReports.rst @@ -0,0 +1,44 @@ +======================================== +LLVM Security Group Transparency Reports +======================================== + +This page lists the yearly LLVM Security group transparency reports. 
+ +2021 +---- + +The :doc:`LLVM security group ` was established on the 10th of July +2020 by the act of the `initial +commit `_ describing +the purpose of the group and the processes it follows. Many of the group's +processes were still not well-defined enough for the group to operate well. +Over the course of 2021, the key processes were defined well enough to enable +the group to operate reasonably well: + +* We defined details on how to report security issues, see `this commit on + 20th of May 2021 `_ +* We refined the nomination process for new group members, see `this + commit on 30th of July 2021 `_ +* We started writing an annual transparency report (you're reading the 2021 + report here). + +Over the course of 2021, we had 2 people leave the LLVM Security group and 4 +people join. + +In 2021, the security group received 13 issue reports that were made publicly +visible before 31st of December 2021. The security group judged 2 of these +reports to be security issues: + +* https://bugs.chromium.org/p/llvm/issues/detail?id=5 +* https://bugs.chromium.org/p/llvm/issues/detail?id=11 + +Both issues were addressed with source changes: #5 in clangd/vscode-clangd, and +#11 in llvm-project. No dedicated LLVM release was made for either. + +We believe that with the publishing of this first annual transparency report, +the security group now has implemented all necessary processes for the group to +operate as promised. The group's processes can be improved further, and we do +expect further improvements to get implemented in 2022. Many of the potential +improvements end up being discussed on the `monthly public call on LLVM's +security group `_. + From b8102449a72c5144cb75cfca46e78b517d7f6606 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Jan 2022 15:54:33 +0100 Subject: [PATCH 185/946] [clang-tidy] Avoid binding nullptr to a reference That's undefined behavior. Found by -fsanitize=null. 
--- .../clang-tidy/performance/MoveConstArgCheck.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp index 0e91451211aed..6e7d28b2974f7 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp @@ -68,9 +68,9 @@ void MoveConstArgCheck::registerMatchers(MatchFinder *Finder) { } bool IsRValueReferenceParam(const Expr *Invocation, - const QualType &InvocationParmType, + const QualType *InvocationParmType, const Expr *Arg) { - if (Invocation && InvocationParmType->isRValueReferenceType() && + if (Invocation && (*InvocationParmType)->isRValueReferenceType() && Arg->isLValue()) { if (!Invocation->getType()->isRecordType()) return true; @@ -138,7 +138,7 @@ void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) { // std::move shouldn't be removed when an lvalue wrapped by std::move is // passed to the function with an rvalue reference parameter. bool IsRVRefParam = - IsRValueReferenceParam(ReceivingExpr, *InvocationParmType, Arg); + IsRValueReferenceParam(ReceivingExpr, InvocationParmType, Arg); const auto *Var = IsVariable ? 
dyn_cast(Arg)->getDecl() : nullptr; From e5fd3a7df9170cc69c88881e06fbd33c9cbd633d Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 21 Jan 2022 09:59:22 -0500 Subject: [PATCH 186/946] Try to unbreak build on Windows after e9211e03937 --- llvm/tools/llvm-readobj/COFFDumper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index e1b28e3ce7451..caeb49af24be8 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -52,6 +52,7 @@ #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/Win64EH.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; using namespace llvm::object; From 8bc66189429919d86d3ff3b4b5a3f63cfa6c35ca Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 16:04:19 +0100 Subject: [PATCH 187/946] Add missing llvm/support/Regex.h include in polly/lib/Analysis/ScopDetection.cpp --- polly/lib/Analysis/ScopDetection.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index f0be6d5ff1ae0..8b3af67a596b3 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -78,6 +78,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include #include From 3c9e3dada916f33e8a4c62629f1954a7a5cdbf71 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 21 Jan 2022 10:04:41 -0500 Subject: [PATCH 188/946] Try to unbreak build on Windows more after e9211e03937 --- llvm/tools/llvm-readobj/XCOFFDumper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp index e34b105cfbf0a..6e778d558d4fb 100644 --- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp +++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp @@ -16,6 +16,7 @@ 
#include "llvm/Support/FormattedStream.h" #include "llvm/Support/ScopedPrinter.h" +#include #include using namespace llvm; From 9900acacfb3ff2bc5c957aeb84225c29aa1b74fc Mon Sep 17 00:00:00 2001 From: Vy Nguyen Date: Fri, 21 Jan 2022 10:17:28 -0500 Subject: [PATCH 189/946] [libcxx][doc][nfc] Fixed typo in doc --- libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst index a0f260a44e815..1af0b34263662 100644 --- a/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst +++ b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst @@ -36,7 +36,7 @@ Design ====== * Annotate the two definitions of ``std::unique_ptr`` with ``clang::trivial_abi`` attribute. -* Put the attribuate behind a flag because this change has potential compilation and runtime breakages. +* Put the attribute behind a flag because this change has potential compilation and runtime breakages. This comes with some side effects: From 9d3437fbf3419502351d41ff9e28f06b0c3f06e8 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Tue, 18 Jan 2022 18:20:14 -0600 Subject: [PATCH 190/946] [ADT] [NFC] Add StringRef::detectEOL This change moves EOL detection out of the clang::InclusionRewriter into llvm::StringRef so that it can be easily reused elsewhere. It also adds additional explicit test cases to verify the correct and expected return results. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D117626 --- .../Frontend/Rewrite/InclusionRewriter.cpp | 20 ++----------- llvm/include/llvm/ADT/StringRef.h | 19 ++++++++++++ llvm/unittests/ADT/StringRefTest.cpp | 30 +++++++++++++++++++ 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp index 931f3a24c5888..3e8d582f90c27 100644 --- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp +++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp @@ -251,28 +251,12 @@ bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const { return false; } -/// Detect the likely line ending style of \p FromFile by examining the first -/// newline found within it. -static StringRef DetectEOL(const MemoryBufferRef &FromFile) { - // Detect what line endings the file uses, so that added content does not mix - // the style. We need to check for "\r\n" first because "\n\r" will match - // "\r\n\r\n". - const char *Pos = strchr(FromFile.getBufferStart(), '\n'); - if (!Pos) - return "\n"; - if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') - return "\r\n"; - if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') - return "\n\r"; - return "\n"; -} - void InclusionRewriter::detectMainFileEOL() { Optional FromFile = *SM.getBufferOrNone(SM.getMainFileID()); assert(FromFile); if (!FromFile) return; // Should never happen, but whatever. - MainEOL = DetectEOL(*FromFile); + MainEOL = FromFile->getBuffer().detectEOL(); } /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at @@ -378,7 +362,7 @@ void InclusionRewriter::Process(FileID FileId, Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts()); RawLex.SetCommentRetentionState(false); - StringRef LocalEOL = DetectEOL(FromFile); + StringRef LocalEOL = FromFile.getBuffer().detectEOL(); // Per the GNU docs: "1" indicates entering a new file. 
if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID()) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 3950910f0635a..9f64250c58a36 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -877,6 +877,25 @@ namespace llvm { return ltrim(Chars).rtrim(Chars); } + /// Detect the line ending style of the string. + /// + /// If the string contains a line ending, return the line ending character + /// sequence that is detected. Otherwise return '\n' for unix line endings. + /// + /// \return - The line ending character sequence. + LLVM_NODISCARD + StringRef detectEOL() const { + size_t Pos = find('\r'); + if (Pos == npos) { + // If there is no carriage return, assume unix + return "\n"; + } + if (Pos + 1 < Length && Data[Pos + 1] == '\n') + return "\r\n"; // Windows + if (Pos > 0 && Data[Pos - 1] == '\n') + return "\n\r"; // You monster! + return "\r"; // Classic Mac + } /// @} }; diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp index 41c35804f1226..e80a25a19969c 100644 --- a/llvm/unittests/ADT/StringRefTest.cpp +++ b/llvm/unittests/ADT/StringRefTest.cpp @@ -1109,6 +1109,36 @@ TEST(StringRefTest, GTestPrinter) { EXPECT_EQ(R"("foo")", ::testing::PrintToString(StringRef("foo"))); } +TEST(StringRefTest, LFLineEnding) { + constexpr StringRef Cases[] = {"\nDoggo\nPupper", "Floofer\n", "Woofer"}; + EXPECT_EQ(StringRef("\n"), Cases[0].detectEOL()); + EXPECT_EQ(StringRef("\n"), Cases[1].detectEOL()); + EXPECT_EQ(StringRef("\n"), Cases[2].detectEOL()); +} + +TEST(StringRefTest, CRLineEnding) { + constexpr StringRef Cases[] = {"\rDoggo\rPupper", "Floofer\r", "Woo\rfer\n"}; + EXPECT_EQ(StringRef("\r"), Cases[0].detectEOL()); + EXPECT_EQ(StringRef("\r"), Cases[1].detectEOL()); + EXPECT_EQ(StringRef("\r"), Cases[2].detectEOL()); +} + +TEST(StringRefTest, CRLFLineEnding) { + constexpr StringRef Cases[] = {"\r\nDoggo\r\nPupper", "Floofer\r\n", + 
"Woofer\r\nSubWoofer\n"}; + EXPECT_EQ(StringRef("\r\n"), Cases[0].detectEOL()); + EXPECT_EQ(StringRef("\r\n"), Cases[1].detectEOL()); + EXPECT_EQ(StringRef("\r\n"), Cases[2].detectEOL()); +} + +TEST(StringRefTest, LFCRLineEnding) { + constexpr StringRef Cases[] = {"\n\rDoggo\n\rPupper", "Floofer\n\r", + "Woofer\n\rSubWoofer\n"}; + EXPECT_EQ(StringRef("\n\r"), Cases[0].detectEOL()); + EXPECT_EQ(StringRef("\n\r"), Cases[1].detectEOL()); + EXPECT_EQ(StringRef("\n\r"), Cases[2].detectEOL()); +} + static_assert(std::is_trivially_copyable::value, "trivially copyable"); From 5597ec2dc4f888bf9704626c798ec6e50e5d1384 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Fri, 21 Jan 2022 17:05:36 +0100 Subject: [PATCH 191/946] Include missing "llvm/Support/Path.h" in "flang/lib/Frontend/CompilerInvocation.cpp" --- flang/lib/Frontend/CompilerInvocation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 159c5632c0e48..4525130e5c2e7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "llvm/Option/OptTable.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" #include From f24fe96f469b8ccdb8504f998d1107d79c8fd363 Mon Sep 17 00:00:00 2001 From: Jake Egan Date: Fri, 21 Jan 2022 11:18:48 -0500 Subject: [PATCH 192/946] [ifs] Use a tmp file instead of "-" Currently, Clang on AIX uses the system assembler to generate object files from assembly. The use of `-o -` results in a file named `-` instead of output to stdout. This patch uses a temporary object file instead. 
Reviewed By: DiggerLin, hubert.reinterpretcast Differential Revision: https://reviews.llvm.org/D117587 --- clang/test/InterfaceStubs/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/InterfaceStubs/object.c b/clang/test/InterfaceStubs/object.c index 45e2d38ba3e9c..a7609ff40af72 100644 --- a/clang/test/InterfaceStubs/object.c +++ b/clang/test/InterfaceStubs/object.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -fvisibility default -o - -emit-interface-stubs %s | FileCheck -check-prefix=CHECK-TAPI %s -// RUN: %clang -fvisibility=default -c -o - %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s +// RUN: %clang -fvisibility=default -c -o %t.o %s +// RUN: llvm-nm %t.o 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s // CHECK-TAPI: data", Type: Object, Size: 4 } // CHECK-SYMBOLS: data From cab96169380296a496614f433507d86b743f0d02 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Sun, 7 Nov 2021 19:44:59 +0100 Subject: [PATCH 193/946] [libc++] Use addressof in unordered_map. This addresses the usage of `operator&` in `<unordered_map>`. (Note there are still more headers with the same issue.) 
Reviewed By: #libc, Quuxplusone, ldionne Differential Revision: https://reviews.llvm.org/D117393 --- libcxx/include/__hash_table | 40 +++++++-------- libcxx/include/unordered_map | 21 ++++---- ...rator.operators.addressof.compile.pass.cpp | 49 +++++++++++++++++++ .../assign_move.addressof.compile.pass.cpp | 42 ++++++++++++++++ .../move.addressof.compile.pass.cpp | 33 +++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 36 ++++++++++++++ .../emplace_hint.addressof.compile.pass.cpp | 30 ++++++++++++ ...rase_const_iter.addressof.compile.pass.cpp | 27 ++++++++++ .../erase_range.addressof.compile.pass.cpp | 27 ++++++++++ ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++ ...ible_value_type.addressof.compile.pass.cpp | 28 +++++++++++ ...alue_value_type.addressof.compile.pass.cpp | 28 +++++++++++ ...ry_emplace_hint.addressof.compile.pass.cpp | 40 +++++++++++++++ .../swap.addressof.compile.pass.cpp | 29 +++++++++++ .../move.addressof.compile.pass.cpp | 33 +++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 36 ++++++++++++++ .../emplace_hint.addressof.compile.pass.cpp | 30 ++++++++++++ 17 files changed, 527 insertions(+), 30 deletions(-) create mode 100644 libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp create 
mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 6b682ab27c6c3..adc732cffb015 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -308,9 +308,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_iterator& operator=(const __hash_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; } return *this; @@ -406,7 +406,7 @@ public: : __node_(__x.__node_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, &__x); + __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); #endif } @@ -415,7 +415,7 @@ public: __hash_const_iterator(const __hash_const_iterator& __i) : __node_(__i.__node_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -427,9 +427,9 @@ public: 
_LIBCPP_INLINE_VISIBILITY __hash_const_iterator& operator=(const __hash_const_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; } return *this; @@ -523,7 +523,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -535,9 +535,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_local_iterator& operator=(const __hash_local_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -655,7 +655,7 @@ public: __bucket_count_(__x.__bucket_count_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, &__x); + __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); #endif } @@ -666,7 +666,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -678,9 +678,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( __u.size() = 0; } #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, 
_Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); if (__p != end() && key_eq()(*__p, __cp->__value_)) @@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...); @@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container erase(iterator) called with an iterator not" " referring to this container"); _LIBCPP_DEBUG_ASSERT(__p != end(), @@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); - 
_LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); for (const_iterator __p = __first; __first != __last; __p = __first) @@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = __u.__p1_.first().__ptr(); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 73edadab20990..accab28a99592 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -519,6 +519,7 @@ template #include <__functional/is_transparent.h> #include <__hash_table> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__node_handle> #include <__utility/forward.h> #include @@ -1186,7 +1187,7 @@ public: {return __table_.__insert_unique(__x);} iterator insert(const_iterator __p, const value_type& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not " "referring to this unordered_map"); ((void)__p); @@ -1207,7 +1208,7 @@ public: {return __table_.__insert_unique(_VSTD::move(__x));} iterator insert(const_iterator __p, value_type&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1225,7 +1226,7 @@ public: 
_LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, value_type&&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1241,7 +1242,7 @@ public: template _LIBCPP_INLINE_VISIBILITY iterator emplace_hint(const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1273,7 +1274,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1284,7 +1285,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this, "unordered_map::try_emplace(const_iterator, key, args...) 
called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1692,7 +1693,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -1712,7 +1713,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2468,7 +2469,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2489,7 +2490,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..856b78293a107 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// Validate the constructors of the (const)(_local)_iterator classes to be +// properly guarded against ADL-hijacking operator&. 
+ +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +template +void test() { + FromIterator from; + ToIterator copy(from); + copy = from; + + ToIterator move(std::move(from)); + from = FromIterator(); + move = std::move(from); +} + +void test() { + { + using I = std::unordered_map::iterator; + using CI = std::unordered_map::const_iterator; + test(); + test(); + test(); + } + { + using IL = std::unordered_map::local_iterator; + using CIL = std::unordered_map::const_local_iterator; + test(); + test(); + test(); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..6dbd7aaea2a8e --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map& operator=(unordered_map&&) +// noexcept( +// allocator_type::propagate_on_container_move_assignment::value && +// is_nothrow_move_assignable::value && +// is_nothrow_move_assignable::value && +// is_nothrow_move_assignable::value); + +// Validate whether the container can be move-assigned with an ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + { + std::unordered_map mo; + std::unordered_map m; + m = std::move(mo); + } + { + std::unordered_map mo; + std::unordered_map m; + m = std::move(mo); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..e36c6525d631b --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map(unordered_map&& u) +// noexcept( +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map mo; + std::unordered_map m(std::move(mo)); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..1fec0ee5d0f4b --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map(unordered_map&& u, const allocator_type& a); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +#include "test_allocator.h" +#include "min_allocator.h" + +void test() { + using A = test_allocator>; + using C = std::unordered_map, + std::equal_to, A>; + + C mo; + C m(std::move(mo), A()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..58ddefd8cfbfc --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template +// iterator emplace_hint(const_iterator position, Args&&... 
args); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.emplace_hint(m.cbegin()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..1461f2499baad --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator erase(const_iterator p) + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.erase(m.cbegin()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..5f342f7b2152f --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator erase(const_iterator first, const_iterator last) + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.erase(m.cbegin(), m.cend()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..db1805e7d7e63 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator insert(const_iterator p, const value_type& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + const std::pair v; + m.insert(m.cend(), v); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..530b826b61e78 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template ::value>::type> +// pair insert(P&& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test(std::unordered_map& m) { m.insert(m.cend(), *m.begin()); } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..80219cb193edd --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator insert(const_iterator hint, value_type&& obj); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test(std::unordered_map& m) { + m.insert(m.cend(), std::pair{}); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..2c667374d4fe8 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template +// iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args); +// template +// iterator try_emplace(const_iterator hint, key_type&& k, Args&&... 
args); +// template + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + { + const operator_hijacker k; + m.try_emplace(m.cend(), k); + } + { + operator_hijacker k; + m.try_emplace(m.cend(), std::move(k)); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..f5b5f516d42b5 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// void swap(unordered_map& c) +// noexcept(allocator_traits::is_always_equal::value && +// noexcept(swap(declval(), declval())) && +// noexcept(swap(declval(), declval()))); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m1; + std::unordered_map m2; + std::swap(m1, m2); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..73b19f35e2048 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,33 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// unordered_multimap(unordered_multimap&&) +// noexcept( +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_multimap mo; + std::unordered_multimap m(std::move(mo)); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..6419a03666d65 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// unordered_multimap(unordered_multimap&& u, const allocator_type& a); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +#include "test_allocator.h" +#include "min_allocator.h" + +void test() { + using A = test_allocator>; + using C = std::unordered_multimap, + std::equal_to, A>; + + C mo; + C m(std::move(mo), A()); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..5e23b73cf34b3 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// template +// iterator emplace_hint(const_iterator position, Args&&...
args); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_multimap m; + m.emplace_hint(m.cbegin()); +} From b7fd91c84b4eea5324d9757243387280f4284236 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 13 Jan 2022 16:27:28 -0800 Subject: [PATCH 194/946] Upstream MLIR PyTACO implementation. Add TACO tests to test/Integration/Dialect/SparseTensor/taco. Add the MLIR PyTACO implementation as tools under the directory. Reviewed By: aartbik, mehdi_amini Differential Revision: https://reviews.llvm.org/D117260 --- mlir/python/requirements.txt | 1 + .../Dialect/SparseTensor/taco/README.md | 27 + .../Dialect/SparseTensor/taco/data/gold_A.tns | 50 + .../Dialect/SparseTensor/taco/data/gold_y.tns | 4 + .../Dialect/SparseTensor/taco/data/nell-2.tns | 5 + .../Dialect/SparseTensor/taco/data/pwtk.mtx | 11 + .../Dialect/SparseTensor/taco/test_MTTKRP.py | 53 + .../Dialect/SparseTensor/taco/test_SpMV.py | 54 + .../taco/test_simple_tensor_algebra.py | 30 + .../SparseTensor/taco/tools/lit.local.cfg | 2 + .../SparseTensor/taco/tools/mlir_pytaco.py | 1768 +++++++++++++++++ .../taco/tools/mlir_pytaco_api.py | 47 + .../SparseTensor/taco/tools/mlir_pytaco_io.py | 206 ++ .../taco/tools/mlir_pytaco_utils.py | 121 ++ 14 files changed, 2379 insertions(+) create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/README.md create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py create mode 100644 
mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index 0cc86af2c9cfb..991e8eb243358 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,3 +1,4 @@ numpy pybind11>=2.8.0 PyYAML +dataclasses diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/README.md b/mlir/test/Integration/Dialect/SparseTensor/taco/README.md new file mode 100644 index 0000000000000..88a8ce2581962 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/README.md @@ -0,0 +1,27 @@ +# MLIR-PyTACO: Implementing PyTACO with MLIR + +TACO (http://tensor-compiler.org/) is a tensor algebra compiler. TACO defines +PyTACO, a domain specific language in Python, for writing tensor algebra +applications. + +This directory contains the implementation of PyTACO using MLIR. In particular, +we implement a Python layer that accepts the PyTACO language, generates MLIR +linalg.generic OPs with sparse tensor annotation to represent the tensor +computation, and invokes the MLIR sparse tensor code generator +(https://mlir.llvm.org/docs/Dialects/SparseTensorOps/) as well as other MLIR +compilation passes to generate an executable. Then, we invoke the MLIR execution +engine to execute the program and pass the result back to the Python layer. 
+ +As can be seen from the tests in this directory, in order to port a PyTACO +program to MLIR-PyTACO, we basically only need to replace this line that imports +PyTACO: + +```python +import pytaco as pt +``` + +with this line to import MLIR-PyTACO: + +```python +from tools import mlir_pytaco_api as pt +``` diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns new file mode 100644 index 0000000000000..b66caa12106a9 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns @@ -0,0 +1,50 @@ +1 1 12 +1 2 12 +1 3 12 +1 4 12 +1 5 12 +1 6 12 +1 7 12 +1 8 12 +1 9 12 +1 10 12 +1 11 12 +1 12 12 +1 13 12 +1 14 12 +1 15 12 +1 16 12 +1 17 12 +1 18 12 +1 19 12 +1 20 12 +1 21 12 +1 22 12 +1 23 12 +1 24 12 +1 25 12 +2 1 6 +2 2 6 +2 3 6 +2 4 6 +2 5 6 +2 6 6 +2 7 6 +2 8 6 +2 9 6 +2 10 6 +2 11 6 +2 12 6 +2 13 6 +2 14 6 +2 15 6 +2 16 6 +2 17 6 +2 18 6 +2 19 6 +2 20 6 +2 21 6 +2 22 6 +2 23 6 +2 24 6 +2 25 6 diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns new file mode 100644 index 0000000000000..a9eab90a0627a --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns @@ -0,0 +1,4 @@ +# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format +1 37102 +2 -20.4138 +3 804927 diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns new file mode 100644 index 0000000000000..a6c570c3c7d8f --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns @@ -0,0 +1,5 @@ +1 1 1 1.0 +1 2 2 2.0 +1 3 4 3.0 +2 1 1 1.0 +2 4 3 2.0 diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx b/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx new file mode 100644 index 0000000000000..ec1cebc1c8f82 --- /dev/null +++ 
b/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx @@ -0,0 +1,11 @@ +%%MatrixMarket matrix coordinate real symmetric +%------------------------------------------------------------------------------- +% To download a matrix for a real world application +% https://math.nist.gov/MatrixMarket/ +%------------------------------------------------------------------------------- +3 3 5 +1 1 37423.0879671 +2 1 -22.4050781162 +3 1 -300.654980157 +3 2 -.00869762944058 +3 3 805225.750212 diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py new file mode 100644 index 0000000000000..1fda4f4406393 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py @@ -0,0 +1,53 @@ +# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s + +import numpy as np +import os +import sys +import tempfile + +_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_PATH) +from tools import mlir_pytaco_api as pt + +###### This PyTACO part is taken from the TACO open-source project. ###### +# See http://tensor-compiler.org/docs/data_analytics/index.html. + +compressed = pt.compressed +dense = pt.dense + +# Define formats for storing the sparse tensor and dense matrices. +csf = pt.format([compressed, compressed, compressed]) +rm = pt.format([dense, dense]) + +# Load a sparse three-dimensional tensor from file (stored in the FROSTT +# format) and store it as a compressed sparse fiber tensor. We use a small +# tensor for the purpose of testing. To run the program using the data from +# the real application, please download the data from: +# http://frostt.io/tensors/nell-2/ +B = pt.read(os.path.join(_SCRIPT_PATH, "data/nell-2.tns"), csf) + +# These two lines have been modified from the original program to use static +# data to support result comparison. 
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64)) +D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64)) + +# Declare the result to be a dense matrix. +A = pt.tensor([B.shape[0], 25], rm) + +# Declare index vars. +i, j, k, l = pt.get_index_vars(4) + +# Define the MTTKRP computation. +A[i, j] = B[i, k, l] * D[l, j] * C[k, j] + +########################################################################## + +# CHECK: Compare result True +# Perform the MTTKRP computation and write the result to file. +with tempfile.TemporaryDirectory() as test_dir: + actual_file = os.path.join(test_dir, "A.tns") + pt.write(actual_file, A) + actual = np.loadtxt(actual_file, np.float64) + expected = np.loadtxt( + os.path.join(_SCRIPT_PATH, "data/gold_A.tns"), np.float64) + print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}") diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py new file mode 100644 index 0000000000000..80bb023360ff8 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py @@ -0,0 +1,54 @@ +# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s + +import numpy as np +import os +import sys +import tempfile + +_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_PATH) +from tools import mlir_pytaco_api as pt + +###### This PyTACO part is taken from the TACO open-source project. ###### +# See http://tensor-compiler.org/docs/scientific_computing/index.html. + +compressed = pt.compressed +dense = pt.dense + +# Define formats for storing the sparse matrix and dense vectors. +csr = pt.format([dense, compressed]) +dv = pt.format([dense]) + +# Load a sparse matrix (stored in the matrix market format) and store it +# as a CSR matrix.
The matrix in this test is a reduced version of the data +# downloaded from here: +# https://www.cise.ufl.edu/research/sparse/MM/Boeing/pwtk.tar.gz +# In order to run the program using the matrix above, you can download the +# matrix and replace this path to the actual path to the file. +A = pt.read(os.path.join(_SCRIPT_PATH, "data/pwtk.mtx"), csr) + +# These two lines have been modified from the original program to use static +# data to support result comparison. +x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64)) +z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64)) + +# Declare the result to be a dense vector +y = pt.tensor([A.shape[0]], dv) + +# Declare index vars +i, j = pt.get_index_vars(2) + +# Define the SpMV computation +y[i] = A[i, j] * x[j] + z[i] + +########################################################################## + +# CHECK: Compare result True +# Perform the SpMV computation and write the result to file +with tempfile.TemporaryDirectory() as test_dir: + actual_file = os.path.join(test_dir, "y.tns") + pt.write(actual_file, y) + actual = np.loadtxt(actual_file, np.float64) + expected = np.loadtxt( + os.path.join(_SCRIPT_PATH, "data/gold_y.tns"), np.float64) + print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}") diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py new file mode 100644 index 0000000000000..021519028496c --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py @@ -0,0 +1,30 @@ +# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s + +import os +import sys + +_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_PATH) +from tools import mlir_pytaco_api as pt + +compressed = pt.compressed +dense = pt.dense + +# Ensure that we can run an unmodified PyTACO program with 
a simple tensor +# algebra expression using tensor index notation, and produce the expected +# result. +i, j = pt.get_index_vars(2) +A = pt.tensor([2, 3]) +B = pt.tensor([2, 3]) +C = pt.tensor([2, 3]) +D = pt.tensor([2, 3], dense) +A.insert([0, 1], 10) +A.insert([1, 2], 40) +B.insert([0, 0], 20) +B.insert([1, 2], 30) +C.insert([0, 1], 5) +C.insert([1, 2], 7) +D[i, j] = A[i, j] + B[i, j] - C[i, j] + +# CHECK: [20. 5. 0. 0. 0. 63.] +print(D.to_array().reshape(6)) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg new file mode 100644 index 0000000000000..650ca33613cc6 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg @@ -0,0 +1,2 @@ +# Files in this directory are tools, not tests. +config.unsupported = True diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py new file mode 100644 index 0000000000000..f64d34037eabd --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py @@ -0,0 +1,1768 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Experimental MLIR-PyTACO with sparse tensor support. + +See http://tensor-compiler.org/ for TACO tensor compiler. + +This module implements the Python classes for PyTACO index notation. These +include classes for data types, tensor dimension formats (aka mode formats), +tensor dimension orderings (aka mode ordering), tensor storage formats, and +tensors. + +The PyTACO API doesn't follow the naming convention required by the style guide +for this module.
As such, we first implement the supporting classes and routines +following the style guide, and then define the type aliases and constants to +support the PyTACO API in the pytaco_api module. +""" + +from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union + +import abc +import ctypes +import dataclasses +import enum +import numpy as np +import functools +import operator +import os +import threading + +# Import MLIR related modules. +from mlir import all_passes_registration # Register MLIR compiler passes. +from mlir import execution_engine +from mlir import ir +from mlir import runtime +from mlir.dialects import arith +from mlir.dialects import builtin +from mlir.dialects import linalg +from mlir.dialects import std +from mlir.dialects import sparse_tensor +from mlir.dialects.linalg.opdsl import lang +from mlir.passmanager import PassManager + +from . import mlir_pytaco_utils as utils + +# TACO naming prefixes. +_TACO_INDEX_PREFIX = "i" +_TACO_TENSOR_PREFIX = "A" + +# Bitwidths for pointers and indices. +_POINTER_BIT_WIDTH = 0 +_INDEX_BIT_WIDTH = 0 +# The name for the environment variable that provides the full path for the +# supporting library. +_SUPPORTLIB_ENV_VAR = "SUPPORTLIB" +# The default supporting library if the environment variable is not provided. +_DEFAULT_SUPPORTLIB = "libmlir_c_runner_utils.so" +# The JIT compiler optimization level. +_OPT_LEVEL = 2 +# The entry point to the JIT compiled program. +_ENTRY_NAME = "main" + +# Type aliases for type annotation. +_BinaryOp = Callable[[Any, Any], Any] +_ExprVisitor = Callable[..., None] +_ExprInfoDict = Dict["IndexExpr", "_ExprInfo"] +_LogicalOp = Callable[[bool, bool], bool] +_ModeFormatOp = Callable[["ModeFormat", "ModeFormat"], "ModeFormat"] +_SubtreeLeafChecker = Optional[Callable[..., bool]] + + +class Type(enum.Enum): + """The data types supported by TACO. + + We use numpy data types to implement the enum data types. 
+ """ + INT16 = np.int16 + INT32 = np.int32 + INT64 = np.int64 + # numpy _ctype_from_dtype_scalar can't handle np.float16 yet. + FLOAT32 = np.float32 + FLOAT64 = np.float64 + + +# All floating point type enums. +_FLOAT_TYPES = (Type.FLOAT32, Type.FLOAT64) +# All integral type enums. +_INT_TYPES = (Type.INT16, Type.INT32, Type.INT64) +# Type alias for any numpy type used to implement the runtime support for the +# enum data types. +_AnyRuntimeType = Union[np.int16, np.int32, np.int64, np.float32, np.float64] + + +@dataclasses.dataclass(frozen=True) +class DType: + """The data type class. + + We support the TACO API dtype class with an alias of this class. + + The following methods are defined by the TACO API: + is_float: Returns whether the data type represents a floating point value. + is_int: Returns whether the data type represents an integral value. + + Attributes: + kind: A Type enum representing the data type. + value: The numpy data type for the TACO data type. + """ + kind: Type = Type.FLOAT64 + + def is_float(self) -> bool: + """Returns whether the data type represents a floating point value.""" + return self.kind in _FLOAT_TYPES + + def is_int(self) -> bool: + """Returns whether the data type represents an integral value.""" + return self.kind in _INT_TYPES + + @property + def value(self) -> _AnyRuntimeType: + """Returns the numpy dtype for the data type.""" + return self.kind.value + + +def _mlir_type_from_taco_type(dtype: DType) -> ir.Type: + """Returns the MLIR type corresponding to the given TACO type.""" + dtype_to_irtype = { + Type.INT16: ir.IntegerType.get_signless(16), + Type.INT32: ir.IntegerType.get_signless(32), + Type.INT64: ir.IntegerType.get_signless(64), + Type.FLOAT32: ir.F32Type.get(), + Type.FLOAT64: ir.F64Type.get() + } + return dtype_to_irtype[dtype.kind] + + +def _compile_mlir(module: ir.Module) -> ir.Module: + """Compiles an MLIR module and returns the compiled module.""" + # TODO: Replace this with a pipeline implemented for + # 
https://github.com/llvm/llvm-project/issues/51751. + pipeline = ( + f"sparsification," + f"sparse-tensor-conversion," + f"builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf)," + f"convert-scf-to-std," + f"func-bufferize," + f"tensor-constant-bufferize," + f"builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize)," + f"convert-vector-to-llvm{{reassociate-fp-reductions=1 enable-index-optimizations=1}}," + f"lower-affine," + f"convert-memref-to-llvm," + f"convert-std-to-llvm," + f"reconcile-unrealized-casts") + PassManager.parse(pipeline).run(module) + return module + + +@functools.lru_cache() +def _get_support_lib_name() -> str: + """Returns the string for the supporting C shared library.""" + return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB) + + +def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer: + """Returns the ctype pointer for the given numpy array.""" + return ctypes.pointer( + ctypes.pointer(runtime.get_ranked_memref_descriptor(array))) + + +class ModeFormat(enum.Enum): + """The tensor dimension storage format class. + + We support the TACO API mode_format class with an alias of this class. + + In TACO, a tensor dimension is called a mode and the storage format for a + tensor dimension is called a mode format. + """ + DENSE = sparse_tensor.DimLevelType.dense + COMPRESSED = sparse_tensor.DimLevelType.compressed + + +def _mode_format_operation(a: ModeFormat, b: ModeFormat, + op: _LogicalOp) -> ModeFormat: + """Implements the given operator on ModeFormat.""" + return (ModeFormat.COMPRESSED + if op(a == ModeFormat.COMPRESSED, b == ModeFormat.COMPRESSED) else + ModeFormat.DENSE) + + +def _mode_format_estimator(op: _BinaryOp) -> _ModeFormatOp: + """Produces a ModeFormat operator for the given binary operator. + + The ModeFormat operator is used as a heuristic to derive the destination + dimension sparsity from the source dimension sparsity. 
In particular, if the + binary operator produces a disjunction of the zero values from its source + operands, such as the MUL operator, we return a ModeFormat operator that + uses operator.or_. That is, we estimate a dimension of the MUL + operation result to be sparse if either of its source operands is sparse. + + On the other hand, if the binary operator produces a conjunction of the + zero values from its source operands, such as the ADD operator, we return + a ModeFormat operator that uses operator.and_. In this case, we estimate + a dimension of the ADD operation result to be sparse if both of its + source operands are sparse. + + Args: + op: A _BinaryOp object representing a supporting operator on tensors. + + Returns: + A _ModeFormatOp for estimating the destination dimension sparsity from + the source dimension sparsity. + """ + conjunction = functools.partial(_mode_format_operation, op=operator.and_) + disjunction = functools.partial(_mode_format_operation, op=operator.or_) + return conjunction if op(0, 1) != 0 else disjunction + + +def _all_instance_of(collection: Iterable, cls: Any) -> bool: + """Returns true if all elements of the iterable are instances of cls.""" + return all(isinstance(e, cls) for e in collection) + + +def _identity_ordering(rank: int) -> List[int]: + """Returns the identity ordering for a tensor of the given rank.""" + return list(range(rank)) + + +@dataclasses.dataclass(frozen=True) +class ModeOrdering: + """The tensor dimension ordering class. + + We support the TACO API mode_ordering class with an alias of this class. + + Attributes: + ordering: A list of integers representing the ordering of the tensor + dimensions. + """ + ordering: List[int] + + def __post_init__(self) -> None: + """Verifies the value in ordering. + + Raises: + ValueError: If ordering is not a list of integers.
+ """ + if (not isinstance(self.ordering, list) or + not _all_instance_of(self.ordering, int)): + raise ValueError("Ordering must be a list of integers: " + f"{self.ordering}") + # Check that ordering is a permutation of the dimension numbers. + if sorted(self.ordering) != _identity_ordering(self.rank()): + raise ValueError(f"Invalid ordering: {self.ordering} != " + f"permutation{_identity_ordering(self.rank())}.") + + def rank(self) -> int: + """Returns the number of dimensions represented by the ordering.""" + return len(self.ordering) + + +@dataclasses.dataclass(frozen=True) +class ModeFormatPack: + """The tensor dimension format class. + + We support the TACO API mode_format_pack class with an alias of this class. + + The storage format of a tensor contains one mode_format for each tensor + dimension. + + Attributes: + formats: A list of ModeFormat representing the storage format for each of + the tensor dimension. + """ + formats: List[ModeFormat] + + def __post_init__(self) -> None: + """Verifies the value in formats. + + Raises: + ValueError: If formats is not a list of ModeFormats. + """ + if (not isinstance(self.formats, list) or + not _all_instance_of(self.formats, ModeFormat)): + raise ValueError("Formats must be a list of ModeFormat: " + f"{self.formats}") + + def rank(self) -> int: + """Returns the number of dimensions represented by the format pack.""" + return len(self.formats) + + +@dataclasses.dataclass +class Format: + """The tensor format class defined by the TACO API. + + Attributes: + format_pack: A ModeFormatPack representing the storage format for the tensor + dimensions. + ordering: A ModeOrdering representing the tensor dimension ordering in the + storage. + """ + format_pack: ModeFormatPack + ordering: Optional[ModeOrdering] = None + + def __post_init__(self) -> None: + """Verifies and fixes up the values in format_pack and ordering. 
+ + Verifies and fixes up the values in format_pack and ordering to supports the + initializer syntax defined by the TACO API. If format_pack is a list of + ModeFormat, replaces it with ModeFormatPack constructed from the list. If + ordering is not provided, set ordering to the natural ordering for the rank + corresponding to format_pack. + + Raises: + ValueError: If format_pack is not an instance of ModeFormatPack or if + ordering is not an instance of ModeOrdering. + """ + if isinstance(self.format_pack, list): + if not _all_instance_of(self.format_pack, ModeFormat): + raise ValueError(f"Expected a list of ModeFormat: {self.format_pack}") + self.format_pack = ModeFormatPack(self.format_pack) + if not isinstance(self.format_pack, ModeFormatPack): + raise ValueError(f"Expected ModeFormatpack: {self.format_pack}") + + if self.ordering is None: + self.ordering = ModeOrdering(list(range(self.rank()))) + if not isinstance(self.ordering, ModeOrdering): + raise ValueError(f"Expected ModeOrdering: {self.ordering}") + + if self.format_pack.rank() != self.ordering.rank(): + raise ValueError("Inconsistent ModeFormatPack and ModeOrdering: " + f"len({self.format_pack}) != " + f"len({self.ordering})") + + def is_dense(self) -> bool: + """Returns true if all the Tensor dimensions have a dense format.""" + return all([f == ModeFormat.DENSE for f in self.format_pack.formats]) + + def rank(self) -> int: + """Returns the number of dimensions represented by the format.""" + return self.format_pack.rank() + + def mlir_tensor_attr(self) -> Optional[sparse_tensor.EncodingAttr]: + """Constructs the MLIR attributes for the tensor format.""" + if self.is_dense(): + return None + + order = ( + range(self.rank()) if + (self.ordering is None) else self.ordering.ordering) + mlir_storage_format = [f.value for f in self.format_pack.formats] + return sparse_tensor.EncodingAttr.get(mlir_storage_format, + ir.AffineMap.get_permutation(order), + _POINTER_BIT_WIDTH, _INDEX_BIT_WIDTH) + + +def 
_make_format(formats: List[ModeFormat], + ordering: Optional[List[int]] = None) -> Format: + """Constructs a format from a list of ModeFormat and an optional ordering. + + Args: + formats: A list of ModeFormat, one for each dimension of a tensor. + ordering: An optional list of integer, for the ordering of the tensor + dimensions. When an ordering is not given, the identity ordering is used. + + Returns: + A tensor format object. + + Raises: + ValueError: If formats is not a list of ModeFormat or the length of formats + is not consistent with the len of ordering. + """ + ordering = ordering or _identity_ordering(len(formats)) + return Format(ModeFormatPack(formats), ModeOrdering(ordering)) + + +class _AtomicCounter: + """An atomic counter.""" + + def __init__(self): + self._counter = 0 + self._counter_lock = threading.Lock() + + def increment(self) -> int: + """Increments the counter by one and returns the old value.""" + old_value = self._counter + with self._counter_lock: + self._counter = self._counter + 1 + return old_value + + +class IndexVar: + """The tensor index class. + + We support the TACO API index_var class with an alias of this class. + + An IndexVar object represents an index variable in tensor index notation. + + Attributes: + name: A unique string name of the IndexVar. + """ + _counter = _AtomicCounter() + + def __init__(self): + id = self._counter.increment() + self._name = f"{_TACO_INDEX_PREFIX}{id}" + + def __repr__(self) -> str: + return f"IndexVar(name={repr(self._name)})" + + @property + def name(self) -> str: + """Returns the name of the IndexVar.""" + return self._name + + +def get_index_vars(n: int) -> List[IndexVar]: + """Returns a list of n IndexVar. + + This routine is defined by the TACO API. + + Args: + n: An interger representing the number of IndexVar to get. + + Returns: + A list of IndexVar. + + Raises: + ValueError: if n is not a positive integer. 
+ """ + if not isinstance(n, int) or n <= 0: + raise ValueError(f"Expected an integer: {n}.") + # If lock contention ever becomes an issue, we could implement a bulk getter + # that returns a range by only claiming the lock once. + return [IndexVar() for i in range(n)] + + +def _mlir_symbols_from_index_vars( + index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.SymbolDef, ...]: + """Returns a tuple of MLIR symbols for the given tuple of index_var.""" + return tuple(getattr(lang.S, i.name) for i in index_vars) + + +def _mlir_dimensions_from_index_vars( + index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.DimDef, ...]: + """Returns a tuple of MLIR dimensions for the given tuple of index_var.""" + return tuple(getattr(lang.D, i.name) for i in index_vars) + + +def _mlir_tensor_type( + dtype: DType, shape: Tuple[int, ...], + attr: Optional[sparse_tensor.EncodingAttr]) -> ir.RankedTensorType: + """Returns an MLIR tensor type. + + Args: + dtype: An DType object for the element data type of the tensor. + shape: A tuple of integer for the shape of the tensor. + attr: An optional MLIR sparse tensor attribute, only provided if the tensor + is a sparse tensor. + + Returns: + An MLIR ranked tensor type. + """ + ir_type = _mlir_type_from_taco_type(dtype) + return ir.RankedTensorType.get(shape, ir_type, attr) + + +def _verify_and_normalize_indices(indices) -> Tuple[IndexVar, ...]: + """Verifies and normalizes the indices for a tensor access. + + Args: + indices: The index expression used to access a tensor, which could be any + Python object from user inputs. + + Returns: + A tuple of IndexVar. + + Raises: + ValueError: If indices is not an IndexVar or a tuple of IndexVar. 
+ """ + if isinstance(indices, IndexVar): + return (indices,) + elif isinstance(indices, tuple) and _all_instance_of(indices, IndexVar): + return indices + + raise ValueError(f"Expected IndexVars: {indices}") + + +@dataclasses.dataclass(frozen=True) +class _StructOpInfo: + """Information for generating a structured op in the linalg dialect. + + This information is associated with an expression node that serves as the + root for an expression subtree implemented with a structured op. + + Attributes: + dst_indices: A tuple of IndexVar, representing the result dimensions of the + structured op. This is used to construct the temporary variable for the + tensor to hold the structured op result. + dst_dims: A tuple of int, representing the result shape of the structured + op. + dst_dtype: A DType representing the data type of the structured op result. + dst_name: A string representing the name of the structured op result. + dst_format: A Format object representing the destination tensor format. + """ + dst_indices: Tuple[IndexVar, ...] + dst_dims: Tuple[int, ...] + dst_dtype: DType + dst_name: str + dst_format: Format + + def __post_init__(self) -> None: + """Verifies the integrity of the attribute values.""" + assert len(self.dst_indices) == len(self.dst_dims) + assert self.dst_format is not None + + def emit_tensor_init(self) -> ir.RankedTensorType: + """Returns an initialization for the destination tensor.""" + if self.dst_format.is_dense(): + # Initialize the dense tensor. + ir_type = _mlir_type_from_taco_type(self.dst_dtype) + tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result + zero = arith.ConstantOp(ir_type, 0.0) + return linalg.FillOp(output=tensor, value=zero).results[0] + + # Initialize the sparse tensor. 
+ mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims, + self.dst_format.mlir_tensor_attr()) + index_type = ir.IndexType.get() + dims = [arith.ConstantOp(index_type, d).result for d in mlir_type.shape] + return sparse_tensor.InitOp(mlir_type, dims) + + +class _Stats: + """Information to describe how a tensor expression is implemented. + + Currently, we only record the temporary tensors introduced for splitting the + original expression. + """ + + def __init__(self): + self._temps = [] + + def __repr__(self) -> str: + return f"_Stats({repr(self._temps)})" + + def add_element(self, structop: _StructOpInfo): + """Adds a temporary tensor.""" + self._temps.append(structop) + + def get_total(self) -> int: + """Gets the total number of temporary tensors.""" + return len(self._temps) + + def _get_element(self, idx: int) -> _StructOpInfo: + """Gets the ith temporary tensor.""" + assert idx < self.get_total() + return self._temps[idx] + + def get_dimensions(self, idx: int) -> Tuple[int]: + """Gets the dimensions for the ith temporary tensor.""" + return self._get_element(idx).dst_dims + + def get_formats(self, idx: int) -> Tuple[ModeFormat]: + """Gets the ModeFormats for the ith temporary tensor.""" + return tuple(self._get_element(idx).dst_format.format_pack.formats) + + +class Tensor: + """The tensor class. + + We support the TACO API tensor class with an alias of this class. + + This class is part of the TACO API with the following methods: + insert: Inserts a value to the given coordinate in the tensor. + to_array: Returns a numpy ndarray for the tensor. + + TACO API also defines the following arrtibutes for the class: + dtype: A dtype object representing the data type of the tensor. + format: A format object representing the storage format of the tensor. + name: A string object representing the name of the tensor. + order: An integral rank of the tensor. + shape: A list of integers representing the shape of the tensor. 
+ + We currently ignore the tensor dimension ordering for dense tensor. + """ + _counter = _AtomicCounter() + + def _get_unique_name(self) -> str: + """Returns a unique name for creating a new Tensor.""" + return f"{_TACO_TENSOR_PREFIX}{self._counter.increment()}" + + def _init_format(self, fmt: Union[ModeFormat, List[ModeFormat], + Format]) -> None: + """Process the fmt argument for the Tensor constructor. + + Args: + fmt: This argument can be a ModeFormat, List[ModeFormat], or format. If + this argument is a ModeFormat, uses this ModeFormat for all the tensor + dimensions. If this argument is a list of ModeFormat, the len of the + list should equal to the rank of the tensor. If this argument is a + format, uses it for the format of the tensor. + + Raises: + ValueError: If fmt is not one of the expected type or is inconsistent + with the rank of the tensor. This is because fmt could be an users + input. + """ + if isinstance(fmt, ModeFormat): + self._format = _make_format([fmt] * self.order) + elif isinstance(fmt, list): + if len(fmt) == self.order and isinstance(fmt[0], ModeFormat): + self._format = _make_format(fmt) + else: + raise ValueError("Inconsistent shape and format: " + f"{self._shape}, {fmt}.") + elif isinstance(fmt, Format): + if fmt.rank() != self.order: + raise ValueError("Inconsistent shape and format: " + f"{self._shape}, {fmt}.") + else: + self._format = fmt + else: + raise ValueError(f"Invalid format argument: {fmt}.") + + def __init__(self, + value_or_shape: Optional[Union[List[int], Tuple[int, ...], float, + int]] = None, + fmt: Optional[Union[ModeFormat, List[ModeFormat], + Format]] = None, + dtype: Optional[DType] = None, + name: Optional[str] = None): + """The tensor constructor interface defined by TACO API. + + Args: + value_or_shape: This argument is optional and can be int, float, + List[int], or Tuple[int, ...]. If this argument is an int or float, + creates a scalar tensor and initializes it with the value. 
If this + argument is a list or tuple of int, uses it as the shape to create a + tensor. + fmt: This argument can be a ModeFormat, List[ModeFormat], or format. If + this argument is a ModeFormat, uses this ModeFormat for all the tensor + dimensions. If this argument is a list of ModeFormat, the len of the + list should equal to the rank of the tensor. If this argument is a + format, uses it for the format of the tensor. + dtype: An object of dtype, representing the data type of the tensor. + name: A string name of the tensor. If a name is not given, creates a + unique name for the tensor. + + Raises: + ValueError: If there is any inconsistency among the input arguments. + """ + # Take care of the argument default values. + fmt = fmt or ModeFormat.COMPRESSED + dtype = dtype or DType(Type.FLOAT64) + self._name = name or self._get_unique_name() + + self._dtype = dtype + # We currently use _coords and _values to host the sparse tensor value with + # COO format, and _dense_storage to host the dense tensor value. We haven't + # implement the conversion between the two storages yet. This will be + # improved in a follow up CL. + self._coords = [] + self._values = [] + self._dense_storage = None + self._stats = _Stats() + if value_or_shape is None or isinstance(value_or_shape, int) or isinstance( + value_or_shape, float): + # Create a scalar tensor and ignore the fmt parameter. + self._shape = [] + self._format = _make_format([], []) + if value_or_shape is not None: + self._dense_storage = np.array(value_or_shape, dtype=self._dtype.value) + elif (isinstance(value_or_shape, tuple) or isinstance( + value_or_shape, list)) and _all_instance_of(value_or_shape, int): + # Create a tensor with the specified shape and format. + self._shape = list(value_or_shape) + self._init_format(fmt) + else: + raise ValueError("Invalid first argument. 
" + "Must be a tuple or list for a shape or a single value" + f"if initializing a scalar tensor: {value_or_shape}.") + + def __repr__(self) -> str: + value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else + f"{repr(self._coords)} {repr(self._values)})") + return (f"Tensor(_name={repr(self._name)} " + f"_dtype={repr(self._dtype)} : ") + value_str + + def insert(self, coords: List[int], val: Union[float, int]) -> None: + """Inserts a value to the given coordinate. + + Args: + coords: A list of integer coordinates. The length of the list must be the + same as the rank of the tensor. + val: A value being inserted. It is either an integral or a floating point + value. This value will be converted to the data type of the tensor. + + Raises: + ValueError: When there is any problem in the parameters. + """ + if not isinstance(coords, list): + raise ValueError(f"Non list coordinate detected: {coords}.") + if not _all_instance_of(coords, int): + raise ValueError(f"Non integer coordinate detected: {coords}.") + if (len(coords) != self.order or + any([c < 0 or c >= self._shape[i] for i, c in enumerate(coords)])): + raise ValueError("Invalid coordinate for rank: " + f"{self.order}, {coords}.") + + if not isinstance(val, int) and not isinstance(val, float): + raise ValueError(f"Value is neither int nor float: {val}.") + + self._coords.append(tuple(coords)) + self._values.append(self._dtype.value(val)) + + def is_dense(self) -> bool: + """Returns true if all the Tensor dimensions have a dense format.""" + return self._format.is_dense() + + def to_array(self) -> np.ndarray: + """Returns the numpy array for the Tensor. + + This is currenly only implemented for dense Tensor. + """ + if not self.is_dense(): + raise ValueError("Conversion from non-dense Tensor " + "to numpy array not supported yet.") + return self._dense_storage + + @staticmethod + def from_array(array: np.ndarray) -> "Tensor": + """Returns a dense tensor with the value copied from the input array. 
+ + We currently only support the conversion of float64 numpy arrays to Tensor. + + Args: + array: The numpy array that provides the data type, shape and value for + the tensor. + + Returns: + A Tensor object. + + Raises: + ValueError if the data type of the numpy array is not float64. + """ + if array.dtype != np.float64: + raise ValueError(f"Expected float64 value type: {array.dtype}.") + tensor = Tensor(array.shape, ModeFormat.DENSE) + tensor._dense_storage = np.copy(array) + return tensor + + @staticmethod + def from_coo( + coordinates: List[Tuple[int, ...]], + values: List[_AnyRuntimeType], + fmt: Format, + dtype: DType, + ) -> "Tensor": + """Converts coordinates and values to a sparse tensor representation. + + Args: + coordinates: A list of coordinates with non-zero values. + values: The non-zero values. + fmt: The tensor storage format. + dtype: The tensor element data type. + + Returns: + A tensor with the given non-zero values and storage format. The shape of + the tensor has the minimum size for each dimension to make the given + coordinates valid. + """ + assert (isinstance(coordinates, List) and + _all_instance_of(coordinates, Tuple)) + assert (isinstance(values, List) and _all_instance_of(values, dtype.value)) + assert isinstance(fmt, Format) + + rank = fmt.rank() + assert all(len(c) == rank and _all_instance_of(c, int) for c in coordinates) + + # Find the maximum coordinate value for each dimension. + max_coordinate = list(map(max, zip(*coordinates))) + # The size of each dimension is one more that such a maximum coordinate + # value. 
+ shape = [c + 1 for c in max_coordinate] + tensor = Tensor(shape, fmt) + tensor._coords = coordinates + tensor._values = values + + return tensor + + @property + def dtype(self) -> DType: + """Returns the data type for the Tensor.""" + return self._dtype + + @property + def format(self) -> Format: + """Returns the storage format for the Tensor.""" + return self._format + + @property + def name(self) -> str: + """Returns the name for the Tensor.""" + return self._name + + @property + def order(self) -> int: + """Returns the rank of the Tensor.""" + return len(self._shape) + + @property + def shape(self) -> List[int]: + """Returns the shape of the Tensor.""" + return self._shape + + def __getitem__(self, key) -> "Access": + """Verifies and processes a tensor access. + + In the tensor index notation, a tensor access T[i, j] is represented as + retrieving a value with key (i, j) from the tensor object T in Python. This + routine verifies the key for the tensor access and returns a tensor access + object. + + Args: + key: The key used to access the tensor, which could be any Python object + from user inputs. + + Returns: + The corresponding tensor access object. + + Raises: + ValueError: If key is not an IndexVar or a tuple of IndexVar. + """ + indices = _verify_and_normalize_indices(key) + return Access(self, indices) + + def __setitem__(self, key, value) -> None: + """Verifies and processes a tensor assignment. + + In the tensor index notation, a tensor assignment "T[i, j] = ..." is + represented as setting a value for a tensor object T via key (i, j) in + Python. This routine verifies the key, evaluates the value, and assigns the + value to the tensor. + + We only support assignment of dense tensor currently. + + Args: + key: The key used to access the tensor, which could be any Python object + from user inputs. + value: The value assigned to the tensor, which could be any Python object + from user inputs. 
+ + Raises: + ValueError: If tensor is not a dense tensor, or the key is not an IndexVar + or a tuple of IndexVar, or the length of the indices is not the same as + the rank of the tensor. + """ + indices = _verify_and_normalize_indices(key) + if len(indices) != self.order: + raise ValueError("Mismatch between indices and tensor rank: " + f"len({indices}) != {self.order}.") + + result = value.evaluate(self, indices) + if self.is_dense(): + assert isinstance(result, np.ndarray) + self._dense_storage = result + else: + assert _all_instance_of(result, np.ndarray) and len(result) == 2 + assert (result[0].ndim, result[1].ndim) == (1, 2) + (self._values, self._coords) = result + + def mlir_tensor_type(self) -> ir.RankedTensorType: + """Returns the MLIR type for the tensor.""" + return _mlir_tensor_type(self._dtype, tuple(self._shape), + self._format.mlir_tensor_attr()) + + def dense_dst_ctype_pointer(self) -> ctypes.pointer: + """Returns the ctypes pointer for the pointer to an MemRefDescriptor. + + For a dense tensor output, the MLIR compiler allocates the storage for + the tensor. This routine returns the pointer to an MLIR MemRefDescriptor for + receiving the tensor. 
+ """ + assert self.is_dense() + mem_ref_desc = runtime.make_nd_memref_descriptor( + self.order, np.ctypeslib.as_ctypes_type(self.dtype.value))() + return ctypes.pointer(ctypes.pointer(mem_ref_desc)) + + def ctype_pointer(self) -> ctypes.pointer: + """Returns the ctypes pointer for the pointer to the input tensor.""" + if self.is_dense(): + if self._dense_storage is None: + self._dense_storage = np.zeros(self._shape, self._dtype.value) + return _ctype_pointer_from_array(self._dense_storage) + + shape = np.array(self._shape, np.int64) + indices = np.array(self._coords, np.int64) + values = np.array(self._values, self._dtype.value) + ptr = utils.coo_tensor_to_sparse_tensor(_get_support_lib_name(), shape, + values, indices) + return ctypes.pointer(ctypes.cast(ptr, ctypes.c_void_p)) + + def get_coordinates_and_values( + self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]: + """Returns the coordinates and values for the non-zero elements.""" + if not self.is_dense(): + return (self._coords, self._values) + + # Coordinates for non-zero elements, grouped by dimensions. + coords_by_dims = self._dense_storage.nonzero() + # Coordinates for non-zero elements, grouped by elements. + coords = np.transpose(coords_by_dims) + values = self._dense_storage[coords_by_dims] + return (coords, values) + + def _record_stats(self, structop: "_StructOpInfo"): + """Collects information for temporary tensors.""" + # Exclude user specified destination tensors. + if structop.dst_name == self.name: + return + + self._stats.add_element(structop) + + +def _emit_operand(op_def: lang.LinalgOpDef, indices: Tuple[IndexVar, ...], + name: str, kind: lang.OperandKind) -> lang.OperandDef: + """Emits an operand for a tensor access in the current linalg operation. + + Args: + op_def: A LinalgOpDef representing the current linalg dialect operation. + indices: A tuple of IndexVar used to access the tensor. + name: A unique string name of the tensor. + kind: An OperandKind for the operand. 
+ + Returns: + An OperandDef representing the operand. + """ + dim_sym = _mlir_symbols_from_index_vars(indices) + opnd = lang.OperandDef(kind, lang.T, dim_sym) + op_def.add_operand(name, opnd) + return opnd + + +@dataclasses.dataclass(frozen=True) +class _DimInfo: + """Information for an operand dimension. + + Attributes: + dim: An integer for the size of the dimension. + mode_format: A ModeFormat for the dimension sparsity. + """ + dim: int + mode_format: ModeFormat + + +@dataclasses.dataclass() +class _ExprInfo: + """Expression information for validation and code generation. + + Attributes: + src_indices: A tuple of IndexVar for the indices used by the tensors in the + expression tree. + dim_infos: A tuple of _DimInfo, representing the dimension information + corresponding to the src_indices. + reduce_indices: A set of IndexVar for the indices reduced by the expression. + acc_reduce_indices: An accumulated set of IndexVar for the indices reduced + by the expression and its children. + structop_info: Information to support the code generation for a structured + op in the linalg dialect, if the corresponding expression node is the root + of a subtree for a structured op. + mlir_value: The MLIR value generated for the structured op. + """ + src_indices: Tuple[IndexVar, ...] + dim_infos: Tuple[_DimInfo, ...] + reduce_indices: Optional[Set[IndexVar]] = None + acc_reduce_indices: Optional[Set[IndexVar]] = None + structop_info: Optional[_StructOpInfo] = None + mlir_value: Optional[ir.Value] = None + + def __post_init__(self) -> None: + """Verifies and fix up attribute values. + + Verifies the consistency of the attributes and modifies the default values + to support convenient initializer syntax. + """ + assert len(self.src_indices) == len(self.dim_infos) + self.reduce_indices = self.reduce_indices or set() + self.acc_reduce_indices = self.acc_reduce_indices or set() + + +class IndexExpr(abc.ABC): + """The index notation base class. 
+ + We support the TACO API index_expression class with an alias of this class. + """ + + def _verify_operand_and_build_expr(self, rhs, op: _BinaryOp) -> "_BinaryExpr": + """Verifies the RHS operand and returns a binary expression. + + Args: + rhs: The RHS of the binary operation, which could be any Python object + from user inputs. + op: A _BinaryOp object representing the binary operator. + + Raises: + ValueError: If rhs is not an IndexExpr. + """ + if not isinstance(rhs, IndexExpr): + raise ValueError(f"Expected IndexExpr: {rhs}") + return _BinaryExpr(op, self, rhs) + + def __add__(self, rhs) -> "_BinaryExpr": + """Defines the operator +. + + Args: + rhs: The value being added, which could be any Python object from user + inputs. + + Returns: + A _BinaryExpr object representing the operation. + + Raises: + ValueError: If rhs is not an IndexExpr. + """ + return self._verify_operand_and_build_expr(rhs, operator.add) + + def __mul__(self, rhs) -> "_BinaryExpr": + """Defines the operator *. + + Args: + rhs: The value being multiplied, which could be any Python object from + user inputs. + + Returns: + A _BinaryExpr object representing the operation. + + Raises: + ValueError: If rhs is not an IndexExpr. + """ + return self._verify_operand_and_build_expr(rhs, operator.mul) + + def __sub__(self, rhs) -> "_BinaryExpr": + """Defines the operator -. + + Args: + rhs: The value being subtracted, which could be any Python object from + user inputs. + + Returns: + A _BinaryExpr object representing the operation. + + Raises: + ValueError: If rhs is not an IndexExpr. + """ + return self._verify_operand_and_build_expr(rhs, operator.sub) + + @abc.abstractmethod + def _visit(self, + func: _ExprVisitor, + args, + *, + leaf_checker: _SubtreeLeafChecker = None) -> None: + """A post-order visitor. + + Args: + func: A callable applied to each node in the expression tree. + args: The variable-length arguments passed to the callable. 
These + arguments are grouped as an iterable and will be unpacked before passing + to the callable. This is to enable the keyword argument only syntax + after this argument. + leaf_checker: A callable object to identify nodes that should be treated + as leaf nodes to support partial tree visiting. + """ + pass + + @abc.abstractmethod + def _emit_expression( + self, + expr_to_opnd: Dict["IndexExpr", lang.OperandDef], + expr_to_info: _ExprInfoDict, + ) -> lang.ScalarExpression: + """Emits MLIR for the expression tree. + + Args: + expr_to_opnd: A dictionary for looking up structured op input operands for + the input nodes of the structured op. + expr_to_info: A dictionary for looking up code generation information for + expressions. + + Returns: + A linalg dialect ScalarExpression for the expression. + """ + pass + + @abc.abstractmethod + def dtype(self) -> DType: + """Returns the data type for the result of the expression.""" + pass + + def _emit_structured_op(self, expr_to_info: _ExprInfoDict) -> None: + """Emits a structured op in the linalg dialect for the expression tree. + + We define a DefineOpcallable in the domain specific language for the linalg + dialect and execute the callable to generate the structured op. Self is the + root of the expression tree for the structured op. + + Args: + expr_to_info: A dictionary for looking up code generation information for + expressions. + """ + op_info = expr_to_info[self].structop_info + op_name = op_info.dst_name + op_def = lang.LinalgOpDef(name=op_name) + op_callable = lang.DefinedOpCallable(op_name, op_def) + + # Collect the input expression nodes for the structured op. + expr_inputs = [] + self._visit( + _gather_structured_op_input, + (self, expr_to_info, expr_inputs), + leaf_checker=_is_structured_op_leaf, + ) + + # Create a linalg structured op operand for each input expression node and + # build a dictionary for looking up the information. 
+ expr_to_input_opnd = { + e: _emit_structured_op_input(e, expr_to_info, op_def) + for e in expr_inputs + } + + # Emit the expression tree, which produces the value assigned to the + # destination tensor. + value = self._emit_expression(expr_to_input_opnd, expr_to_info) + # Emit the structured op representation for the destination tensor. + dst_opnd = _emit_operand(op_def, op_info.dst_indices, op_info.dst_name, + lang.OperandKind.OutputTensor) + dst_dim_syms = _mlir_dimensions_from_index_vars(op_info.dst_indices) + dst_use = lang.TensorUse(dst_opnd, dst_dim_syms) + + expr_info = expr_to_info[self] + # If the structured op reduces some indices, explicitly represent the + # reduction. This is done by generating a ReduceFn for the dimensions being + # reduced in the linalg dialect and calling the function with the value + # being reduced. We only support add reduction currently. + if expr_info.reduce_indices: + reduce_dims = _mlir_dimensions_from_index_vars(expr_info.reduce_indices) + value = lang.ReduceFn.add[reduce_dims](value) + + # Emit the assignment as a comprehension in the linalg dialect. + comp = lang.Comprehension((dst_use, value)) + op_def.comprehensions.append(comp) + + # The structured op in the linalg dialect requires an explicit + # initialization for the destination tensor. Emit MLIR to initialize the + # destination tensor. + init = op_info.emit_tensor_init() + + # Collect MLIR values for the linalg input operands, with the assumption + # that dictionary preserves the insertion order. + args = [ + expr_to_info[expr].mlir_value + for expr, opnd in expr_to_input_opnd.items() + ] + # Execute the DefineOpcallable object for the linalg dialect operation to + # emit MLIR for the linalg structured op. 
+ expr_info.mlir_value = op_callable(*args, outs=[init]) + + def _identify_structured_ops( + self, + expr_to_info: _ExprInfoDict, + dst: Tensor, + dst_indices: Tuple[IndexVar, ...], + ) -> List["IndexExpr"]: + """Returns expression nodes for the roots of the identified structured ops. + + A structured op in the linalg dialect only supports reduction performed on + the whole expression. If the expression tree contains reduction that are + performed on part of the expression tree, the expression tree needs to be + implemented with multiple structured ops. This routine identifies all the + expression nodes that contain reduction as the root of structured ops in the + linalg dialect. + + Args: + expr_to_info: A dictionary for looking up code generation information for + expressions. + dst: A destination Tensor that accepts the value of the expression tree. + dst_indices: The indices used by the destination index expression. + + Returns: + An ordered list of IndexExpr for the root expressions of the structured + ops, where child expressions go before parent expressions that use their + results. + """ + reduce_indices = tuple( + set(expr_to_info[self].src_indices) - set(dst_indices)) + for reduce_index in reduce_indices: + _mark_structured_op_root(self, reduce_index, expr_to_info) + + self._visit(_accumulate_reduce_indices, (expr_to_info,)) + structop_roots = [] + self._visit(_gather_structured_op, (expr_to_info, structop_roots)) + + # Handle the root of the top level expression. + if not structop_roots or structop_roots[-1] != self: + # The top level expression is not a reduction. Add the top level + # expression as a structured op root. + structop_roots.append(self) + + # Use user specified information for the destination tensor to build an + # _StructOpInfo for the top level expression. 
+ expr_to_info[self].structop_info = _StructOpInfo(dst_indices, + tuple(dst.shape), + self.dtype(), dst.name, + dst.format) + + return structop_roots + + def _validate_and_collect_expr_info( + self, + dst: Tensor, + dst_indices: Tuple[IndexVar, ...], + ) -> _ExprInfoDict: + """Propagates expression information for validation. + + Propagates the indices used by child expression nodes to parent expression + nodes. Also collects and validates the sizes for the dimensions + corresponding to the indices. + + Args: + dst: A destination Tensor that accepts the value of the expression tree. + dst_indices: The indices used by the destination index expression. + + Raises: + ValueError if there is any inconsistency in indices or dimensional + values. + + Returns: + A dictionary of (IndexExpr, _ExprInfo). + """ + expr_to_info = {} + # Validate the expression tree and construct expression information. + self._visit(_validate_and_collect_expr_info, (expr_to_info,)) + + # Validate the destination dimension information. + info = expr_to_info[self] + index_to_dim_info = {i: d for i, d in zip(info.src_indices, info.dim_infos)} + for i, d, in zip(dst_indices, dst.shape): + if i not in index_to_dim_info: + raise ValueError("Destination IndexVar not used in the " + f"source expression: {i}") + else: + if d != index_to_dim_info[i].dim: + raise ValueError(f"Inconsistent destination dimension for {i}: " + f"{d} vs {index_to_dim_info[i].dim}") + + return expr_to_info + + def _emit_assignment( + self, + module: ir.Module, + dst: Tensor, + dst_indices: Tuple[IndexVar, ...], + expr_to_info: _ExprInfoDict, + input_accesses: List["Access"], + ) -> None: + """Emits an MLIR function for assigning the expression to a tensor.""" + input_types = [a.tensor.mlir_tensor_type() for a in input_accesses] + + # Build the kernel for the operations. 
+ with ir.InsertionPoint(module.body): + + @builtin.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME) + def linalg_funcop(*args): + # Set up the mapping from the Access nodes to their MLIR values. + for e, mlir in zip(input_accesses, args): + expr_to_info[e].mlir_value = mlir + + # Emit structured ops in the linalg dialect to implement the assignment. + for structop_root in self._identify_structured_ops( + expr_to_info, dst, dst_indices): + structop_root._emit_structured_op(expr_to_info) + dst._record_stats(expr_to_info[structop_root].structop_info) + + # The function returns the MLIR value of the root expression. + return expr_to_info[self].mlir_value + + linalg_funcop.func_op.attributes[ + "llvm.emit_c_interface"] = ir.UnitAttr.get() + + def evaluate( + self, + dst: Tensor, + dst_indices: Tuple[IndexVar, ...], + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """Evaluates tensor assignment dst[dst_indices] = expression. + + Args: + dst: The destination tensor. + dst_indices: The tuple of IndexVar used to access the destination tensor. + + Returns: + The result of the dense tensor represented in numpy ndarray or the sparse + tensor represented by two numpy ndarray for its non-zero values and + indices. + + Raises: + ValueError: If the expression is not proper or not supported. + """ + expr_to_info = self._validate_and_collect_expr_info(dst, dst_indices) + + # Compute a list of input accesses. + input_accesses = [] + self._visit(_gather_input_accesses_index_vars, (input_accesses,)) + + support_lib = _get_support_lib_name() + # Build and compile the module to produce the execution engine. + with ir.Context(), ir.Location.unknown(): + module = ir.Module.create() + self._emit_assignment(module, dst, dst_indices, expr_to_info, + input_accesses) + compiled_module = _compile_mlir(module) + + # We currently rely on an environment to pass in the full path of a + # supporting library for the execution engine. 
+ engine = execution_engine.ExecutionEngine( + compiled_module, opt_level=_OPT_LEVEL, shared_libs=[support_lib]) + + # Gather the pointers for the input buffers. + input_pointers = [a.tensor.ctype_pointer() for a in input_accesses] + if dst.is_dense(): + # The pointer to receive dense output is the first argument to the + # execution engine. + arg_pointers = [dst.dense_dst_ctype_pointer()] + input_pointers + else: + # The pointer to receive sparse output is the last argument to the + # execution engine. The pointer to receive a sparse tensor output is a + # pointer to pointer of char. + arg_pointers = input_pointers + [ + ctypes.pointer(ctypes.pointer(ctypes.c_char(0))) + ] + + # Invoke the execution engine to run the module and return the result. + engine.invoke(_ENTRY_NAME, *arg_pointers) + + if dst.is_dense(): + return runtime.ranked_memref_to_numpy(arg_pointers[0][0]) + + # Check and return the sparse tensor output. + rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor( + support_lib, + ctypes.cast(arg_pointers[-1][0], ctypes.c_void_p), + np.float64, + ) + assert (np.equal(rank, dst.order) + and np.array_equal(shape, np.array(dst.shape)) and + np.equal(values.ndim, 1) and np.equal(values.shape[0], nse) and + np.equal(indices.ndim, 2) and np.equal(indices.shape[0], nse) and + np.equal(indices.shape[1], rank)) + return (values, indices) + + +@dataclasses.dataclass(frozen=True) +class Access(IndexExpr): + """The tensor access class. + + We support the TACO API access class with an alias of this class. + + Attributes: + tensor: A Tensor being accessed. + indices: A tuple of IndexVar, representing the indices used to access the + Tensor. + """ + tensor: Tensor + indices: Tuple[IndexVar, ...] + + def __post_init__(self) -> None: + """Verifies the tensor and indices for a tensor access. + + Raises: + ValueError: If indices is not a list of IndexVar or the len of indices + doesn't equal to the rank of the tensor. 
+ """ + if (not isinstance(self.indices, tuple) or + not _all_instance_of(self.indices, IndexVar)): + raise ValueError(f"Indices contain non IndexVar: {str(self.indices)}.") + if self.tensor.order != len(self.indices): + raise ValueError("Invalid indices for rank: " + f"str{self.tensor.order} != len({str(self.indices)}).") + + def _emit_expression( + self, + expr_to_opnd: Dict[IndexExpr, lang.OperandDef], + expr_to_info: _ExprInfoDict, + ) -> lang.ScalarExpression: + """Emits a linalg dialect TensorUse expression for the tensor access.""" + assert self in expr_to_opnd + dims = _mlir_dimensions_from_index_vars(self.indices) + return lang.TensorUse(expr_to_opnd[self], dims) + + def _visit(self, + func: _ExprVisitor, + args, + *, + leaf_checker: _SubtreeLeafChecker = None) -> None: + if leaf_checker: + assert leaf_checker(self, *args) + func(self, *args) + + def dtype(self) -> DType: + return self.tensor.dtype + + +def _gather_input_accesses_index_vars( + expr: IndexExpr, + input_accesses: List[Access], +) -> None: + """Collects Access nodes.""" + if isinstance(expr, Access) and expr not in input_accesses: + input_accesses.append(expr) + + +def _op_to_callable(op: _BinaryOp) -> lang.ArithFnType: + """Returns the linalg dialect function object for the given operation.""" + op_to_callable = { + operator.add: lang.ArithFn.add, + operator.sub: lang.ArithFn.sub, + operator.mul: lang.ArithFn.mul, + } + return op_to_callable[op] + + +@dataclasses.dataclass(frozen=True) +class _BinaryExpr(IndexExpr): + """The representation for a binary operation. + + Attributes: + op: A _BinaryOp representing the binary operation. + a: An IndexExpr representing the first operand of the operation. + b: An IndexExpr representing the second operand of the operation. 
+ """ + op: _BinaryOp + a: IndexExpr + b: IndexExpr + + def __post_init__(self) -> None: + """Verifies that the operands being added are IndexExpr.""" + assert isinstance(self.a, IndexExpr) and isinstance(self.b, IndexExpr) + + def _emit_expression( + self, + expr_to_opnd: Dict[IndexExpr, lang.OperandDef], + expr_to_info: _ExprInfoDict, + ) -> lang.ScalarExpression: + """Emits the expression tree and returns the expression.""" + # The current expression node is an internal node of the structured op. + if self not in expr_to_opnd: + a = self.a._emit_expression(expr_to_opnd, expr_to_info) + b = self.b._emit_expression(expr_to_opnd, expr_to_info) + return _op_to_callable(self.op)(a, b) + + # The current expression is a leaf node of the structured op. That is, it is + # a temporary tensor generated by its child structured op. + op_info = expr_to_info[self].structop_info + assert op_info is not None + dims = _mlir_dimensions_from_index_vars(op_info.dst_indices) + return lang.TensorUse(expr_to_opnd[self], dims) + + def _visit(self, + func: _ExprVisitor, + args, + *, + leaf_checker: _SubtreeLeafChecker = None) -> None: + """A post-order visitor.""" + if leaf_checker is None or not leaf_checker(self, *args): + self.a._visit(func, args, leaf_checker=leaf_checker) + self.b._visit(func, args, leaf_checker=leaf_checker) + func(self, *args) + + def dtype(self) -> DType: + """Returns the data type of the binary operation.""" + return self.a.dtype() + + +def _validate_and_collect_dim_info( + index_to_dim_info: Dict[IndexVar, _DimInfo], + indices: Tuple[IndexVar, ...], + dim_infos: Tuple[_DimInfo, ...], + expr: _BinaryExpr, +) -> None: + """Validates and collects the dimension information for an index notation. + + Validates (indices, dim_infos) against the information collected from other + source operands and is represented by index_to_dim_info. In particular, we + ensure that each IndexVar corresponds to only one dimension size. 
We also + aggregate the new information represented in (indices, dim_infos) to + index_to_dim_info. + + Args: + index_to_dim: A dictionary of (IndexVar, _DimInfo) collected from the + previous operands. + indices: The IndexVars to be validated. + dim_infos: The dimension information for the IndexVars to be validated. + expr: The binary expression where (indices, dim_infos) is used. + + Raises: + ValueError if there is any problem in the IndexVars or dimensional values. + """ + assert len(indices) == len(dim_infos) + for i, d in zip(indices, dim_infos): + if i not in index_to_dim_info: + index_to_dim_info[i] = d + else: + if d.dim != index_to_dim_info[i].dim: + raise ValueError(f"Inconsistent source dimension for {i}: " + f"{d.dim} vs {index_to_dim_info[i].dim}") + mode_format = _mode_format_estimator(expr.op)( + index_to_dim_info[i].mode_format, d.mode_format) + index_to_dim_info[i] = _DimInfo(d.dim, mode_format) + + +def _validate_and_collect_expr_info( + expr: IndexExpr, + expr_to_info: _ExprInfoDict, +) -> None: + """Validates dimension information and constructs _ExprInfo. + + Validates that dimensional values for the same IndexVar are the same. Collects + a list of IndexVar used by the expression and their corresponding dimensional + values. Constructs an _ExprInfo object to record the information for the + IndexExpr. + + This routine is passed to the post-order visitor as an _ExprVisitor object. + + Args: + expr: The IndexExpr being validated. + expr_to_info: The dictionary of (IndexExpr, _ExprInfo) for recording the + expression information. + + Raises: + ValueError if there is any problem in the IndexVars or dimensional values. + """ + # Objects of class Access can be shared by different expressions. Avoid + # processing Access objects multiple times by skipping the processing if expr + # is already in the dictionary. 
+ if expr in expr_to_info: + return + + if isinstance(expr, Access): + src_indices = expr.indices + src_dims = tuple(expr.tensor.shape) + mode_formats = tuple(expr.tensor.format.format_pack.formats) + assert len(src_dims) == len(mode_formats) + dim_infos = tuple([_DimInfo(d, m) for d, m in zip(src_dims, mode_formats)]) + else: + assert isinstance(expr, _BinaryExpr) + a_info = expr_to_info[expr.a] + index_to_dim_info = { + i: d for i, d in zip(a_info.src_indices, a_info.dim_infos) + } + b_info = expr_to_info[expr.b] + _validate_and_collect_dim_info(index_to_dim_info, b_info.src_indices, + b_info.dim_infos, expr) + # Here we rely on the fact that dictionaries keep the insertion order for + # keys and values. + src_indices = tuple(index_to_dim_info.keys()) + dim_infos = tuple(index_to_dim_info.values()) + + expr_to_info[expr] = _ExprInfo(src_indices, dim_infos) + + +def _mark_structured_op_root( + expr: IndexExpr, + reduce_index: IndexVar, + expr_to_info: _ExprInfoDict, +) -> None: + """Identifies the root expression for a structured op in the linalg dialect. + + An linalg structured op can only perform reduction on the whole expression. + For a TACO tensor algebra expression, the reduction on an IndexVar is done at + the smallest expression that contains all the uses of the IndexVar. If such an + expression is only part of the whole expression, we need to split this + sub-expression tree out from its parent and implement the sub-expression as a + structured op. + + This routine identifies the root expression node for performing a reduction on + the given IndexVar. If the reduction of the given IndexVar should be performed + on expression X, then the IndexVar is added to expr_to_info[X].reduce_indices + + Args: + expr: The root IndexExpr for the tensor algebra expression. + reduce_index: The IndexVar which we want to find out the proper expression + to perform a reduction. + expr_to_info: The dictionary to look up _ExprInfo for IndexExpr. 
+ """ + assert (isinstance(expr, _BinaryExpr)) + a_info = expr_to_info[expr.a] + b_info = expr_to_info[expr.b] + expr_info = expr_to_info[expr] + + if reduce_index in a_info.src_indices and reduce_index in b_info.src_indices: + expr_info.reduce_indices.add(reduce_index) + return + + if reduce_index in a_info.src_indices: + _mark_structured_op_root(expr.a, reduce_index, expr_to_info) + elif reduce_index in b_info.src_indices: + _mark_structured_op_root(expr.b, reduce_index, expr_to_info) + else: + assert False, "Unreachable path" + + +def _accumulate_reduce_indices( + expr: IndexExpr, + expr_to_info: _ExprInfoDict, +) -> None: + """Propagates reduction indices from child expressions to parent expressions. + + This routine is passed to the post-order visitor as an _ExprVisitor object. + + Args: + expr: The IndexExpr being visited. + expr_to_info: The dictionary of (IndexExpr, _ExprInfo) for recording the + expression information. + """ + assert expr in expr_to_info + expr_info = expr_to_info[expr] + + if isinstance(expr, _BinaryExpr): + a_info = expr_to_info[expr.a] + b_info = expr_to_info[expr.b] + expr_info.acc_reduce_indices = ( + a_info.acc_reduce_indices | b_info.acc_reduce_indices + | expr_info.reduce_indices) + else: + assert isinstance(expr, Access) + + +def _gather_structured_op( + expr: IndexExpr, + expr_to_info: _ExprInfoDict, + structop_roots: List[IndexExpr], +) -> None: + """Adds structured op root expression information to structop_roots. + + This routine is passed to the post-order visitor as an _ExprVisitor object. + + Args: + expr: The IndexExpr being visited. + expr_to_info: The dictionary to look up _ExprInfo for IndexExpr. + structop_roots: The resulting list of IndexExpr that are the roots for + linalg structured ops. + """ + if not expr_to_info[expr].reduce_indices: + return + + # If the expression is the root for reducing some indices, collect the indices + # and dimensions for the reduction result. 
+ dst_indices = [] + dst_dims = [] + mode_fmts = [] + for i, d in zip(expr_to_info[expr].src_indices, expr_to_info[expr].dim_infos): + if i not in expr_to_info[expr].acc_reduce_indices: + dst_indices.append(i) + dst_dims.append(d.dim) + mode_fmts.append(d.mode_format) + + # Add the information to the dictionary. + op_info = _StructOpInfo( + tuple(dst_indices), + tuple(dst_dims), + expr.dtype(), + f"temp{len(structop_roots)}", + _make_format(mode_fmts), + ) + expr_to_info[expr].structop_info = op_info + + # Add the expression to the list of structured op roots. + structop_roots.append(expr) + + +def _is_structured_op_leaf( + expr: IndexExpr, + root: IndexExpr, + expr_to_info: _ExprInfoDict, + *unused_args, +) -> bool: + """Returns true iff the expression is a leaf node for a structured op. + + The root of a structured op is a leaf of its parent structured op that uses + its result. An expression node is a leaf node for the current structured op if + it is an Access node or the root for a structured op that is not the current + structured op. + + This routine is passed to the post-order visitor as a _SubtreeLeafChecker + object. Because the post-order visitor pass the same parameters to both + _SubtreeLeafChecker and _ExprVisitor, this routine may received unused + parameters. + + Args: + expr: The IndexExpr being visited. + root: The root of the current structured op. + expr_to_info: The dictionary to look up _ExprInfo for IndexExpr. + + Returns: + True if the current IndexExpr is a leaf for the current structured op. + """ + return (expr != root and + expr_to_info[expr].structop_info is not None) or isinstance( + expr, Access) + + +def _gather_structured_op_input( + expr: IndexExpr, + root: IndexExpr, + expr_to_info: _ExprInfoDict, + structop_inputs: List[IndexExpr], +) -> None: + """Adds the IndexExpr to structop_inputs if it is an input. + + If the current IndexExpr is an input for the current structured op, adds it to + structop_inputs. 
The current IndexExpr is an input if it is an Access node or + if it is the root for a structured op that is not the current structured op. + + This routine is passed to the post-order visitor as an _ExprVisitor object. + + Args: + expr: The IndexExpr being visited. + root: The root of the current structured op. + expr_to_info: The dictionary to look up _ExprInfo for IndexExpr. + structop_inputs: The resulting list of IndexExpr that provide input to the + current structured op. + """ + if (expr != root and expr not in structop_inputs) and ( + isinstance(expr, Access) or + (expr in expr_to_info and expr_to_info[expr].structop_info)): + structop_inputs.append(expr) + + +def _emit_structured_op_input( + expr: IndexExpr, + expr_to_info: _ExprInfoDict, + op_def: lang.LinalgOpDef, +) -> lang.OperandDef: + """Emits OperandDef in the linalg dialect for the input IndexExpr. + + Args: + expr: The input IndexExpr for the current structured op. + expr_to_info: The dictionary to look up _ExprInfo for IndexExpr. + op_def: The linalg operation for the current structured op. + + Returns: + An OperandDef in the linalg dialect for the input IndexExpr. + """ + op_info = expr_to_info[expr].structop_info + if op_info: + # The input is a temporary tensor produced by another structured op. + indices = op_info.dst_indices + name = op_info.dst_name + else: + # The input is a user provided tensor. 
+ assert isinstance(expr, Access) + indices = expr.indices + name = expr.tensor.name + + dim_sym = _mlir_symbols_from_index_vars(indices) + opnd = lang.OperandDef(lang.OperandKind.InputTensor, lang.T, dim_sym) + op_def.add_operand(name, opnd) + return opnd diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py new file mode 100644 index 0000000000000..05704b92f6a84 --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py @@ -0,0 +1,47 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Supports the PyTACO API with the MLIR-PyTACO implementation. + +See http://tensor-compiler.org/ for TACO tensor compiler. + +This module exports the MLIR-PyTACO implementation through the language defined +by PyTACO. In particular, it defines the function and type aliases and constants +needed for the PyTACO API to support the execution of PyTACO programs using the +MLIR-PyTACO implementation. +""" + +from . import mlir_pytaco +from . import mlir_pytaco_io + +# Functions defined by PyTACO API. +get_index_vars = mlir_pytaco.get_index_vars +from_array = mlir_pytaco.Tensor.from_array +read = mlir_pytaco_io.read +write = mlir_pytaco_io.write + +# Classes defined by PyTACO API. +dtype = mlir_pytaco.DType +mode_format = mlir_pytaco.ModeFormat +mode_ordering = mlir_pytaco.ModeOrdering +mode_format_pack = mlir_pytaco.ModeFormatPack +format = mlir_pytaco.Format +index_var = mlir_pytaco.IndexVar +tensor = mlir_pytaco.Tensor +index_expression = mlir_pytaco.IndexExpr +access = mlir_pytaco.Access + +# Data type constants defined by PyTACO API. 
+int16 = mlir_pytaco.DType(mlir_pytaco.Type.INT16) +int32 = mlir_pytaco.DType(mlir_pytaco.Type.INT32) +int64 = mlir_pytaco.DType(mlir_pytaco.Type.INT64) +float32 = mlir_pytaco.DType(mlir_pytaco.Type.FLOAT32) +float64 = mlir_pytaco.DType(mlir_pytaco.Type.FLOAT64) + +# Storage format constants defined by the PyTACO API. In PyTACO, each storage +# format constant has two aliasing names. +compressed = mlir_pytaco.ModeFormat.COMPRESSED +Compressed = mlir_pytaco.ModeFormat.COMPRESSED +dense = mlir_pytaco.ModeFormat.DENSE +Dense = mlir_pytaco.ModeFormat.DENSE diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py new file mode 100644 index 0000000000000..0ee69c78da37a --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py @@ -0,0 +1,206 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Experimental MLIR-PyTACO with sparse tensor support. + +See http://tensor-compiler.org/ for TACO tensor compiler. + +This module implements the PyTACO API for writing a tensor to a file or reading +a tensor from a file. + +See the following links for Matrix Market Exchange (.mtx) format and FROSTT +(.tns) format: + https://math.nist.gov/MatrixMarket/formats.html + http://frostt.io/tensors/file-formats.html +""" + +from typing import List, TextIO + +from . import mlir_pytaco + +# Define the type aliases so that we can write the implementation here as if +# it were part of mlir_pytaco.py. +Tensor = mlir_pytaco.Tensor +Format = mlir_pytaco.Format +DType = mlir_pytaco.DType +Type = mlir_pytaco.Type + +# Constants used in the implementation. 
+_MTX_FILENAME_SUFFIX = ".mtx" +_TNS_FILENAME_SUFFIX = ".tns" + +_MTX_HEAD = "%%MatrixMarket" +_MTX_MATRIX = "matrix" +_MTX_COORDINATE = "coordinate" +_MTX_REAL = "real" +_MTX_SYMMETRY = "symmetric" +_MTX_GENERAL = "general" +_SYMMETRY_FIELD_ID = 4 + +# The TACO supported header for .mtx has the following five fields: +# . %%MatrixMarket +# . matrix | tensor +# . coordinate | array +# . real +# . symmetric | general +# +# This is what we support currently. +_SUPPORTED_HEADER_FIELDS = ((_MTX_HEAD,), (_MTX_MATRIX,), (_MTX_COORDINATE,), + (_MTX_REAL,), (_MTX_GENERAL, _MTX_SYMMETRY)) + +_A_SPACE = " " +_MTX_COMMENT = "%" +_TNS_COMMENT = "#" + + +def _coordinate_from_strings(strings: List[str]) -> List[int]: + """"Return the coordinate represented by the input strings.""" + # Coordinates are 1-based in the text file and 0-based in memory. + return [int(s) - 1 for s in strings] + + +def _read_coordinate_format(file: TextIO, tensor: Tensor, + is_symmetric: bool) -> None: + """Reads tensor values in coordinate format.""" + rank = tensor.order + # Process the data for the tensor. 
+ for line in file: + if not line: + continue + + fields = line.split(_A_SPACE) + if rank != len(fields) - 1: + raise ValueError("The format and data have mismatched ranks: " + f"{rank} vs {len(fields)-1}.") + coordinate = _coordinate_from_strings(fields[:-1]) + value = float(fields[-1]) + tensor.insert(coordinate, value) + if is_symmetric and coordinate[0] != coordinate[-1]: + coordinate.reverse() + tensor.insert(coordinate, value) + + +def _read_mtx(file: TextIO, fmt: Format) -> Tensor: + """Inputs tensor from a text file with .mtx format.""" + # The first line should have this five fields: + # head tensor-kind format data-type symmetry + fields = file.readline().rstrip("\n").split(_A_SPACE) + tuple_to_str = lambda x: "|".join(x) + if len(fields) != len(_SUPPORTED_HEADER_FIELDS): + raise ValueError( + "Expected first line with theses fields " + f"{' '.join(map(tuple_to_str, _SUPPORTED_HEADER_FIELDS))}: " + f"{' '.join(fields)}") + + for i, values in enumerate(_SUPPORTED_HEADER_FIELDS): + if fields[i] not in values: + raise ValueError(f"The {i}th field can only be one of these values " + f"{tuple_to_str(values)}: {fields[i]}") + + is_symmetric = (fields[_SYMMETRY_FIELD_ID] == _MTX_SYMMETRY) + # Skip leading empty lines or comment lines. + line = file.readline() + while not line or line[0] == _MTX_COMMENT: + line = file.readline() + + # Process the first data line with dimensions and number of non-zero values. + fields = line.split(_A_SPACE) + rank = fmt.rank() + if rank != len(fields) - 1: + raise ValueError("The format and data have mismatched ranks: " + f"{rank} vs {len(fields)-1}.") + shape = fields[:-1] + shape = [int(s) for s in shape] + num_non_zero = float(fields[-1]) + + # Read the tensor values in coordinate format. 
+ tensor = Tensor(shape, fmt) + _read_coordinate_format(file, tensor, is_symmetric) + return tensor + + +def _read_tns(file: TextIO, fmt: Format) -> Tensor: + """Inputs tensor from a text file with .tns format.""" + rank = fmt.rank() + coordinates = [] + values = [] + dtype = DType(Type.FLOAT64) + + for line in file: + # Skip empty lines and comment lines. + if not line or line[0] == _TNS_COMMENT: + continue + + # Process each line with a coordinate and the value at the coordinate. + fields = line.split(_A_SPACE) + if rank != len(fields) - 1: + raise ValueError("The format and data have mismatched ranks: " + f"{rank} vs {len(fields)-1}.") + coordinates.append(tuple(_coordinate_from_strings(fields[:-1]))) + values.append(dtype.value(fields[-1])) + + return Tensor.from_coo(coordinates, values, fmt, dtype) + + +def _write_tns(file: TextIO, tensor: Tensor) -> None: + """Outputs a tensor to a file using .tns format.""" + coords, non_zeros = tensor.get_coordinates_and_values() + assert len(coords) == len(non_zeros) + # Output a coordinate and the corresponding value in a line. + for c, v in zip(coords, non_zeros): + # The coordinates are 1-based in the text file and 0-based in memory. + plus_one_to_str = lambda x: str(x + 1) + file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n") + + +def read(filename: str, fmt: Format) -> Tensor: + """Inputs a tensor from a given file. + + The name suffix of the file specifies the format of the input tensor. We + currently only support .mtx format for support sparse tensors. + + Args: + filename: A string input filename. + fmt: The storage format of the tensor. + + Raises: + ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an + instance of Format or fmt is not a sparse tensor. 
+ """ + if (not isinstance(filename, str) or + (not filename.endswith(_MTX_FILENAME_SUFFIX) and + not filename.endswith(_TNS_FILENAME_SUFFIX))): + raise ValueError("Expected string filename ends with " + f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: " + f"{filename}.") + if not isinstance(fmt, Format) or fmt.is_dense(): + raise ValueError(f"Expected a sparse Format object: {fmt}.") + + with open(filename, "r") as file: + return (_read_mtx(file, fmt) if filename.endswith(_MTX_FILENAME_SUFFIX) else + _read_tns(file, fmt)) + + +def write(filename: str, tensor: Tensor) -> None: + """Outputs a tensor to a given file. + + The name suffix of the file specifies the format of the output. We currently + only support .tns format. + + Args: + filename: A string output filename. + tensor: The tensor to output. + + Raises: + ValueError: If filename doesn't end with .tns or tensor is not a Tensor. + """ + if (not isinstance(filename, str) or + not filename.endswith(_TNS_FILENAME_SUFFIX)): + raise ValueError("Expected string filename ends with" + f" {_TNS_FILENAME_SUFFIX}: {filename}.") + if not isinstance(tensor, Tensor): + raise ValueError(f"Expected a Tensor object: {tensor}.") + + with open(filename, "w") as file: + return _write_tns(file, tensor) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py new file mode 100644 index 0000000000000..867a129e9a09b --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py @@ -0,0 +1,121 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This file contains the utilities to process sparse tensor outputs. 
+ +from typing import Tuple +import ctypes +import functools +import numpy as np + + +@functools.lru_cache() +def _get_c_shared_lib(lib_name: str) -> ctypes.CDLL: + """Loads and returns the requested C shared library. + + Args: + lib_name: A string representing the C shared library. + + Returns: + The C shared library. + + Raises: + OSError: If there is any problem in loading the shared library. + ValueError: If the shared library doesn't contain the needed routines. + """ + # This raises OSError exception if there is any problem in loading the shared + # library. + c_lib = ctypes.CDLL(lib_name) + + try: + c_lib.convertToMLIRSparseTensor.restype = ctypes.c_void_p + except Exception as e: + raise ValueError("Missing function convertToMLIRSparseTensor from " + f"the supporting C shared library: {e} ") from e + + try: + c_lib.convertFromMLIRSparseTensor.restype = ctypes.c_void_p + except Exception as e: + raise ValueError("Missing function convertFromMLIRSparseTensor from " + f"the C shared library: {e} ") from e + + return c_lib + + +def sparse_tensor_to_coo_tensor( + lib_name: str, + sparse_tensor: ctypes.c_void_p, + dtype: np.dtype, +) -> Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]: + """Converts an MLIR sparse tensor to a COO-flavored format tensor. + + Args: + lib_name: A string for the supporting C shared library. + sparse_tensor: A ctypes.c_void_p to the MLIR sparse tensor descriptor. + dtype: The numpy data type for the tensor elements. + + Returns: + A tuple that contains the following values for the COO-flavored format + tensor: + rank: An integer for the rank of the tensor. + nse: An interger for the number of non-zero values in the tensor. + shape: A 1D numpy array of integers, for the shape of the tensor. + values: A 1D numpy array, for the non-zero values in the tensor. + indices: A 2D numpy array of integers, representing the indices for the + non-zero values in the tensor. 
+ + Raises: + OSError: If there is any problem in loading the shared library. + ValueError: If the shared library doesn't contain the needed routines. + """ + c_lib = _get_c_shared_lib(lib_name) + + rank = ctypes.c_ulonglong(0) + nse = ctypes.c_ulonglong(0) + shape = ctypes.POINTER(ctypes.c_ulonglong)() + values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))() + indices = ctypes.POINTER(ctypes.c_ulonglong)() + c_lib.convertFromMLIRSparseTensor(sparse_tensor, ctypes.byref(rank), + ctypes.byref(nse), ctypes.byref(shape), + ctypes.byref(values), ctypes.byref(indices)) + + # Convert the returned values to the corresponding numpy types. + shape = np.ctypeslib.as_array(shape, shape=[rank.value]) + values = np.ctypeslib.as_array(values, shape=[nse.value]) + indices = np.ctypeslib.as_array(indices, shape=[nse.value, rank.value]) + return rank, nse, shape, values, indices + + +def coo_tensor_to_sparse_tensor(lib_name: str, np_shape: np.ndarray, + np_values: np.ndarray, + np_indices: np.ndarray) -> int: + """Converts a COO-flavored format sparse tensor to an MLIR sparse tensor. + + Args: + lib_name: A string for the supporting C shared library. + np_shape: A 1D numpy array of integers, for the shape of the tensor. + np_values: A 1D numpy array, for the non-zero values in the tensor. + np_indices: A 2D numpy array of integers, representing the indices for the + non-zero values in the tensor. + + Returns: + An integer for the non-null ctypes.c_void_p to the MLIR sparse tensor + descriptor. + + Raises: + OSError: If there is any problem in loading the shared library. + ValueError: If the shared library doesn't contain the needed routines. 
+ """ + + rank = ctypes.c_ulonglong(len(np_shape)) + nse = ctypes.c_ulonglong(len(np_values)) + shape = np_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong)) + values = np_values.ctypes.data_as( + ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype))) + indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong)) + + c_lib = _get_c_shared_lib(lib_name) + ptr = c_lib.convertToMLIRSparseTensor(rank, nse, shape, values, indices) + assert ptr is not None, "Problem with calling convertToMLIRSparseTensor" + return ptr From e4a556268ea97320ba96de6e2f26235b4046c994 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Fri, 21 Jan 2022 17:59:07 +0100 Subject: [PATCH 195/946] Revert "[libc++] Use addressof in unordered_map." This reverts commit cab96169380296a496614f433507d86b743f0d02. This breaks the CI. --- libcxx/include/__hash_table | 40 +++++++-------- libcxx/include/unordered_map | 21 ++++---- ...rator.operators.addressof.compile.pass.cpp | 49 ------------------- .../assign_move.addressof.compile.pass.cpp | 42 ---------------- .../move.addressof.compile.pass.cpp | 33 ------------- .../move_alloc.addressof.compile.pass.cpp | 36 -------------- .../emplace_hint.addressof.compile.pass.cpp | 30 ------------ ...rase_const_iter.addressof.compile.pass.cpp | 27 ---------- .../erase_range.addressof.compile.pass.cpp | 27 ---------- ...nt_const_lvalue.addressof.compile.pass.cpp | 28 ----------- ...ible_value_type.addressof.compile.pass.cpp | 28 ----------- ...alue_value_type.addressof.compile.pass.cpp | 28 ----------- ...ry_emplace_hint.addressof.compile.pass.cpp | 40 --------------- .../swap.addressof.compile.pass.cpp | 29 ----------- .../move.addressof.compile.pass.cpp | 33 ------------- .../move_alloc.addressof.compile.pass.cpp | 36 -------------- .../emplace_hint.addressof.compile.pass.cpp | 30 ------------ 17 files changed, 30 insertions(+), 527 deletions(-) delete mode 100644 
libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index adc732cffb015..6b682ab27c6c3 100644 --- a/libcxx/include/__hash_table +++ 
b/libcxx/include/__hash_table @@ -308,9 +308,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_iterator& operator=(const __hash_iterator& __i) { - if (this != _VSTD::addressof(__i)) + if (this != &__i) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); __node_ = __i.__node_; } return *this; @@ -406,7 +406,7 @@ public: : __node_(__x.__node_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); + __get_db()->__iterator_copy(this, &__x); #endif } @@ -415,7 +415,7 @@ public: __hash_const_iterator(const __hash_const_iterator& __i) : __node_(__i.__node_) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); } _LIBCPP_INLINE_VISIBILITY @@ -427,9 +427,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_iterator& operator=(const __hash_const_iterator& __i) { - if (this != _VSTD::addressof(__i)) + if (this != &__i) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); __node_ = __i.__node_; } return *this; @@ -523,7 +523,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); } _LIBCPP_INLINE_VISIBILITY @@ -535,9 +535,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_local_iterator& operator=(const __hash_local_iterator& __i) { - if (this != _VSTD::addressof(__i)) + if (this != &__i) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -655,7 +655,7 @@ public: __bucket_count_(__x.__bucket_count_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); + __get_db()->__iterator_copy(this, &__x); #endif } @@ -666,7 +666,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { 
- __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); } _LIBCPP_INLINE_VISIBILITY @@ -678,9 +678,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i) { - if (this != _VSTD::addressof(__i)) + if (this != &__i) { - __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); + __get_db()->__iterator_copy(this, &__i); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( __u.size() = 0; } #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } @@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); if (__p != end() && key_eq()(*__p, __cp->__value_)) @@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) 
called with an iterator not" " referring to this unordered container"); __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...); @@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container erase(iterator) called with an iterator not" " referring to this container"); _LIBCPP_DEBUG_ASSERT(__p != end(), @@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); for (const_iterator __p = __first; __first != __last; __p = __first) @@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = __u.__p1_.first().__ptr(); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index accab28a99592..73edadab20990 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -519,7 +519,6 @@ template #include 
<__functional/is_transparent.h> #include <__hash_table> #include <__iterator/iterator_traits.h> -#include <__memory/addressof.h> #include <__node_handle> #include <__utility/forward.h> #include @@ -1187,7 +1186,7 @@ public: {return __table_.__insert_unique(__x);} iterator insert(const_iterator __p, const value_type& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not " "referring to this unordered_map"); ((void)__p); @@ -1208,7 +1207,7 @@ public: {return __table_.__insert_unique(_VSTD::move(__x));} iterator insert(const_iterator __p, value_type&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1226,7 +1225,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::insert(const_iterator, value_type&&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1242,7 +1241,7 @@ public: template _LIBCPP_INLINE_VISIBILITY iterator emplace_hint(const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered_map::emplace_hint(const_iterator, args...) 
called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1274,7 +1273,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1285,7 +1284,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__h) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1693,7 +1692,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } @@ -1713,7 +1712,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } @@ -2469,7 +2468,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } @@ -2490,7 +2489,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, _VSTD::addressof(__u)); + __get_db()->swap(this, &__u); #endif } diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp 
b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp deleted file mode 100644 index 856b78293a107..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// Validate the constructors of the (const)(_local)_iterator classes to be -// properly guarded against ADL-hijacking operator&. - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -template -void test() { - FromIterator from; - ToIterator copy(from); - copy = from; - - ToIterator move(std::move(from)); - from = FromIterator(); - move = std::move(from); -} - -void test() { - { - using I = std::unordered_map::iterator; - using CI = std::unordered_map::const_iterator; - test(); - test(); - test(); - } - { - using IL = std::unordered_map::local_iterator; - using CIL = std::unordered_map::const_local_iterator; - test(); - test(); - test(); - } -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp deleted file mode 100644 index 6dbd7aaea2a8e..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp +++ /dev/null @@ -1,42 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// unordered_map& operator=(unordered_map&&) -// noexcept( -// allocator_type::propagate_on_container_move_assignment::value && -// is_nothrow_move_assignable::value && -// is_nothrow_move_assignable::value && -// is_nothrow_move_assignable::value); - -// Validate whether the container can be move-assigned with an ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - { - std::unordered_map mo; - std::unordered_map m; - m = std::move(mo); - } - { - std::unordered_map mo; - std::unordered_map m; - m = std::move(mo); - } -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp deleted file mode 100644 index e36c6525d631b..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// unordered_map(unordered_map&& u) -// noexcept( -// is_nothrow_move_constructible::value && -// is_nothrow_move_constructible::value && -// is_nothrow_move_constructible::value); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map mo; - std::unordered_map m(std::move(mo)); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp deleted file mode 100644 index 1fec0ee5d0f4b..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// unordered_map(unordered_map&& u, const allocator_type& a); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -#include "test_allocator.h" -#include "min_allocator.h" - -void test() { - using A = test_allocator>; - using C = std::unordered_map, - std::equal_to, A>; - - C mo; - C m(std::move(mo), A()); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp deleted file mode 100644 index 58ddefd8cfbfc..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// template -// iterator emplace_hint(const_iterator position, Args&&... 
args); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m; - m.emplace_hint(m.cbegin()); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp deleted file mode 100644 index 1461f2499baad..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// iterator erase(const_iterator p) - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m; - m.erase(m.cbegin()); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp deleted file mode 100644 index 5f342f7b2152f..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// iterator erase(const_iterator first, const_iterator last) - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m; - m.erase(m.cbegin(), m.cend()); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp deleted file mode 100644 index db1805e7d7e63..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// iterator insert(const_iterator p, const value_type& x); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m; - const std::pair v; - m.insert(m.cend(), v); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp deleted file mode 100644 index 530b826b61e78..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// template ::value>::type> -// pair insert(P&& x); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test(std::unordered_map& m) { m.insert(m.cend(), *m.begin()); } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp deleted file mode 100644 index 80219cb193edd..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// iterator insert(const_iterator hint, value_type&& obj); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test(std::unordered_map& m) { - m.insert(m.cend(), std::pair{}); -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp deleted file mode 100644 index 2c667374d4fe8..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_map - -// template -// iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args); -// template -// iterator try_emplace(const_iterator hint, key_type&& k, Args&&... 
args); -// template - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m; - { - const operator_hijacker k; - m.try_emplace(m.cend(), k); - } - { - operator_hijacker k; - m.try_emplace(m.cend(), std::move(k)); - } -} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp deleted file mode 100644 index f5b5f516d42b5..0000000000000 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// void swap(unordered_map& c) -// noexcept(allocator_traits::is_always_equal::value && -// noexcept(swap(declval(), declval())) && -// noexcept(swap(declval(), declval()))); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_map m1; - std::unordered_map m2; - std::swap(m1, m2); -} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp deleted file mode 100644 index 73b19f35e2048..0000000000000 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp +++ /dev/null @@ -1,33 +0,0 @@ 
-//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_multimap - -// unordered_multimap(unordered_multimap&&) -// noexcept( -// is_nothrow_move_constructible::value && -// is_nothrow_move_constructible::value && -// is_nothrow_move_constructible::value); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_multimap mo; - std::unordered_multimap m(std::move(mo)); -} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp deleted file mode 100644 index 6419a03666d65..0000000000000 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_multimap - -// unordered_multimap(unordered_map&& u, const allocator_type& a); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -#include "test_allocator.h" -#include "min_allocator.h" - -void test() { - using A = test_allocator>; - using C = std::unordered_multimap, - std::equal_to, A>; - - C mo; - C m(std::move(mo), A()); -} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp deleted file mode 100644 index 5e23b73cf34b3..0000000000000 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03 - -// - -// template , class Pred = equal_to, -// class Alloc = allocator>> -// class unordered_multimap - -// template -// iterator emplace_hint(const_iterator position, Args&&... 
args); - -// Validate whether the operation properly guards against ADL-hijacking operator& - -#include - -#include "test_macros.h" -#include "operator_hijacker.h" - -void test() { - std::unordered_multimap m; - m.emplace_hint(m.cbegin()); -} From 23a7bb541dae47e691be5380e96ca63f04d6d194 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Fri, 21 Jan 2022 18:17:55 +0100 Subject: [PATCH 196/946] [clang-format] Fix comment in spaceRequiredBefore. NFC. --- clang/lib/Format/TokenAnnotator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 7fe0d319e5703..3ba81dfed38c2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3323,7 +3323,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // or import .....; if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis)) return true; - // No space between module :. + // Space between `module :` and `import :`. if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) && Right.is(TT_ModulePartitionColon)) return true; From 5659638418808697061f59b78e2f93fc15f2d7cd Mon Sep 17 00:00:00 2001 From: David Tenty Date: Fri, 21 Jan 2022 12:20:28 -0500 Subject: [PATCH 197/946] Revert "[compiler-rt][cmake] Use HandleOutOfTreeLLVM like libcxx and friends" This reverts commit 8c9f62ea90c70d538766a81ef5980c9223b8566b, which is causing build failures on the bots because it inadvertently changes the output directory of the compiler-rt libs when built as a runtime. 
Differential Revision: https://reviews.llvm.org/D117815 --- compiler-rt/CMakeLists.txt | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 974e2333c7abd..12946d74c797b 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -5,6 +5,13 @@ cmake_minimum_required(VERSION 3.13.4) +# Check if compiler-rt is built as a standalone project. +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD) + project(CompilerRT C CXX ASM) + set(COMPILER_RT_STANDALONE_BUILD TRUE) + set_property(GLOBAL PROPERTY USE_FOLDERS ON) +endif() + set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") # Add path for custom compiler-rt modules. @@ -15,16 +22,6 @@ list(INSERT CMAKE_MODULE_PATH 0 "${LLVM_COMMON_CMAKE_UTILS}/Modules" ) -# Check if compiler-rt is built as a standalone project. -if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD) - project(CompilerRT C CXX ASM) - set(COMPILER_RT_STANDALONE_BUILD TRUE) - set_property(GLOBAL PROPERTY USE_FOLDERS ON) - - # Find the LLVM sources and simulate LLVM CMake options. - include(HandleOutOfTreeLLVM) -endif() - if(CMAKE_CONFIGURATION_TYPES) set(CMAKE_CFG_RESOLVED_INTDIR "${CMAKE_CFG_INTDIR}/") else() From 26cbc430197a3432075c7c5dfec41765f92b97ed Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Fri, 21 Jan 2022 18:30:34 +0100 Subject: [PATCH 198/946] [flang] Remove target and require shell Fix failure from 68db0e25df4b1edaa2c6080eb88453ab01ea01d3 on arm buildbot. 
--- flang/test/Fir/basic-program.fir | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index 02463bef99496..b417a6148d39b 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -1,4 +1,5 @@ -// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s +// RUN: tco %s | FileCheck %s +// REQUIRES: shell // Check that tco is working with a basic test. From 10e5c513b59bff0a049cc2a24bb7d41cd874ad7a Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Fri, 21 Jan 2022 09:53:14 -0800 Subject: [PATCH 199/946] Revert "[cmake] Duplicate `{llvm,compiler_rt}_check_linker_flag` for runtime libs and llvm" This reverts commit 4af11272f57a4a6fed2932e9e0857b2c1a707c51. --- cmake/Modules/CheckLinkerFlag.cmake | 17 +++++++++++++++++ compiler-rt/cmake/config-ix.cmake | 20 +++++++++++++------- libcxx/cmake/config-ix.cmake | 4 ++-- libunwind/cmake/config-ix.cmake | 8 ++++---- llvm/cmake/modules/LLVMCheckLinkerFlag.cmake | 12 ++---------- runtimes/CMakeLists.txt | 6 +++--- 6 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 cmake/Modules/CheckLinkerFlag.cmake diff --git a/cmake/Modules/CheckLinkerFlag.cmake b/cmake/Modules/CheckLinkerFlag.cmake new file mode 100644 index 0000000000000..722fe5b1b8ead --- /dev/null +++ b/cmake/Modules/CheckLinkerFlag.cmake @@ -0,0 +1,17 @@ +include(CMakePushCheckState) +include(CheckCCompilerFlag) + +function(llvm_check_linker_flag flag dest) + # If testing a flag with check_c_compiler_flag, it gets added to the compile + # command only, but not to the linker command in that test. If the flag + # is vital for linking to succeed, the test would fail even if it would + # have succeeded if it was included on both commands. + # + # Therefore, try adding the flag to CMAKE_REQUIRED_FLAGS, which gets + # added to both compiling and linking commands in the tests. 
+ + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}") + check_c_compiler_flag("" ${dest}) + cmake_pop_check_state() +endfunction() diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 596f61e8c82ec..33693ce60321d 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -1,5 +1,4 @@ include(CMakePushCheckState) -include(LLVMCheckLinkerFlag) # Compat until CMake 3.18 include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) include(CheckIncludeFiles) @@ -7,6 +6,13 @@ include(CheckLibraryExists) include(CheckSymbolExists) include(TestBigEndian) +function(compiler_rt_check_linker_flag flag out_var) + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}") + check_cxx_compiler_flag("" ${out_var}) + cmake_pop_check_state() +endfunction() + check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC) if (COMPILER_RT_USE_BUILTINS_LIBRARY) include(HandleCompilerRT) @@ -165,12 +171,12 @@ check_library_exists(c++ __cxa_throw "" COMPILER_RT_HAS_LIBCXX) check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX) # Linker flags. -llvm_check_linker_flag(CXX "-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT) -llvm_check_linker_flag(CXX "-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG) +compiler_rt_check_linker_flag("-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT) +compiler_rt_check_linker_flag("-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG) if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") set(VERS_COMPAT_OPTION "-Wl,-z,gnu-version-script-compat") - llvm_check_linker_flag(CXX "${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT) + compiler_rt_check_linker_flag("${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT) endif() set(DUMMY_VERS ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/dummy.vers) @@ -181,10 +187,10 @@ if(COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT) # -z gnu-version-script-compat. 
string(APPEND VERS_OPTION " ${VERS_COMPAT_OPTION}") endif() -llvm_check_linker_flag(CXX "${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT) +compiler_rt_check_linker_flag("${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT) if(ANDROID) - llvm_check_linker_flag(CXX "-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL) + compiler_rt_check_linker_flag("-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL) check_library_exists(log __android_log_write "" COMPILER_RT_HAS_LIBLOG) endif() @@ -430,7 +436,7 @@ if(APPLE) -lc++ -lc++abi) - llvm_check_linker_flag(CXX "-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION) + compiler_rt_check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION) if(COMPILER_RT_HAS_APP_EXTENSION) list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension") endif() diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index e114337f081a3..689a9d09c0179 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -1,6 +1,6 @@ include(CMakePushCheckState) include(CheckLibraryExists) -include(LLVMCheckLinkerFlag) # Compat until CMake 3.18 +include(CheckLinkerFlag) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) include(CheckCSourceCompiles) @@ -12,7 +12,7 @@ include(CheckCSourceCompiles) # libunwind (and the compiler implicit -lunwind wouldn't succeed as the newly # built libunwind isn't installed yet). For those cases, it'd be good to # link with --uwnindlib=none. Check if that option works. 
-llvm_check_linker_flag(C "--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG) +llvm_check_linker_flag("--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG) if (LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none") endif() diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index c1e1b4631abfe..c814611d43785 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -2,14 +2,14 @@ include(CMakePushCheckState) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) include(CheckLibraryExists) -include(LLVMCheckLinkerFlag) # Compat until CMake 3.18 +include(CheckLinkerFlag) include(CheckSymbolExists) include(CheckCSourceCompiles) # The compiler driver may be implicitly trying to link against libunwind, which # might not work if libunwind doesn't exist yet. Try to check if # --unwindlib=none is supported, and use that if possible. -llvm_check_linker_flag(C "--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG) +llvm_check_linker_flag("--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG) if (LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none") endif() @@ -34,11 +34,11 @@ endif() # required for the link to go through. We remove sanitizers from the # configuration checks to avoid spurious link errors. 
-llvm_check_linker_flag(C "-nostdlib++" LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) +llvm_check_linker_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++") else() - llvm_check_linker_flag(C "-nodefaultlibs" LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) + llvm_check_linker_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) if (LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") endif() diff --git a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake index 79c4e2cb4c2cd..253dd768654a2 100644 --- a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake +++ b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake @@ -5,22 +5,14 @@ if (COMMAND check_linker_flag) check_linker_flag(${ARGN}) endmacro() else() - # Until the minimum CMAKE version is 3.18 - include(CheckCXXCompilerFlag) include(CMakePushCheckState) - # cmake builtin compatible, except we assume lang is C or CXX + # cmake builtin compatible, except we assume lang is CXX function(llvm_check_linker_flag lang flag out_var) cmake_push_check_state() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}") - if("${lang}" STREQUAL "C") - check_c_compiler_flag("" ${out_var}) - elseif("${lang}" STREQUAL "CXX") - check_cxx_compiler_flag("" ${out_var}) - else() - message(FATAL_ERROR "\"${lang}\" is not C or CXX") - endif() + check_cxx_compiler_flag("" ${out_var}) cmake_pop_check_state() endfunction() endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 1a50d9e8c98b3..cedce7b3541e5 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -88,7 +88,7 @@ set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules) set(LLVM_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../llvm) include(CheckLibraryExists) -include(LLVMCheckLinkerFlag) # Compat until CMake 3.18 +include(CheckLinkerFlag) include(CheckCCompilerFlag) 
include(CheckCXXCompilerFlag) @@ -100,7 +100,7 @@ if (NOT LLVM_RUNTIMES_LINKING_WORKS) # --unwindlib=none is supported, and use that if possible. # Don't add this if not necessary to fix linking, as it can break using # e.g. ASAN/TSAN. - llvm_check_linker_flag(C "--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG) + llvm_check_linker_flag("--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG) if (LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none") endif() @@ -110,7 +110,7 @@ endif() # Check for -nostdlib++ first; if there's no C++ standard library yet, # all check_cxx_compiler_flag commands will fail until we add -nostdlib++ # (or -nodefaultlibs). -llvm_check_linker_flag(C "-nostdlib++" LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG) +llvm_check_linker_flag(-nostdlib++ LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG) if (LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++") endif() From 5061eb6b0121af11a784d92e2dee5996858d04cd Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 21 Jan 2022 09:57:17 -0800 Subject: [PATCH 200/946] [Sparc] Don't define __sparcv9 and __sparcv9__ when targeting V8+ Currently, clang defines the three macros __sparcv9, __sparcv9__ and __sparc_v9__ when targeting the V8+ baseline, i.e. using the V9 instruction set on a 32-bit target. Since neither gcc nor SolarisStudio define __sparcv9 and __sparcv9__ when targeting V8+, some existing code such as the glibc breaks when defining either of these two macros on a 32-bit target as they are used to detect a 64-bit target. Update the tests accordingly. Fixes PR49562. 
Reviewed By: jrtc27, MaskRay, hvdijk Differential Revision: https://reviews.llvm.org/D98574 --- clang/lib/Basic/Targets/Sparc.cpp | 2 -- clang/test/Preprocessor/predefined-arch-macros.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Basic/Targets/Sparc.cpp b/clang/lib/Basic/Targets/Sparc.cpp index 5eeb77406c342..9321024348014 100644 --- a/clang/lib/Basic/Targets/Sparc.cpp +++ b/clang/lib/Basic/Targets/Sparc.cpp @@ -156,8 +156,6 @@ void SparcV8TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__sparcv8__"); break; case CG_V9: - Builder.defineMacro("__sparcv9"); - Builder.defineMacro("__sparcv9__"); Builder.defineMacro("__sparc_v9__"); break; } diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 757008005ebaf..f0604de684fbe 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -3479,8 +3479,8 @@ // CHECK_SPARC-V9-NOT: #define __sparcv8 1 // CHECK_SPARC-V9-NOT: #define __sparcv8__ 1 // CHECK_SPARC-V9: #define __sparc_v9__ 1 -// CHECK_SPARC-V9: #define __sparcv9 1 -// CHECK_SPARC-V9: #define __sparcv9__ 1 +// CHECK_SPARC-V9-NOT: #define __sparcv9 1 +// CHECK_SPARC-V9-NOT: #define __sparcv9__ 1 // RUN: %clang -E -dM %s -o - 2>&1 \ // RUN: -target sparc-sun-solaris \ From e6ceec9c1d190cdd465548161df6c8ebbb327739 Mon Sep 17 00:00:00 2001 From: eopXD Date: Thu, 20 Jan 2022 10:16:00 -0800 Subject: [PATCH 201/946] [Clang][RISCV] Restrict rvv builtins with zve macros The `zve` extension specifies the maximum ELEN for both integer and floating point mode - defined by macro `__riscv_v_elen` and `__riscv_v_elen_fp`. This commit restricts the functions in riscv_vector.h by the zve defined macro-s. Change enum `RISCVExtension` to `RISCVPredefinedMacro` since now it contains not only extensions. Also added type alignment to it. 
Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D112986 --- clang/utils/TableGen/RISCVVEmitter.cpp | 72 +++++++++++++++----------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 84da6a5901a43..ea2d0b8d2f2f8 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -100,6 +100,9 @@ class RVVType { bool isValid() const { return Valid; } bool isScalar() const { return Scale.hasValue() && Scale.getValue() == 0; } bool isVector() const { return Scale.hasValue() && Scale.getValue() != 0; } + bool isVector(unsigned Width) const { + return isVector() && ElementBitwidth == Width; + } bool isFloat() const { return ScalarType == ScalarTypeKind::Float; } bool isSignedInteger() const { return ScalarType == ScalarTypeKind::SignedInteger; @@ -134,13 +137,15 @@ class RVVType { using RVVTypePtr = RVVType *; using RVVTypes = std::vector; +using RISCVPredefinedMacroT = uint8_t; -enum RISCVExtension : uint8_t { +enum RISCVPredefinedMacro : RISCVPredefinedMacroT { Basic = 0, - F = 1 << 1, - D = 1 << 2, - Zfh = 1 << 3, - RV64 = 1 << 4, + Zfh = 1 << 1, + RV64 = 1 << 2, + VectorMaxELen64 = 1 << 3, + VectorMaxELenFp32 = 1 << 4, + VectorMaxELenFp64 = 1 << 5, }; // TODO refactor RVVIntrinsic class design after support all intrinsic @@ -164,7 +169,7 @@ class RVVIntrinsic { // The types we use to obtain the specific LLVM intrinsic. They are index of // InputTypes. -1 means the return type. 
std::vector IntrinsicTypes; - uint8_t RISCVExtensions = 0; + RISCVPredefinedMacroT RISCVPredefinedMacros = 0; unsigned NF = 1; public: @@ -188,7 +193,9 @@ class RVVIntrinsic { bool isMask() const { return IsMask; } StringRef getIRName() const { return IRName; } StringRef getManualCodegen() const { return ManualCodegen; } - uint8_t getRISCVExtensions() const { return RISCVExtensions; } + RISCVPredefinedMacroT getRISCVPredefinedMacros() const { + return RISCVPredefinedMacros; + } unsigned getNF() const { return NF; } const std::vector &getIntrinsicTypes() const { return IntrinsicTypes; @@ -251,7 +258,8 @@ class RVVEmitter { // Emit the architecture preprocessor definitions. Return true when emits // non-empty string. - bool emitExtDefStr(uint8_t Extensions, raw_ostream &o); + bool emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros, + raw_ostream &o); // Slice Prototypes string into sub prototype string and process each sub // prototype string individually in the Handler. void parsePrototypes(StringRef Prototypes, @@ -789,15 +797,17 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix, // Init RISC-V extensions for (const auto &T : OutInTypes) { if (T->isFloatVector(16) || T->isFloat(16)) - RISCVExtensions |= RISCVExtension::Zfh; - else if (T->isFloatVector(32) || T->isFloat(32)) - RISCVExtensions |= RISCVExtension::F; - else if (T->isFloatVector(64) || T->isFloat(64)) - RISCVExtensions |= RISCVExtension::D; + RISCVPredefinedMacros |= RISCVPredefinedMacro::Zfh; + if (T->isFloatVector(32)) + RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELenFp32; + if (T->isFloatVector(64)) + RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELenFp64; + if (T->isVector(64)) + RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELen64; } for (auto Extension : RequiredExtensions) { if (Extension == "RV64") - RISCVExtensions |= RISCVExtension::RV64; + RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64; } // Init OutputType and InputTypes @@ 
-981,7 +991,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) { // The same extension include in the same arch guard marco. llvm::stable_sort(Defs, [](const std::unique_ptr &A, const std::unique_ptr &B) { - return A->getRISCVExtensions() < B->getRISCVExtensions(); + return A->getRISCVPredefinedMacros() < B->getRISCVPredefinedMacros(); }); OS << "#define __rvv_ai static __inline__\n"; @@ -1280,15 +1290,16 @@ Optional RVVEmitter::computeType(BasicType BT, int Log2LMUL, void RVVEmitter::emitArchMacroAndBody( std::vector> &Defs, raw_ostream &OS, std::function PrintBody) { - uint8_t PrevExt = (*Defs.begin())->getRISCVExtensions(); - bool NeedEndif = emitExtDefStr(PrevExt, OS); + RISCVPredefinedMacroT PrevMacros = + (*Defs.begin())->getRISCVPredefinedMacros(); + bool NeedEndif = emitMacroRestrictionStr(PrevMacros, OS); for (auto &Def : Defs) { - uint8_t CurExt = Def->getRISCVExtensions(); - if (CurExt != PrevExt) { + RISCVPredefinedMacroT CurMacros = Def->getRISCVPredefinedMacros(); + if (CurMacros != PrevMacros) { if (NeedEndif) OS << "#endif\n\n"; - NeedEndif = emitExtDefStr(CurExt, OS); - PrevExt = CurExt; + NeedEndif = emitMacroRestrictionStr(CurMacros, OS); + PrevMacros = CurMacros; } if (Def->hasAutoDef()) PrintBody(OS, *Def); @@ -1297,19 +1308,22 @@ void RVVEmitter::emitArchMacroAndBody( OS << "#endif\n\n"; } -bool RVVEmitter::emitExtDefStr(uint8_t Extents, raw_ostream &OS) { - if (Extents == RISCVExtension::Basic) +bool RVVEmitter::emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros, + raw_ostream &OS) { + if (PredefinedMacros == RISCVPredefinedMacro::Basic) return false; OS << "#if "; ListSeparator LS(" && "); - if (Extents & RISCVExtension::F) - OS << LS << "defined(__riscv_f)"; - if (Extents & RISCVExtension::D) - OS << LS << "defined(__riscv_d)"; - if (Extents & RISCVExtension::Zfh) + if (PredefinedMacros & RISCVPredefinedMacro::Zfh) OS << LS << "defined(__riscv_zfh)"; - if (Extents & RISCVExtension::RV64) + if (PredefinedMacros & 
RISCVPredefinedMacro::RV64) OS << LS << "(__riscv_xlen == 64)"; + if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELen64) + OS << LS << "(__riscv_v_elen >= 64)"; + if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELenFp32) + OS << LS << "(__riscv_v_elen_fp >= 32)"; + if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELenFp64) + OS << LS << "(__riscv_v_elen_fp >= 64)"; OS << "\n"; return true; } From 754d6af7c35983612241b9a077722f4471bbd683 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Wed, 5 Jan 2022 16:27:23 -0800 Subject: [PATCH 202/946] [NFC] Improve code reuse. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D116711 --- .../Instrumentation/HWAddressSanitizer.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 8d3bc1383e96d..fb10a99d13382 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1403,16 +1403,16 @@ bool HWAddressSanitizer::instrumentStack( size_t Size = getAllocaSizeInBytes(*AI); size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); + auto TagEnd = [&](Instruction *Node) { + IRB.SetInsertPoint(Node); + Value *UARTag = getUARTag(IRB, StackTag); + tagAlloca(IRB, AI, UARTag, AlignedSize); + }; bool StandardLifetime = UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT()); if (DetectUseAfterScope && StandardLifetime) { IntrinsicInst *Start = Info.LifetimeStart[0]; IRB.SetInsertPoint(Start->getNextNode()); - auto TagEnd = [&](Instruction *Node) { - IRB.SetInsertPoint(Node); - Value *UARTag = getUARTag(IRB, StackTag); - tagAlloca(IRB, AI, UARTag, AlignedSize); - }; tagAlloca(IRB, AI, Tag, Size); if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd, RetVec, TagEnd)) { @@ -1421,11 +1421,8 @@ bool HWAddressSanitizer::instrumentStack( 
} } else { tagAlloca(IRB, AI, Tag, Size); - for (auto *RI : RetVec) { - IRB.SetInsertPoint(RI); - Value *UARTag = getUARTag(IRB, StackTag); - tagAlloca(IRB, AI, UARTag, AlignedSize); - } + for (auto *RI : RetVec) + TagEnd(RI); if (!StandardLifetime) { for (auto &II : Info.LifetimeStart) II->eraseFromParent(); From 11754a4dbbad2c3be803d1a7366b861941550a6c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 21 Jan 2022 10:49:50 -0800 Subject: [PATCH 203/946] [RISCV] Use RVBUnary in more places to simplify some tablegen declarations. NFCI --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index aae646f02f13f..a8fffb2d7d792 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -597,27 +597,18 @@ def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd), // or gorci. Since Zbb is closer to being finalized than Zbp this will be // misleading to users. 
let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] in { -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), - "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> { - let imm12 = { 0b01101, 0b0011000 }; -} +def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">, + Sched<[WriteREV8, ReadREV8]>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in { -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), - "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> { - let imm12 = { 0b01101, 0b0111000 }; -} +def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">, + Sched<[WriteREV8, ReadREV8]>; } // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] let Predicates = [HasStdExtZbbOrZbp] in { -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1), - "orc.b", "$rd, $rs1">, Sched<[WriteORCB, ReadORCB]> { - let imm12 = { 0b00101, 0b0000111 }; -} +def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">, + Sched<[WriteORCB, ReadORCB]>; } // Predicates = [HasStdExtZbbOrZbp] let Predicates = [HasStdExtZbpOrZbkb] in From 4710750854cee1fdadf5f3381e9431655056b646 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 7 Jan 2022 17:19:41 -0500 Subject: [PATCH 204/946] [mlir][spirv] Support size-1 vector inserts during conversion Differential Revision: https://reviews.llvm.org/D115517 --- .../VectorToSPIRV/VectorToSPIRV.cpp | 22 ++++++++++++++----- .../test/Conversion/VectorToSPIRV/simple.mlir | 22 +++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp index 27037cb4b6f2a..051b691011d8a 100644 --- a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp 
+++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp @@ -157,6 +157,13 @@ struct VectorInsertOpConvert final LogicalResult matchAndRewrite(vector::InsertOp insertOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + // Special case for inserting scalar values into size-1 vectors. + if (insertOp.getSourceType().isIntOrFloat() && + insertOp.getDestVectorType().getNumElements() == 1) { + rewriter.replaceOp(insertOp, adaptor.source()); + return success(); + } + if (insertOp.getSourceType().isa() || !spirv::CompositeType::isValid(insertOp.getDestVectorType())) return failure(); @@ -209,20 +216,23 @@ struct VectorInsertStridedSliceOpConvert final Value srcVector = adaptor.getOperands().front(); Value dstVector = adaptor.getOperands().back(); - // Insert scalar values not supported yet. - if (srcVector.getType().isa() || - dstVector.getType().isa()) - return failure(); - uint64_t stride = getFirstIntValue(insertOp.strides()); if (stride != 1) return failure(); + uint64_t offset = getFirstIntValue(insertOp.offsets()); + + if (srcVector.getType().isa()) { + assert(!dstVector.getType().isa()); + rewriter.replaceOpWithNewOp( + insertOp, dstVector.getType(), srcVector, dstVector, + rewriter.getI32ArrayAttr(offset)); + return success(); + } uint64_t totalSize = dstVector.getType().cast().getNumElements(); uint64_t insertSize = srcVector.getType().cast().getNumElements(); - uint64_t offset = getFirstIntValue(insertOp.offsets()); SmallVector indices(totalSize); std::iota(indices.begin(), indices.end(), 0); diff --git a/mlir/test/Conversion/VectorToSPIRV/simple.mlir b/mlir/test/Conversion/VectorToSPIRV/simple.mlir index 8f5cf197713d2..7a3e4b3289729 100644 --- a/mlir/test/Conversion/VectorToSPIRV/simple.mlir +++ b/mlir/test/Conversion/VectorToSPIRV/simple.mlir @@ -61,6 +61,17 @@ func @insert(%arg0 : vector<4xf32>, %arg1: f32) -> vector<4xf32> { // ----- +// CHECK-LABEL: @insert_size1_vector +// CHECK-SAME: %[[V:.*]]: vector<1xf32>, %[[S:.*]]: f32 +// 
CHECK: %[[R:.+]] = builtin.unrealized_conversion_cast %[[S]] +// CHECK: return %[[R]] +func @insert_size1_vector(%arg0 : vector<1xf32>, %arg1: f32) -> vector<1xf32> { + %1 = vector.insert %arg1, %arg0[0] : f32 into vector<1xf32> + return %1 : vector<1xf32> +} + +// ----- + // CHECK-LABEL: @extract_element // CHECK-SAME: %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32 // CHECK: spv.VectorExtractDynamic %[[V]][%[[ID]]] : vector<4xf32>, i32 @@ -139,6 +150,17 @@ func @insert_strided_slice(%arg0: vector<2xf32>, %arg1: vector<4xf32>) -> vector // ----- +// CHECK-LABEL: @insert_size1_vector +// CHECK-SAME: %[[SUB:.*]]: vector<1xf32>, %[[FULL:.*]]: vector<3xf32> +// CHECK: %[[S:.+]] = builtin.unrealized_conversion_cast %[[SUB]] +// CHECK: spv.CompositeInsert %[[S]], %[[FULL]][2 : i32] : f32 into vector<3xf32> +func @insert_size1_vector(%arg0 : vector<1xf32>, %arg1: vector<3xf32>) -> vector<3xf32> { + %1 = vector.insert_strided_slice %arg0, %arg1 {offsets = [2], strides = [1]} : vector<1xf32> into vector<3xf32> + return %1 : vector<3xf32> +} + +// ----- + // CHECK-LABEL: @fma // CHECK-SAME: %[[A:.*]]: vector<4xf32>, %[[B:.*]]: vector<4xf32>, %[[C:.*]]: vector<4xf32> // CHECK: spv.GLSL.Fma %[[A]], %[[B]], %[[C]] : vector<4xf32> From fd0c6f53913f272ddd88948644fae36e63db120c Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Fri, 21 Jan 2022 19:29:08 +0100 Subject: [PATCH 205/946] [mlir] Move linalg::PadTensorOp to tensor::PadOp. 
RFC: https://llvm.discourse.group/t/rfc-move-linalg-padtensorop-to-tensor-padop/5785 Differential Revision: https://reviews.llvm.org/D117892 --- .../mlir/Dialect/Linalg/IR/LinalgOps.td | 202 ------- .../Dialect/Linalg/Transforms/HoistPadding.h | 11 +- .../Dialect/Linalg/Transforms/Transforms.h | 36 +- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 6 +- mlir/include/mlir/Dialect/Tensor/IR/Tensor.h | 1 + .../mlir/Dialect/Tensor/IR/TensorOps.td | 190 +++++- .../Tensor/IR/TensorTilingInterfaceImpl.h | 36 ++ .../include/mlir/Dialect/Tensor/Utils/Utils.h | 34 ++ mlir/include/mlir/InitAllDialects.h | 2 + .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 3 +- .../TosaToLinalg/TosaToLinalgNamed.cpp | 7 +- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 565 ------------------ .../Dialect/Linalg/Transforms/Bufferize.cpp | 4 +- .../Transforms/ComprehensiveBufferizePass.cpp | 2 +- .../Linalg/Transforms/HoistPadding.cpp | 27 +- .../Transforms/LinalgStrategyPasses.cpp | 6 +- .../Linalg/Transforms/PadOpInterchange.cpp | 13 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 47 +- .../Dialect/Linalg/Transforms/Transforms.cpp | 48 +- .../Linalg/Transforms/Vectorization.cpp | 75 ++- mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt | 1 + mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 19 +- mlir/lib/Dialect/Tensor/CMakeLists.txt | 1 + mlir/lib/Dialect/Tensor/IR/CMakeLists.txt | 18 + .../IR/TensorInferTypeOpInterfaceImpl.cpp | 43 ++ mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 253 ++++++++ .../Tensor/IR/TensorTilingInterfaceImpl.cpp | 279 +++++++++ mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt | 12 + mlir/lib/Dialect/Tensor/Utils/Utils.cpp | 54 ++ .../TosaToLinalg/tosa-to-linalg-named.mlir | 20 +- .../TosaToLinalg/tosa-to-linalg.mlir | 16 +- mlir/test/Dialect/Linalg/bufferize.mlir | 4 +- mlir/test/Dialect/Linalg/canonicalize.mlir | 204 +------ .../test/Dialect/Linalg/codegen-strategy.mlir | 2 +- .../Dialect/Linalg/generalize-pad-tensor.mlir | 8 +- mlir/test/Dialect/Linalg/hoist-padding.mlir | 80 +-- 
mlir/test/Dialect/Linalg/invalid.mlir | 65 -- .../test/Dialect/Linalg/lower-pad-tensor.mlir | 12 +- mlir/test/Dialect/Linalg/pad.mlir | 58 +- mlir/test/Dialect/Linalg/pad_fusion.mlir | 8 +- .../resolve-shaped-type-result-dims.mlir | 4 +- mlir/test/Dialect/Linalg/roundtrip.mlir | 71 --- .../Linalg/subtensor-of-padtensor.mlir | 80 +-- .../Dialect/Linalg/tile-and-fuse-tensors.mlir | 6 +- .../Dialect/Linalg/tile-pad-tensor-op.mlir | 22 +- mlir/test/Dialect/Linalg/vectorization.mlir | 52 +- mlir/test/Dialect/Tensor/canonicalize.mlir | 196 ++++++ mlir/test/Dialect/Tensor/invalid.mlir | 55 ++ mlir/test/Dialect/Tensor/ops.mlir | 74 +++ .../CPU/test-comprehensive-bufferize.mlir | 8 +- .../Dialect/Linalg/CPU/test-padtensor.mlir | 4 +- .../Dialect/Linalg/TestLinalgTransforms.cpp | 7 +- .../llvm-project-overlay/mlir/BUILD.bazel | 38 +- 53 files changed, 1639 insertions(+), 1450 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h create mode 100644 mlir/include/mlir/Dialect/Tensor/Utils/Utils.h create mode 100644 mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp create mode 100644 mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt create mode 100644 mlir/lib/Dialect/Tensor/Utils/Utils.cpp diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td index 4b8ae4985ca53..4150dee567fc7 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td @@ -18,7 +18,6 @@ include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" -include "mlir/Interfaces/TilingInterface.td" include "mlir/Interfaces/ViewLikeInterface.td" // Base class for Linalg dialect ops that do not correspond to library calls. 
@@ -130,207 +129,6 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor", let hasCanonicalizer = 1; } -def Linalg_PadTensorOp : Linalg_Op<"pad_tensor", - [AttrSizedOperandSegments, NoSideEffect, - DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods]> { - let summary = "tensor pad operation"; - let description = [{ - `linalg.pad_tensor` is an operation that pads the `source` tensor - with given `low` and `high` padding config. - - The PadTensor operation supports the following arguments: - - * source: the "base" tensor on which to pad. - * low: A list contains the padding along the start of each - dimension, i.e `low`. - * high: A list contains the padding along the end of each - dimension, i.e. `high`. - * nofold: indicates that the operation should not be folded when source and - result types are equal. - - The result tensor dimensions are `low` + `dim` + `high` along that - dimension. The number of elements of `low` and `high` must match - the rank of the input tensor. They can be either a constant or a - dynamic value. - - The region of the `pad_tensor` operation returns the value to use - for the padding. The arguments of the region represent the index - of the source being accessed. There should be as many arguments as - the rank of the `source` tensor. The value `yield`-ed by the - region is used as the value of the view at the given position. - - If `nofold` is set, the padding operation will not be folded away even - if the source type and the padded type have the same static shape. This can - be used, e.g., for packing or promotion to faster memory. - - Example 1: - - ```mlir - %pad_value = ... : f32 - %0 = linalg.pad_tensor %0 low[1, 2] high[2, 3] { - ^bb0(%arg0 : index, %arg1 : index): - linalg.yield %pad_value : f32 - } : tensor to tensor - ``` - - Example 2: - - ```mlir - %pad_value = ... 
: f32 - %0 = linalg.pad_tensor %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] { - ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): - linalg.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - ``` - - Example 3: - - ```mlir - %pad_value = ... : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor - ``` - - Example 4: - - ```mlir - // Force a padded value to be always exist with `nofold`. - %pad_value = ... : f32 - %0 = linalg.pad_tensor %arg0 nofold low[0, 0] high[0, 0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor<2x3xf32> - ``` - }]; - - let arguments = (ins - AnyTensor:$source, - Variadic:$low, - Variadic:$high, - I64ArrayAttr:$static_low, - I64ArrayAttr:$static_high, - UnitAttr:$nofold); - - let regions = (region SizedRegion<1>:$region); - - let results = (outs AnyTensor:$result); - - // TODO: Remove custom when AllTypesMatch supports opt. operands. - let assemblyFormat = [{ - $source - (`nofold` $nofold^)? - `low` `` custom($low, $static_low) - `high` `` custom($high, $static_high) - $region attr-dict `:` type($source) `to` type($result) - }]; - - let extraClassDeclaration = [{ - static StringRef getStaticLowAttrName() { - return "static_low"; - } - - static StringRef getStaticHighAttrName() { - return "static_high"; - } - - RankedTensorType getSourceType() { - return source().getType().cast(); - } - RankedTensorType getResultType() { - return getResult().getType().cast(); - } - - // Infer the shape of the result tensor given the type of the source tensor - // and paddings. Known result dimensions that cannot necessarily be inferred - // from low/high padding sizes can be optionally specified. Those will be - // considered when computing the result type. 
- static RankedTensorType inferResultType( - RankedTensorType sourceType, - ArrayRef staticLow, - ArrayRef staticHigh, - ArrayRef resultShape = {}); - - // Return a PadTensorOp that pads `source` to `type` size where the static - // sizes are assumed to be greater than the dynamic sizes. The op performs - // "high" padding (i.e. it adds trailing padding values until the desired - // size is met). - static linalg::PadTensorOp createPadHighOp( - Type type, Value source, Value pad, bool nofold, Location loc, - OpBuilder & builder); - - // Return a PadTensorOp that pads `source to `type` size with `pad` value. - // I.e., a block will be created and the `pad` value will be yielded - // directly. If the type passed is nullptr, it is inferred. - static linalg::PadTensorOp createPadScalarOp( - Type type, Value source, Value pad, ArrayRef low, - ArrayRef high, bool nofold, Location loc, - OpBuilder & builder); - - // Return the pad value if it is a constant. Return null value otherwise. - Value getConstantPaddingValue(); - - // Return a vector of all the static or dynamic values (low/high padding) of - // the op. - inline SmallVector getMixedPadImpl(ArrayAttr staticAttrs, - ValueRange values) { - SmallVector res; - unsigned numDynamic = 0; - unsigned count = staticAttrs.size(); - for (unsigned idx = 0; idx < count; ++idx) { - if (ShapedType::isDynamic(staticAttrs[idx].cast().getInt())) - res.push_back(values[numDynamic++]); - else - res.push_back(staticAttrs[idx]); - } - return res; - } - SmallVector getMixedLowPad() { - return getMixedPadImpl(static_low(), low()); - } - SmallVector getMixedHighPad() { - return getMixedPadImpl(static_high(), high()); - } - // Return true if low padding is guaranteed to be 0. - bool hasZeroLowPad() { - return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) { - return getConstantIntValue(ofr) == static_cast(0); - }); - } - // Return true if high padding is guaranteed to be 0. 
- bool hasZeroHighPad() { - return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) { - return getConstantIntValue(ofr) == static_cast(0); - }); - } - }]; - - let builders = [ - // Build a PadTensorOp with mixed static and dynamic entries. - OpBuilder<(ins "Value":$source, "ArrayRef":$staticLow, - "ArrayRef":$staticHigh, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a PadTensorOp with all dynamic entries. - OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - // Build a PadTensorOp with mixed static and dynamic entries and custom - // result type. If the type passed is nullptr, it is inferred. - OpBuilder<(ins "Type":$resultType, "Value":$source, - "ArrayRef":$low, "ArrayRef":$high, - CArg<"bool", "false">:$nofold, - CArg<"ArrayRef", "{}">:$attrs)>, - ]; - - let hasCanonicalizer = 1; - let hasFolder = 1; -} - def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>, Arguments<(ins Variadic:$values)> { let summary = "Linalg yield operation"; diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h index 90e78ca0e274e..8d3315d5ea971 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h @@ -1,4 +1,4 @@ -//===- HoistPadding.h - Hoisting transformation for PadTensorOp -*- C++ -*-===// +//===- HoistPadding.h - Hoisting for tensor::PadOp -*- C++ --------------*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -14,8 +14,11 @@ namespace mlir { class Value; +namespace tensor { +class PadOp; +} // namespace tensor + namespace linalg { -class PadTensorOp; /// Mechanically hoist padding operations on tensors by `numLoops` into a new, /// generally larger tensor. This achieves packing of multiple padding ops into @@ -59,8 +62,8 @@ class PadTensorOp; /// } /// } /// ``` -FailureOr hoistPaddingOnTensors(PadTensorOp opToHoist, int numLoops, - PadTensorOp &hoistedOp); +FailureOr hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops, + tensor::PadOp &hoistedOp); } // namespace linalg } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index f52276e8e9e69..f5e99d5afe83e 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1132,18 +1132,18 @@ void populateLinalgDistributeTiledLoopPattern( // Op-specific patterns. //===----------------------------------------------------------------------===// -/// PadTensorOp is not canonicalized away yet, so we provide a transformation to -/// `linalg.generic`. -struct PadTensorOpTransformationPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +/// tensor::PadOp is not canonicalized away yet, so we provide a transformation +/// to `linalg.generic`. +struct PadOpTransformationPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override; }; /// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc` /// and `nofoldFunc` to set the padding value and the nofold attribute of the -/// introduced PadTensorOps, respectively. Update `paddedOp` to the cloned +/// introduced tensor::PadOps, respectively. 
Update `paddedOp` to the cloned /// statically shaped operation and return the extracted dynamically shaped /// results. If padding fails, return failure. FailureOr> @@ -1153,23 +1153,23 @@ rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, LinalgOp &paddedOp); using OptimizeCopyFn = - std::function; + std::function; -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and /// InsertSliceOp. For now, only constant padding values are supported. /// `OptimizeCopyFn` can be used to customize copying step optimization. -struct GeneralizePadTensorOpPattern : public OpRewritePattern { - GeneralizePadTensorOpPattern(MLIRContext *context, - OptimizeCopyFn optimizeCopyFn = nullptr, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), +struct GeneralizePadOpPattern : public OpRewritePattern { + GeneralizePadOpPattern(MLIRContext *context, + OptimizeCopyFn optimizeCopyFn = nullptr, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), optimizeCopyFn(std::move(optimizeCopyFn)) {} - LogicalResult matchAndRewrite(PadTensorOp padOp, + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override; protected: OptimizeCopyFn optimizeCopyFn; - Value createFillOrGenerateOp(PatternRewriter &rewriter, PadTensorOp padOp, + Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp, Value dest, const SmallVector &dynSizes) const; }; @@ -1179,9 +1179,9 @@ struct GeneralizePadTensorOpPattern : public OpRewritePattern { /// are used to encode a certain ordering of pattern application. To avoid /// scattering magic constants throughout the code base, the patterns must be /// added with this function. `baseBenefit` can be used to offset the benefit -/// of all PadTensorOp vectorization patterns by a certain value. 
-void populatePadTensorOpVectorizationPatterns(RewritePatternSet &patterns, - PatternBenefit baseBenefit = 1); +/// of all tensor::PadOp vectorization patterns by a certain value. +void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, + PatternBenefit baseBenefit = 1); /// Match and rewrite for the pattern: /// ``` diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 7646ef2b5df67..b466d7726f502 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -107,12 +107,12 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp( OpBuilder &b, Location loc, Value source, ArrayRef offsets, ArrayRef sizes, ArrayRef strides); -/// Create a PadTensorOp that pads `source` to the size of the statically sized -/// `type` whose static sizes are assumed to be greater than the dynamic +/// Create a tensor::PadOp that pads `source` to the size of the statically +/// sized `type` whose static sizes are assumed to be greater than the dynamic /// `source` size. The padding introduces trailing `pad` values until the target /// size is met. If `source` is defined by one or more LinalgOps that have been /// padded with the same value and sizes, return their padded result instead of -/// creating a PadTensorOp. +/// creating a tensor::PadOp. 
/// /// Example: /// ``` diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h index db8a07a689ee2..cfec22be37e5f 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h +++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h @@ -19,6 +19,7 @@ #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Interfaces/TilingInterface.h" #include "mlir/Interfaces/ViewLikeInterface.h" //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 1a95d921fee22..05cb41d791d35 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -14,6 +14,7 @@ include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/TilingInterface.td" include "mlir/Interfaces/ViewLikeInterface.td" class Tensor_Op traits = []> @@ -777,6 +778,190 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> { let extraClassDeclaration = commonExtraClassDeclaration; } +//===----------------------------------------------------------------------===// +// PadOp +//===----------------------------------------------------------------------===// + +def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> { + let summary = "tensor pad operation"; + let description = [{ + `tensor.pad` is an operation that pads the `source` tensor + with given `low` and `high` padding config. + + The `tensor.pad` operation supports the following arguments: + + * source: the "base" tensor on which to pad. + * low: A list contains the padding along the start of each + dimension, i.e. `low`.
+ * high: A list contains the padding along the end of each + dimension, i.e. `high`. + * nofold: indicates that the operation should not be folded when source and + result types are equal. + + The result tensor dimensions are `low` + `dim` + `high` along that + dimension. The number of elements of `low` and `high` must match + the rank of the input tensor. They can be either a constant or a + dynamic value. + + The region of the `tensor.pad` operation returns the value to use + for the padding. The arguments of the region represent the index + of the source being accessed. There should be as many arguments as + the rank of the `source` tensor. The value `yield`-ed by the + region is used as the value of the view at the given position. + + If `nofold` is set, the padding operation will not be folded away even + if the source type and the padded type have the same static shape. This can + be used, e.g., for packing or promotion to faster memory. + + Example 1: + + ```mlir + %pad_value = ... : f32 + %0 = tensor.pad %0 low[1, 2] high[2, 3] { + ^bb0(%arg0 : index, %arg1 : index): + tensor.yield %pad_value : f32 + } : tensor<?x?xf32> to tensor<?x?xf32> + ``` + + Example 2: + + ```mlir + %pad_value = ... : f32 + %0 = tensor.pad %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] { + ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index): + tensor.yield %pad_value : f32 + } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> + ``` + + Example 3: + + ```mlir + %pad_value = ... : f32 + %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor<2x3xf32> to tensor<?x?xf32> + ``` + + Example 4: + + ```mlir + // Force a padded value to always exist with `nofold`. + %pad_value = ...
: f32 + %0 = tensor.pad %arg0 nofold low[0, 0] high[0, 0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor<2x3xf32> to tensor<2x3xf32> + ``` + }]; + + let arguments = (ins + AnyTensor:$source, + Variadic:$low, + Variadic:$high, + I64ArrayAttr:$static_low, + I64ArrayAttr:$static_high, + UnitAttr:$nofold); + + let regions = (region SizedRegion<1>:$region); + + let results = (outs AnyTensor:$result); + + // TODO: Remove custom when AllTypesMatch supports opt. operands. + let assemblyFormat = [{ + $source + (`nofold` $nofold^)? + `low` `` custom($low, $static_low) + `high` `` custom($high, $static_high) + $region attr-dict `:` type($source) `to` type($result) + }]; + + let extraClassDeclaration = [{ + static StringRef getStaticLowAttrName() { + return "static_low"; + } + + static StringRef getStaticHighAttrName() { + return "static_high"; + } + + RankedTensorType getSourceType() { + return source().getType().cast(); + } + RankedTensorType getResultType() { + return getResult().getType().cast(); + } + + // Infer the shape of the result tensor given the type of the source tensor + // and paddings. Known result dimensions that cannot necessarily be inferred + // from low/high padding sizes can be optionally specified. Those will be + // considered when computing the result type. + static RankedTensorType inferResultType( + RankedTensorType sourceType, + ArrayRef staticLow, + ArrayRef staticHigh, + ArrayRef resultShape = {}); + + // Return the pad value if it is a constant. Return null value otherwise. + Value getConstantPaddingValue(); + + // Return a vector of all the static or dynamic values (low/high padding) of + // the op. 
+ inline SmallVector getMixedPadImpl(ArrayAttr staticAttrs, + ValueRange values) { + SmallVector res; + unsigned numDynamic = 0; + unsigned count = staticAttrs.size(); + for (unsigned idx = 0; idx < count; ++idx) { + if (ShapedType::isDynamic(staticAttrs[idx].cast().getInt())) + res.push_back(values[numDynamic++]); + else + res.push_back(staticAttrs[idx]); + } + return res; + } + SmallVector getMixedLowPad() { + return getMixedPadImpl(static_low(), low()); + } + SmallVector getMixedHighPad() { + return getMixedPadImpl(static_high(), high()); + } + // Return true if low padding is guaranteed to be 0. + bool hasZeroLowPad() { + return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) { + return getConstantIntValue(ofr) == static_cast(0); + }); + } + // Return true if high padding is guaranteed to be 0. + bool hasZeroHighPad() { + return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) { + return getConstantIntValue(ofr) == static_cast(0); + }); + } + }]; + + let builders = [ + // Build a PadOp with mixed static and dynamic entries. + OpBuilder<(ins "Value":$source, "ArrayRef":$staticLow, + "ArrayRef":$staticHigh, "ValueRange":$low, "ValueRange":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a PadOp with all dynamic entries. + OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + // Build a PadOp with mixed static and dynamic entries and custom + // result type. If the type passed is nullptr, it is inferred. 
+ OpBuilder<(ins "Type":$resultType, "Value":$source, + "ArrayRef":$low, "ArrayRef":$high, + CArg<"bool", "false">:$nofold, + CArg<"ArrayRef", "{}">:$attrs)>, + ]; + + let hasCanonicalizer = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // YieldOp @@ -784,16 +969,17 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> { def Tensor_YieldOp : Tensor_Op<"yield", [NoSideEffect, ReturnLike, Terminator, - HasParent<"::mlir::tensor::GenerateOp">]> { + HasParent<"::mlir::tensor::GenerateOp, ::mlir::tensor::PadOp">]> { let summary = "Yield a value from a region"; let description = [{ This operation is used to yield a single value from a within a region. It is used to create dynamically sized tensors - (see `tensor.generate` op). + (see `tensor.generate` and `tensor.pad` ops). }]; let arguments = (ins AnyType:$value); let assemblyFormat = "$value attr-dict `:` type($value)"; + // Dummy builder to appease code in templated ensureTerminator that // GenerateOp's auto-generated parser calls. let builders = [OpBuilder<(ins), [{ /* nothing to do */ }]>]; diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h new file mode 100644 index 0000000000000..6cd819758eab7 --- /dev/null +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h @@ -0,0 +1,36 @@ +//===- TensorTilingOpInterfaceImpl.h - ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Tiling interface for TensorOps with ExternalModel. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ +#define MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ + +#include "mlir/IR/Dialect.h" + +namespace mlir { +namespace tensor { + +/// Registers external models for Tiling interface for tensor ops. +/// Currently, it registers: +/// +/// * TilingInterface for `tensor.pad`. +/// +/// Unfortunately, a "normal" internal registration is not possible at the +/// moment, because of the dependency of the interface implementation for these +/// ops on `affine.apply` and Affine dialect already depends on TensorOps. In +/// order to break the cyclic dependency (TensorOps->AffineOps->TensorOps) the +/// implementation is moved to a separate library. +void registerTilingOpInterfaceExternalModels(mlir::DialectRegistry &registry); + +} // namespace tensor +} // namespace mlir + +#endif // MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_ diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h new file mode 100644 index 0000000000000..4b4c53896b7d8 --- /dev/null +++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h @@ -0,0 +1,34 @@ +//===- Utils.h - Utilities to support the Tensor dialect -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ +#define MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ + +#include "mlir/Dialect/Tensor/IR/Tensor.h" + +namespace mlir { +namespace tensor { + +// Return a PadOp that pads `source` to `type` size where the static +// sizes are assumed to be greater than the dynamic sizes. The op performs +// "high" padding (i.e.
it adds trailing padding values until the desired +// size is met). +PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold, + Location loc, OpBuilder &builder); + +// Return a PadOp that pads `source` to `type` size with `pad` value. +// I.e., a block will be created and the `pad` value will be yielded +// directly. If the type passed is nullptr, it is inferred. +PadOp createPadScalarOp(Type type, Value source, Value pad, + ArrayRef low, ArrayRef high, + bool nofold, Location loc, OpBuilder &builder); + +} // namespace tensor +} // namespace mlir + +#endif // MLIR_DIALECT_TENSOR_UTILS_UTILS_H_ diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h index 40e9781edcc90..86f4e1db7a284 100644 --- a/mlir/include/mlir/InitAllDialects.h +++ b/mlir/include/mlir/InitAllDialects.h @@ -43,6 +43,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h" +#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/X86Vector/X86VectorDialect.h" @@ -86,6 +87,7 @@ inline void registerAllDialects(DialectRegistry &registry) { x86vector::X86VectorDialect>(); // clang-format on tensor::registerInferTypeOpInterfaceExternalModels(registry); + tensor::registerTilingOpInterfaceExternalModels(registry); } /// Append all the MLIR dialects to the registry contained in the given context.
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 0f53b30125625..6833a0c2d72cb 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" @@ -1932,7 +1933,7 @@ class PadConverter : public OpRewritePattern { highValues.push_back(highVal); } - auto newPadOp = linalg::PadTensorOp::createPadScalarOp( + auto newPadOp = tensor::createPadScalarOp( padOp.getType(), input, padConstant, lowValues, highValues, /*nofold=*/false, loc, rewriter); diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp index 54012c9760eb2..4fcd2cf56c35c 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -17,6 +17,7 @@ #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" @@ -55,9 +56,9 @@ static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, Value padValue = rewriter.create(loc, padAttr); - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + return tensor::createPadScalarOp(RankedTensorType::get(paddedShape, inputETy), + input, padValue, lowIndices, highIndices, + /*nofold=*/false, loc, rewriter) .result(); } diff 
--git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index f41e8487858f3..3ca3932a44eec 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1079,561 +1079,6 @@ LogicalResult InitTensorOp::reifyResultShapes( return success(); } -//===----------------------------------------------------------------------===// -// PadTensorOp -//===----------------------------------------------------------------------===// - -// TODO: Replace custom directive with AllTypesMatch as soon as it -// supports optional types. -void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand, - Type typeToInfer, Type typeToInferFrom) {} - -ParseResult parseInferType(OpAsmParser &parser, - Optional optOperand, - Type &typeToInfer, Type typeToInferFrom) { - if (optOperand) - typeToInfer = typeToInferFrom; - return success(); -} - -static LogicalResult verify(PadTensorOp op) { - auto sourceType = op.source().getType().cast(); - auto resultType = op.result().getType().cast(); - auto expectedType = PadTensorOp::inferResultType( - sourceType, extractFromI64ArrayAttr(op.static_low()), - extractFromI64ArrayAttr(op.static_high())); - for (int i = 0, e = sourceType.getRank(); i < e; ++i) { - if (resultType.getDimSize(i) == expectedType.getDimSize(i)) - continue; - if (expectedType.isDynamicDim(i)) - continue; - return op.emitError("specified type ") - << resultType << " does not match the inferred type " - << expectedType; - } - - auto &region = op.region(); - unsigned rank = resultType.getRank(); - Block &block = region.front(); - if (block.getNumArguments() != rank) - return op.emitError("expected the block to have ") << rank << " arguments"; - - // Note: the number and type of yield values are checked in the YieldOp.
- for (const auto &en : llvm::enumerate(block.getArgumentTypes())) { - if (!en.value().isIndex()) - return op.emitOpError("expected block argument ") - << (en.index() + 1) << " to be an index"; - } - - return success(); -} - -RankedTensorType PadTensorOp::inferResultType(RankedTensorType sourceType, - ArrayRef staticLow, - ArrayRef staticHigh, - ArrayRef resultShape) { - unsigned rank = sourceType.getRank(); - assert(staticLow.size() == rank && "unexpected staticLow size mismatch"); - assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch"); - assert((resultShape.empty() || resultShape.size() == rank) && - "unexpected resultShape size mismatch"); - - SmallVector inferredShape; - for (auto i : llvm::seq(0, rank)) { - if (sourceType.isDynamicDim(i) || - staticLow[i] == ShapedType::kDynamicSize || - staticHigh[i] == ShapedType::kDynamicSize) { - inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize - : resultShape[i]); - } else { - int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i]; - assert((resultShape.empty() || size == resultShape[i] || - resultShape[i] == ShapedType::kDynamicSize) && - "mismatch between inferred shape and result shape"); - inferredShape.push_back(size); - } - } - - return RankedTensorType::get(inferredShape, sourceType.getElementType()); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source, - ArrayRef staticLow, - ArrayRef staticHigh, ValueRange low, - ValueRange high, bool nofold, - ArrayRef attrs) { - auto sourceType = source.getType().cast(); - auto resultType = inferResultType(sourceType, staticLow, staticHigh); - build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow), - b.getI64ArrayAttr(staticHigh), nofold ? 
b.getUnitAttr() : UnitAttr()); - result.addAttributes(attrs); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source, - ValueRange low, ValueRange high, bool nofold, - ArrayRef attrs) { - auto sourceType = source.getType().cast(); - unsigned rank = sourceType.getRank(); - SmallVector staticVector(rank, ShapedType::kDynamicSize); - build(b, result, source, staticVector, staticVector, low, high, nofold, - attrs); -} - -void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType, - Value source, ArrayRef low, - ArrayRef high, bool nofold, - ArrayRef attrs) { - assert(resultType.isa()); - auto sourceType = source.getType().cast(); - SmallVector dynamicLow, dynamicHigh; - SmallVector staticLow, staticHigh; - // staticLow and staticHigh have full information of the padding config. - // This will grow staticLow and staticHigh with 1 value. If the config is - // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1 - // value as well. - dispatchIndexOpFoldResults(low, dynamicLow, staticLow, - ShapedType::kDynamicSize); - dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh, - ShapedType::kDynamicSize); - if (!resultType) { - resultType = - PadTensorOp::inferResultType(sourceType, staticLow, staticHigh); - } - build(b, result, resultType, source, dynamicLow, dynamicHigh, - b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh), - nofold ? b.getUnitAttr() : UnitAttr()); - result.addAttributes(attrs); -} - -PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad, - ArrayRef low, - ArrayRef high, - bool nofold, Location loc, - OpBuilder &builder) { - auto padTensorOp = - builder.create(loc, type, source, low, high, nofold); - int rank = padTensorOp.getResultType().getRank(); - SmallVector blockArgTypes(rank, builder.getIndexType()); - SmallVector blockArgLocs(rank, loc); - auto &region = padTensorOp.region(); - // `builder.createBlock` changes the insertion point within the block.
Create - // a guard to reset the insertion point of the builder after it is destroyed. - OpBuilder::InsertionGuard guard(builder); - builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs); - builder.create(loc, pad); - return padTensorOp; -} - -PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad, - bool nofold, Location loc, - OpBuilder &b) { - SmallVector low, high; - auto rankedTensorType = type.cast(); - assert(rankedTensorType.hasStaticShape()); - for (const auto &en : enumerate(rankedTensorType.getShape())) { - AffineExpr d0; - bindDims(b.getContext(), d0); - auto dimOp = b.createOrFold(loc, source, en.index()); - Value paddingWidth = - makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}); - high.push_back(paddingWidth); - low.push_back(b.createOrFold(loc, 0)); - } - return PadTensorOp::createPadScalarOp(type, source, pad, low, high, nofold, - loc, b); -} - -LogicalResult PadTensorOp::reifyResultShapes( - OpBuilder &b, ReifiedRankedShapedTypeDims &reifiedReturnShapes) { - Location loc = getLoc(); - auto lowPad = getMixedLowPad(); - auto highPad = getMixedHighPad(); - SmallVector shapes; - for (auto dim : llvm::seq(0, getSourceType().getRank())) { - // Shape along each dimension is source dim + low pad + high pad.
- SmallVector mapOperands; - mapOperands.push_back(b.createOrFold(loc, source(), dim)); - AffineExpr expr = b.getAffineDimExpr(0); - unsigned numSymbols = 0; - auto addOpFoldResult = [&](OpFoldResult valueOrAttr) { - if (Value v = valueOrAttr.dyn_cast()) { - expr = expr + b.getAffineSymbolExpr(numSymbols++); - mapOperands.push_back(v); - return; - } - int64_t staticValue = - valueOrAttr.get().cast().getInt(); - expr = expr + staticValue; - }; - addOpFoldResult(lowPad[dim]); - addOpFoldResult(highPad[dim]); - shapes.push_back(applyMapToValues( - b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]); - } - reifiedReturnShapes.emplace_back(std::move(shapes)); - return success(); -} - -//===----------------------------------------------------------------------===// -// Methods related to PadTensor tiling. -//===----------------------------------------------------------------------===// - -SmallVector PadTensorOp::getDestinationOperands(OpBuilder &b) { - ReifiedRankedShapedTypeDims reifiedShapes; - (void)reifyResultShapes(b, reifiedShapes); - SmallVector mixedSizes = getAsOpFoldResult(reifiedShapes[0]); - Value initTensor = b.create(getLoc(), mixedSizes, - getResultType().getElementType()); - return {initTensor}; -} - -SmallVector PadTensorOp::getLoopIteratorTypes() { - SmallVector iteratorTypes(getResultType().getRank(), - getParallelIteratorTypeName()); - return iteratorTypes; -} - -SmallVector PadTensorOp::getIterationDomain(OpBuilder &b) { - ReifiedRankedShapedTypeDims reifiedShapes; - (void)reifyResultShapes(b, reifiedShapes); - Value zero = b.create(getLoc(), 0); - Value one = b.create(getLoc(), 1); - // Initialize all the ranges to {zero, one, one}. All the `ub`s are - // overwritten. 
- SmallVector loopRanges(reifiedShapes[0].size(), {zero, one, one}); - for (const auto &ub : enumerate(reifiedShapes[0])) - loopRanges[ub.index()].size = ub.value(); - return loopRanges; -} - -SmallVector PadTensorOp::getTiledImplementation( - OpBuilder &b, ValueRange dest, ArrayRef offsets, - ArrayRef sizes, bool /*tileDestOperands*/) { - // Only constant padding value supported. - Value padValue = getConstantPaddingValue(); - if (!padValue) - return {}; - - // Helper variables and functions for various arithmetic operations. These are - // used extensively for computing new offset/length and padding values. - Location loc = getLoc(); - AffineExpr dim0, dim1; - bindDims(b.getContext(), dim0, dim1); - // Add two integers. - auto addMap = AffineMap::get(2, 0, {dim0 + dim1}); - auto add = [&](Value v1, Value v2) { - return b.createOrFold(loc, addMap, ValueRange{v1, v2}); - }; - // Subtract two integers. - auto subMap = AffineMap::get(2, 0, {dim0 - dim1}); - auto sub = [&](Value v1, Value v2) { - return b.createOrFold(loc, subMap, ValueRange{v1, v2}); - }; - // Take the minimum of two integers. - auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext()); - auto min = [&](Value v1, Value v2) { - return b.createOrFold(loc, idMap, ValueRange{v1, v2}); - }; - // Take the maximum of two integers. - auto max = [&](Value v1, Value v2) { - return b.createOrFold(loc, idMap, ValueRange{v1, v2}); - }; - // Zero index-typed integer. - auto zero = b.create(loc, 0); - - // Helper function for filling static/dynamic low/high padding indices vectors - // of PadTensorOp. - auto appendIndex = [&](Value val, SmallVector &dynIndices, - SmallVector &staticIndices) { - if (auto constInt = getConstantIntValue(val)) { - staticIndices.push_back(*constInt); - } else { - staticIndices.push_back(ShapedType::kDynamicSize); - dynIndices.push_back(val); - } - }; - - // Compute new offsets, lengths, low padding, high padding. 
- SmallVector newOffsets, newLengths, newStrides; - SmallVector newLows, newHighs; - SmallVector staticNewLows, staticNewHighs; - // Set to true if the original data source is not read at all. - bool hasZeroLen = false; - // Same as hasZeroLen, but for dynamic dimension sizes. This condition - // is true if the original data source turns out to be unused at runtime. - Value dynHasZeroLenCond; - - int64_t rank = getSourceType().getRank(); - for (unsigned dim = 0; dim < rank; ++dim) { - auto low = getValueOrCreateConstantIndexOp(b, loc, getMixedLowPad()[dim]); - bool hasLowPad = getConstantIntValue(low) != static_cast(0); - auto high = getValueOrCreateConstantIndexOp(b, loc, getMixedHighPad()[dim]); - bool hasHighPad = getConstantIntValue(high) != static_cast(0); - auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]); - auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]); - auto srcSize = b.createOrFold(loc, source(), dim); - - // The new amount of low padding is `low - offset`. Except for the case - // where none of the low padding is read. In that case, the new amount of - // low padding is zero. - // - // Optimization: If low = 0, then newLow = 0. - Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero; - appendIndex(newLow, newLows, staticNewLows); - - // Start reading the data from position `offset - low`. Since the original - // read may have started in the low padding zone, this value could be - // negative. Therefore, start reading from: - // - // max(offset - low, 0) - // - // The original read could also have started in the high padding zone. - // In that case, set the offset to the end of source tensor. The new - // ExtractSliceOp length will be zero in that case. (Effectively reading no - // data from the source.) - // - // Optimization: If low = 0, then the formula can be simplified. - Value newOffset = hasLowPad ? 
min(max(sub(offset, low), zero), srcSize) - : min(offset, srcSize); - newOffsets.push_back(getAsOpFoldResult(newOffset)); - - // The original ExtractSliceOp was reading until position `offset + length`. - // Therefore, the corresponding position within the source tensor is: - // - // offset + length - low - // - // In case the original ExtractSliceOp stopped reading within the low - // padding zone, this value can be negative. In that case, the end position - // of the read should be zero. (Similar to newOffset.) - // - // The original read could also have stopped in the high padding zone. - // In that case, set the end positition of the read should be the end of the - // source tensor. (Similar to newOffset.) - // - // endLoc = min(max(offset - low + length, 0), srcSize) - // - // The new ExtractSliceOp length is `endLoc - newOffset`. - // - // Optimization: If low = 0, then the formula can be simplified. - Value endLoc = hasLowPad - ? min(max(add(sub(offset, low), length), zero), srcSize) - : min(add(offset, length), srcSize); - Value newLength = sub(endLoc, newOffset); - newLengths.push_back(getAsOpFoldResult(newLength)); - - // Check if newLength is zero. In that case, no SubTensorOp should be - // executed. - if (auto newLengthInt = getConstantIntValue(newLength)) { - hasZeroLen |= *newLengthInt == 0; - } else { - Value check = b.create(loc, arith::CmpIPredicate::eq, - newLength, zero); - dynHasZeroLenCond = - dynHasZeroLenCond - ? b.create(loc, check, dynHasZeroLenCond) - : check; - } - - // The amount of high padding is simply the number of elements remaining, - // so that the result has the same length as the original ExtractSliceOp. - // As an optimization, if the original high padding is zero, then the new - // high padding must also be zero. - Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero; - appendIndex(newHigh, newHighs, staticNewHighs); - - // Only unit stride supported. 
- newStrides.push_back(b.getIndexAttr(1)); - } - - // The shape of the result can be obtained from the sizes passed in. - SmallVector dynDims; - SmallVector shape; - dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize); - RankedTensorType resultType = - RankedTensorType::get(shape, getResultType().getElementType()); - - // Insert cast to ensure that types match. (May be folded away.) - auto castResult = [&](Value val) -> Operation * { - auto castOp = b.create(loc, resultType, val); - return castOp; - }; - - // In cases where the original data source is unused: Emit a GenerateOp and - // do not generate a SliceOp. (The result shape of the SliceOp would - // have a dimension of size 0, the semantics of which is unclear.) - auto createGenerateOp = [&]() { - // Create GenerateOp. - auto generateOp = b.create( - loc, resultType, dynDims, - [&](OpBuilder &builder, Location gLoc, ValueRange indices) { - builder.create(gLoc, padValue); - }); - return castResult(generateOp); - }; - - // Emit a SliceOp and a PadTensorOp. Should not be used in cases where - // the result shape of the new SliceOp has a zero dimension. - auto createPadTensorOfSubTensor = [&]() { - // Create pad_tensor(subtensor(x)). - auto newSliceOp = b.create( - loc, source(), newOffsets, newLengths, newStrides); - auto newPadTensorOp = b.create( - loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs); - - // Copy region to new PadTensorOp. - BlockAndValueMapping bvm; - region().cloneInto(&newPadTensorOp.getRegion(), bvm); - - // Cast result and return. - return castResult(newPadTensorOp); - }; - - // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known - // that the original data source x is not used. - if (hasZeroLen) { - return {createGenerateOp()}; - } - - // If there are dynamic dimensions: Generate an scf.if check to avoid creating - // SliceOps with result dimensions of size 0 at runtime. 
- if (dynHasZeroLenCond) { - auto result = b.create( - loc, resultType, dynHasZeroLenCond, - /*thenBuilder=*/ - [&](OpBuilder &b, Location loc) { - b.create(loc, createGenerateOp()->getResult(0)); - }, - /*elseBuilder=*/ - [&](OpBuilder &b, Location loc) { - b.create(loc, - createPadTensorOfSubTensor()->getResult(0)); - }); - return {result}; - } - return {createPadTensorOfSubTensor()}; -} - -namespace { -// Folds linalg.pad_tensor when padding is static zeros and the attribute -// doesn't request otherwise. -struct FoldStaticZeroPadding : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad()) - return failure(); - if (padTensorOp.nofold()) - return failure(); - rewriter.replaceOpWithNewOp( - padTensorOp, padTensorOp.result().getType(), padTensorOp.source()); - return success(); - } -}; - -// Fold CastOp into PadTensorOp when adding static information. 
-struct FoldSourceTensorCast : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - auto castOp = padTensorOp.source().getDefiningOp(); - if (!tensor::canFoldIntoConsumerOp(castOp)) - return failure(); - - auto newResultType = PadTensorOp::inferResultType( - castOp.source().getType().cast(), - extractFromI64ArrayAttr(padTensorOp.static_low()), - extractFromI64ArrayAttr(padTensorOp.static_high()), - padTensorOp.getResultType().getShape()); - - if (newResultType == padTensorOp.getResultType()) { - rewriter.updateRootInPlace(padTensorOp, [&]() { - padTensorOp.sourceMutable().assign(castOp.source()); - }); - } else { - auto newOp = rewriter.create( - padTensorOp->getLoc(), newResultType, padTensorOp.source(), - padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(), - padTensorOp.static_high(), padTensorOp.nofold()); - BlockAndValueMapping mapper; - padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); - - rewriter.replaceOpWithNewOp( - padTensorOp, padTensorOp.getResultType(), newOp); - } - return success(); - } -}; - -// Fold CastOp using the result of PadTensorOp back into the latter if it adds -// static information. 
-struct FoldTargetTensorCast : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(PadTensorOp padTensorOp, - PatternRewriter &rewriter) const override { - if (!padTensorOp.result().hasOneUse()) - return failure(); - auto tensorCastOp = - dyn_cast(*padTensorOp->getUsers().begin()); - if (!tensorCastOp) - return failure(); - if (!tensor::preservesStaticInformation(padTensorOp.result().getType(), - tensorCastOp.dest().getType())) - return failure(); - - auto replacementOp = rewriter.create( - padTensorOp.getLoc(), tensorCastOp.dest().getType(), - padTensorOp.source(), padTensorOp.low(), padTensorOp.high(), - padTensorOp.static_low(), padTensorOp.static_high(), - padTensorOp.nofold()); - replacementOp.region().takeBody(padTensorOp.region()); - - rewriter.replaceOp(padTensorOp, replacementOp.result()); - rewriter.replaceOp(tensorCastOp, replacementOp.result()); - return success(); - } -}; -} // namespace - -void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); - results.add(context); -} - -/// Return the padding value of the PadTensorOp if it constant. In this context, -/// "constant" means an actual constant or "defined outside of the block". -/// -/// Values are considered constant in three cases: -/// - A ConstantLike value. -/// - A basic block argument from a different block. -/// - A value defined outside of the block. -/// -/// If the padding value is not constant, an empty Value is returned. -Value PadTensorOp::getConstantPaddingValue() { - auto yieldOp = dyn_cast(getRegion().front().getTerminator()); - if (!yieldOp || yieldOp.values().size() != 1) - return {}; - Value padValue = yieldOp.values().front(); - // Check if yield value is a constant. - if (matchPattern(padValue, m_Constant())) - return padValue; - // Check if yield value is defined inside the PadTensorOp block. 
- if (padValue.getParentBlock() == &getRegion().front()) - return {}; - // Else: Yield value defined outside of the PadTensorOp block. - return padValue; -} - -OpFoldResult PadTensorOp::fold(ArrayRef) { - if (getResultType().hasStaticShape() && getResultType() == getSourceType() && - !nofold()) - return source(); - return {}; -} - //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// @@ -1687,16 +1132,6 @@ static LogicalResult verify(linalg::YieldOp op) { if (auto linalgOp = dyn_cast(parentOp)) return verifyYield(op, cast(parentOp)); - if (auto padTensorOp = dyn_cast(parentOp)) { - if (op.getNumOperands() != 1) - return op.emitOpError("expected single yield operand (got ") - << op->getNumOperands() << ")"; - if (op.getOperand(0).getType() != - padTensorOp.getType().cast().getElementType()) - return op.emitOpError("expected yield type to match shape element type"); - return success(); - } - if (auto tiledLoopOp = dyn_cast(parentOp)) { // Check if output args with tensor types match results types. 
SmallVector tensorOuts; diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp index 1283dec2a0a5b..9e45ed2a8fbf0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp @@ -320,7 +320,7 @@ struct LinalgBufferizePass : public LinalgBufferizeBase { target.addLegalDialect(); - target.addIllegalOp(); @@ -363,5 +363,5 @@ void mlir::linalg::populateLinalgBufferizePatterns( VectorTransferWriteOpConverter >(typeConverter, patterns.getContext()); // clang-format on - patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 5ea77641b7f06..3c8b9c9606952 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -67,7 +67,7 @@ struct LinalgComprehensiveModuleBufferize static void applyEnablingTransformations(ModuleOp moduleOp) { RewritePatternSet patterns(moduleOp.getContext()); - patterns.add(moduleOp.getContext()); + patterns.add(moduleOp.getContext()); (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns)); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index a4edd8a87bba7..21c92ee304dfa 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -1,4 +1,4 @@ -//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===// +//===- HoistPadding.cpp - Hoisting for tensor::PadOp ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -37,7 +37,7 @@ using llvm::dbgs; using namespace mlir; using namespace mlir::linalg; -/// Analysis class to support PadTensorOp hoisting across multiple enclosing +/// Analysis class to support tensor::PadOp hoisting across multiple enclosing /// loops. The failure conditions are: /// 1. Pad op has a use that is not an input of a LinalgOp. /// 2. Pad op does not have a constant padding value. @@ -53,7 +53,7 @@ using namespace mlir::linalg; /// 8. There is no enclosing scf::ForOp that indexes the padded data. /// Other cases succeed and will trigger hoisting of the pad op. struct HoistingAnalysis { - HoistingAnalysis(PadTensorOp padTensorOp, int numLoops); + HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops); bool isValid() { return valid; } @@ -98,7 +98,7 @@ struct HoistingAnalysis { /// ``` /// dropNonIndexDependencies(%padded_slice, %slice) /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice. - LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp, + LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp, tensor::ExtractSliceOp sliceOp); /// Encodes whether the analysis is valid and hoisting can proceed. @@ -107,7 +107,7 @@ struct HoistingAnalysis { /// Return true if all uses of `padTensorOp` are an input tensor of some /// LinalgOp. -static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) { +static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) { for (OpOperand &use : padTensorOp.result().getUses()) { auto linalgUser = dyn_cast(use.getOwner()); if (!linalgUser || !linalgUser.isInputTensor(&use)) { @@ -126,7 +126,7 @@ static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) { /// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm. /// Control-flow and other containing ops with regions are not modeled atm. 
static void -getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels, +getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels, SmallVector &reverseEnclosingLoops) { AsmState state(padTensorOp->getParentOfType()); (void)state; @@ -143,7 +143,7 @@ getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels, } } -HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) { +HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) { valid = false; // Bail on any use that isn't an input of a Linalg op. @@ -232,7 +232,7 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) { } LogicalResult -HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp, +HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp, tensor::ExtractSliceOp sliceOp) { // Set of all values used for index computation. SetVector indexEdges; @@ -373,9 +373,9 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer, ValueRange{ivVal, lbVal, stepVal}); } -FailureOr mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist, +FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops, - PadTensorOp &hoistedOp) { + tensor::PadOp &hoistedOp) { LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops << " loops\n"); HoistingAnalysis analysis(opToHoist, numLoops); @@ -399,7 +399,7 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist, // Create the packed tensor into which we amortize // padding. SmallVector packedShape(nPackedLoops, ShapedType::kDynamicSize); - // TODO: go grab dims when necessary, for now PadTensorOp returns a static + // TODO: go grab dims when necessary, for now tensor::PadOp returns a static // tensor. llvm::append_range(packedShape, paddedTensorType.getShape()); auto packedTensorType = @@ -463,7 +463,7 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist, // sizes = [1 .. 1, paddedShape]. 
SmallVector sizes(nPackedLoops, b.getIndexAttr(1)); for (int64_t sz : paddedTensorType.getShape()) { - // TODO: go grab dims when necessary, for now PadTensorOp returns a static + // TODO: go grab dims when necessary, for now tensor::PadOp returns a static // tensor. assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes"); sizes.push_back(b.getIndexAttr(sz)); @@ -506,6 +506,7 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist, loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides); // Make the newly cloned `opToHoist` available to the caller. - hoistedOp = cast(bvm.lookup(opToHoist.result()).getDefiningOp()); + hoistedOp = + cast(bvm.lookup(opToHoist.result()).getDefiningOp()); return newResult; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp index 31f8fa5b369e3..025adc2c56b2b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -100,7 +100,7 @@ struct LinalgStrategyTilePass filter); else tilingPattern.add(ctx, options, filter); - if (anchorOpName == linalg::PadTensorOp::getOperationName()) + if (anchorOpName == tensor::PadOp::getOperationName()) populatePadTensorTilingPatterns(tilingPattern, options); (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); } @@ -302,12 +302,12 @@ struct LinalgStrategyVectorizePass std::move(vectorizationPatterns)); // Apply the pad tensor op vectorization separately to avoid running the - // GenericPadTensorOpVectorizationPattern too early. + // GenericPadOpVectorizationPattern too early. // TODO: Improve once we have better infrastructure to control pattern // application. 
if (vectorizePadding) { RewritePatternSet patterns(funcOp.getContext()); - linalg::populatePadTensorOpVectorizationPatterns(patterns); + linalg::populatePadOpVectorizationPatterns(patterns); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } } diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp index 64de5197266be..78b5305c8a1ec 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp @@ -38,9 +38,9 @@ namespace { /// ``` /// /// if the `linalg.generic` has all parallel iterator types. -struct FusePadTensorOp : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, +struct FusePadOp : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override { // Only works on padding op that sets the padded value to a constant. 
Value padValue = padOp.getConstantPaddingValue(); @@ -61,7 +61,10 @@ struct FusePadTensorOp : OpRewritePattern { padOp, "only supported for ops with all parallel iterator types"); } ReifiedRankedShapedTypeDims resultShape; - if (failed(padOp.reifyResultShapes(rewriter, resultShape)) || + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(padOp.getOperation()); + if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter, + resultShape)) || resultShape.size() != 1) { return rewriter.notifyMatchFailure( padOp, "failed to get shape of pad op result"); @@ -118,5 +121,5 @@ struct FusePadTensorOp : OpRewritePattern { void mlir::linalg::populateFusePadTensorWithProducerLinalgOpPatterns( RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + patterns.add(patterns.getContext()); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index f005846ef4667..22fc7df2d69aa 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -338,18 +338,18 @@ mlir::linalg::tileLinalgOp(RewriterBase &b, LinalgOp op, return failure(); } -/// Generate a loop nest around a given PadTensorOp (for tiling). `newPadOp` -/// and `loopNest` are output parameters that return the new (tiled) PadTensorOp -/// and the loop nest. -static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, - PadTensorOp &newPadOp, LoopNest &loopNest, - const LinalgTilingOptions &options) { +/// Generate a loop nest around a given tensor::PadOp (for tiling). `newPadOp` +/// and `loopNest` are output parameters that return the new (tiled) +/// tensor::PadOp and the loop nest. 
+static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op, + tensor::PadOp &newPadOp, LoopNest &loopNest, + const LinalgTilingOptions &options) { Location loc = op.getLoc(); OpBuilder::InsertionGuard g(builder); builder.setInsertionPoint(op); - // Clone PadTensorOp so that the existing op can be replaced more easily. - newPadOp = cast(builder.clone(*op.getOperation())); + // Clone tensor::PadOp so that the existing op can be replaced more easily. + newPadOp = cast(builder.clone(*op.getOperation())); // Get rank and tile sizes. int64_t rank = op.getResultType().getRank(); SmallVector tileSizes = @@ -358,7 +358,9 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, Value zero = builder.create(loc, 0); tileSizes.append(rank - tileSizes.size(), zero); // Compute lower and upper bounds of the loop nest. - SmallVector ranges = op.getIterationDomain(builder); + TilingInterface tilingInterface = + dyn_cast(op.getOperation()); + SmallVector ranges = tilingInterface.getIterationDomain(builder); SmallVector lbs, dims, allDims, steps; for (int64_t i = 0; i < rank; ++i) { allDims.push_back(ranges[i].size); @@ -369,7 +371,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, } } // Generate loop nest: One loop per dimension. - SmallVector destOperand = op.getDestinationOperands(builder); + SmallVector destOperand = + tilingInterface.getDestinationOperands(builder); loopNest = mlir::scf::buildLoopNest( builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand), [&](OpBuilder &b, Location loc, ValueRange localIvs, @@ -379,8 +382,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, computeTileOffsets(b, loc, localIvs, tileSizes); SmallVector sizes = computeTileSizes(b, loc, localIvs, tileSizes, allDims); - // Create ExtractSliceOp: Extract a tile from the PadTensorOp. - // Note: The PadTensorOp is located outside of the loop nest. 
It is + // Create ExtractSliceOp: Extract a tile from the tensor::PadOp. + // Note: The tensor::PadOp is located outside of the loop nest. It is // later moved inside by ExtractSliceOfPadTensorSwapPattern. auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext()); Value tiledOutput = @@ -399,21 +402,21 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op, } namespace { -struct PadTensorOpTilingPattern : public OpRewritePattern { - PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt) - : OpRewritePattern(ctx), options(std::move(opt)) {} +struct PadOpTilingPattern : public OpRewritePattern { + PadOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt) + : OpRewritePattern(ctx), options(std::move(opt)) {} - LogicalResult matchAndRewrite(PadTensorOp op, + LogicalResult matchAndRewrite(tensor::PadOp op, PatternRewriter &rewriter) const override { if (op->hasAttr(LinalgTransforms::kLinalgTransformMarker)) return failure(); - PadTensorOp newPadOp; + tensor::PadOp newPadOp; LoopNest loopNest; - if (failed(tilePadTensorOp(rewriter, op, newPadOp, loopNest, options))) + if (failed(tilePadOp(rewriter, op, newPadOp, loopNest, options))) return failure(); newPadOp->setAttr(LinalgTransforms::kLinalgTransformMarker, rewriter.getUnitAttr()); - // Replace all uses of the original PadTensorOp. + // Replace all uses of the original tensor::PadOp. 
rewriter.replaceOp(op, loopNest.getResults()[0]); return success(); } @@ -470,7 +473,7 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns( tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx); InitTensorOp::getCanonicalizationPatterns(patterns, ctx); - PadTensorOp::getCanonicalizationPatterns(patterns, ctx); + tensor::PadOp::getCanonicalizationPatterns(patterns, ctx); ctx->getLoadedDialect()->getCanonicalizationPatterns(patterns); CanonicalizationPatternList< @@ -489,13 +492,13 @@ static void insertTilingPatterns(RewritePatternSet &patterns, #define GET_OP_LIST #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" >::insert(patterns, options, f); - patterns.add(ctx, options); + patterns.add(ctx, options); } void mlir::linalg::populatePadTensorTilingPatterns( RewritePatternSet &patterns, const LinalgTilingOptions &options) { auto *ctx = patterns.getContext(); - patterns.add(ctx, options); + patterns.add(ctx, options); } static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 462f3c668f5c3..9eb7b7cfe751c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -160,9 +160,9 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() { /// Helper function that tries to pad `opOperand`. Exit early for scalar /// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined /// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already -/// has a static shape. Set `result` to the result of the created PadTensorOp or -/// and return success if the operand either has been padded to a static shape -/// or already had a static shape and failure otherwise. +/// has a static shape. 
Set `result` to the result of the created tensor::PadOp +/// or and return success if the operand either has been padded to a static +/// shape or already had a static shape and failure otherwise. static LogicalResult padOperandToSmallestStaticBoundingBox( OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand, const PaddingValueComputationFunction &paddingFunc, @@ -528,10 +528,10 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite( // Hoist the padding. for (const auto &en : enumerate(depths)) { OpOperand &opOperand = paddedOp->getOpOperand(en.index()); - auto padTensorOp = opOperand.get().getDefiningOp(); + auto padTensorOp = opOperand.get().getDefiningOp(); if (!padTensorOp || en.value() == 0) continue; - PadTensorOp hoistedOp; + tensor::PadOp hoistedOp; FailureOr newResult = hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp); if (failed(newResult)) @@ -749,10 +749,11 @@ static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { return SmallVector(nParallelLoops, getParallelIteratorTypeName()); } -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to /// initialize with pad_val) and GenericOp (to copy contents). -LogicalResult PadTensorOpTransformationPattern::matchAndRewrite( - linalg::PadTensorOp padOp, PatternRewriter &rewriter) const { +LogicalResult +PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp, + PatternRewriter &rewriter) const { auto inputShapedType = padOp.source().getType().cast(); auto resultShapedType = padOp.result().getType().cast(); @@ -767,9 +768,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite( // 1. A BBarg from a different block. // 2. A value defined outside of the current block. 
Block &block = padOp.region().front(); - auto yieldOp = cast(block.getTerminator()); - assert(yieldOp.getNumOperands() == 1 && "expected single operand yield"); - Value padValue = yieldOp.values().front(); + auto yieldOp = cast(block.getTerminator()); + Value padValue = yieldOp.value(); Operation *definingOp = padValue.getDefiningOp(); if (definingOp && definingOp->getBlock() == &block) return failure(); @@ -812,8 +812,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite( /// Filling `dest` using FillOp constant padding value if possible. /// Otherwise, generate a tensor::GenerateOp. -Value GeneralizePadTensorOpPattern::createFillOrGenerateOp( - PatternRewriter &rewriter, PadTensorOp padOp, Value dest, +Value GeneralizePadOpPattern::createFillOrGenerateOp( + PatternRewriter &rewriter, tensor::PadOp padOp, Value dest, const SmallVector &dynSizes) const { auto padValue = padOp.getConstantPaddingValue(); if (padValue) @@ -825,20 +825,12 @@ Value GeneralizePadTensorOpPattern::createFillOrGenerateOp( // Copy region to new op. BlockAndValueMapping bvm; padOp.region().cloneInto(&generateOp.getRegion(), bvm); - // Rewrite linalg::YieldOp to tensor::YieldOp. - OpBuilder::InsertionGuard guard(rewriter); - auto yieldOp = - dyn_cast(generateOp.getRegion().front().getTerminator()); - assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator"); - assert(yieldOp.values().size() == 1); - rewriter.setInsertionPoint(yieldOp); - rewriter.replaceOpWithNewOp(yieldOp, yieldOp.values()[0]); return generateOp; } LogicalResult -GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp, - PatternRewriter &rewriter) const { +GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp, + PatternRewriter &rewriter) const { // Given an OpFoldResult, return an index-typed value. 
auto getIdxValue = [&](OpFoldResult ofr) { if (auto val = ofr.dyn_cast()) @@ -877,10 +869,10 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp, if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded()) return success(); - // PadTensorOps cannot be optimized. Generate a InsertSliceOp instead + // tensor::PadOps cannot be optimized. Generate a InsertSliceOp instead // for copying the PadOp source. auto sourceType = padOp.getSourceType(); - // Compute size of source of PadTensorOp. + // Compute size of source of tensor::PadOp. SmallVector srcSizes; for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) { if (sourceType.isDynamicDim(dim)) { @@ -901,15 +893,17 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp, LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const { - auto padOp = sliceOp.source().getDefiningOp(); + auto padOp = sliceOp.source().getDefiningOp(); if (!padOp) return failure(); // Only unit stride supported. if (!sliceOp.hasUnitStride()) return failure(); + TilingInterface tilingInterface = + dyn_cast(padOp.getOperation()); Operation *tiledPadOp = - padOp + tilingInterface .getTiledImplementation( rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(), sliceOp.getMixedSizes(), /*tileDestOperands=*/false) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 7472d9ee20898..e0b2c64056674 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -682,20 +682,19 @@ static SmallVector ofrToIndexValues(OpBuilder &builder, Location loc, return result; } -/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and +/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and /// InsertSliceOp. For now, only constant padding values are supported. 
/// If there is enough static type information, TransferReadOps and /// TransferWriteOps may be generated instead of InsertSliceOps. -struct GenericPadTensorOpVectorizationPattern - : public GeneralizePadTensorOpPattern { - GenericPadTensorOpVectorizationPattern(MLIRContext *context, - PatternBenefit benefit = 1) - : GeneralizePadTensorOpPattern(context, tryVectorizeCopy, benefit) {} - /// Vectorize the copying of a PadTensorOp's source. This is possible if +struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern { + GenericPadOpVectorizationPattern(MLIRContext *context, + PatternBenefit benefit = 1) + : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {} + /// Vectorize the copying of a tensor::PadOp's source. This is possible if /// each dimension size is statically know in the source type or the result /// type (or both). static LogicalResult tryVectorizeCopy(PatternRewriter &rewriter, - PadTensorOp padOp, Value dest) { + tensor::PadOp padOp, Value dest) { auto sourceType = padOp.getSourceType(); auto resultType = padOp.getResultType(); @@ -767,13 +766,13 @@ struct GenericPadTensorOpVectorizationPattern } }; -/// Base pattern for rewriting PadTensorOps whose result is consumed by a +/// Base pattern for rewriting tensor::PadOps whose result is consumed by a /// given operation type OpTy. template -struct VectorizePadTensorOpUserPattern : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; +struct VectorizePadOpUserPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(PadTensorOp padOp, + LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const final { bool changed = false; // Insert users in vector, because some users may be replaced/removed. 
@@ -785,10 +784,10 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern { protected: virtual LogicalResult rewriteUser(PatternRewriter &rewriter, - PadTensorOp padOp, OpTy op) const = 0; + tensor::PadOp padOp, OpTy op) const = 0; }; -/// Rewrite use of PadTensorOp result in TransferReadOp. E.g.: +/// Rewrite use of tensor::PadOp result in TransferReadOp. E.g.: /// ``` /// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> /// %r = vector.transfer_read %0[%c0, %c0], %cst @@ -807,12 +806,12 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern { /// - `xferOp` has no out-of-bounds dims or mask. /// - Low padding is static 0. /// - Single, scalar padding value. -struct PadTensorOpVectorizationWithTransferReadPattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - vector::TransferReadOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithTransferReadPattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + vector::TransferReadOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, vector::TransferReadOp xferOp) const override { // Low padding must be static 0. if (!padOp.hasZeroLowPad()) @@ -837,7 +836,7 @@ struct PadTensorOpVectorizationWithTransferReadPattern } }; -/// Rewrite use of PadTensorOp result in TransferWriteOp. +/// Rewrite use of tensor::PadOp result in TransferWriteOp. /// This pattern rewrites TransferWriteOps that write to a padded tensor /// value, where the same amount of padding is immediately removed again after /// the write. In such cases, the TransferWriteOp can write to the non-padded @@ -869,12 +868,12 @@ struct PadTensorOpVectorizationWithTransferReadPattern /// ExtractSliceOp trims the same amount of padding that was added /// beforehand. /// - Single, scalar padding value. 
-struct PadTensorOpVectorizationWithTransferWritePattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - vector::TransferWriteOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithTransferWritePattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + vector::TransferWriteOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, vector::TransferWriteOp xferOp) const override { // TODO: support 0-d corner case. if (xferOp.getTransferRank() == 0) @@ -925,7 +924,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern /// sizes may turn out to be equal at runtime. bool hasSameTensorSize(Value beforePadding, tensor::ExtractSliceOp afterTrimming) const { - // If the input to PadTensorOp is a CastOp, try with with both CastOp + // If the input to tensor::PadOp is a CastOp, try with with both CastOp // result and CastOp operand. if (auto castOp = beforePadding.getDefiningOp()) if (hasSameTensorSize(castOp.source(), afterTrimming)) @@ -1000,7 +999,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern } }; -/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.: +/// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.: /// ``` /// %0 = linalg.pad_tensor %src ... : tensor to tensor<17x5xf32> /// %r = tensor.insert_slice %0 @@ -1023,12 +1022,12 @@ struct PadTensorOpVectorizationWithTransferWritePattern /// - Only unit strides in `insertOp`. /// - Single, scalar padding value. /// - `padOp` result not used as destination. 
-struct PadTensorOpVectorizationWithInsertSlicePattern - : public VectorizePadTensorOpUserPattern { - using VectorizePadTensorOpUserPattern< - tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern; +struct PadOpVectorizationWithInsertSlicePattern + : public VectorizePadOpUserPattern { + using VectorizePadOpUserPattern< + tensor::InsertSliceOp>::VectorizePadOpUserPattern; - LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp, + LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp, tensor::InsertSliceOp insertOp) const override { // Low padding must be static 0. if (!padOp.hasZeroLowPad()) @@ -1087,14 +1086,14 @@ struct PadTensorOpVectorizationWithInsertSlicePattern } }; -void mlir::linalg::populatePadTensorOpVectorizationPatterns( +void mlir::linalg::populatePadOpVectorizationPatterns( RewritePatternSet &patterns, PatternBenefit baseBenefit) { - patterns.add(patterns.getContext(), - baseBenefit); + patterns.add(patterns.getContext(), + baseBenefit); // Try these specialized patterns first before resorting to the generic one. 
- patterns.add( + patterns.add( patterns.getContext(), baseBenefit.getBenefit() + 1); } diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt index a55955654dafe..1231f378a306d 100644 --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -12,5 +12,6 @@ add_mlir_dialect_library(MLIRLinalgUtils MLIRSCF MLIRPass MLIRStandard + MLIRTensorUtils MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 61be7bc6c6461..bf37719325ccb 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -23,6 +23,7 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/StandardOps/Utils/Utils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Utils/Utils.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/AffineExpr.h" #include "mlir/IR/AffineExprVisitor.h" @@ -328,7 +329,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, // Exit if `source` is not defined by an ExtractSliceOp. auto sliceOp = source.getDefiningOp(); if (!sliceOp) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Search the `source` use-def chain for padded LinalgOps. Value current = sliceOp.source(); @@ -339,22 +340,22 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, OpResult opResult = current.cast(); current = linalgOp.getOutputOperand(opResult.getResultNumber())->get(); } - auto padTensorOp = current ? current.getDefiningOp() : nullptr; + auto padTensorOp = current ? current.getDefiningOp() : nullptr; - // Exit if the search fails to match a PadTensorOp at the end of the matched + // Exit if the search fails to match a tensor::PadOp at the end of the matched // LinalgOp sequence. 
if (!padTensorOp) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the padded result type does not match. if (sliceOp.source().getType() != type) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the LinalgOps are not high padded. if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) { return getConstantIntValue(ofr) != static_cast(0); })) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if `padTensorOpSliceOp`, which defines the slice used by // `padTensorOp`, is rank-reducing. @@ -362,7 +363,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, padTensorOp.source().getDefiningOp(); if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() != padTensorOpSliceOp.getMixedSizes().size()) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size // of the slice padded by `padTensorOp`. @@ -372,7 +373,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it)); })) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Exit if the padding values do not match. 
Attribute padTensorOpPadAttr, padAttr; @@ -380,7 +381,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, if (!padTensorOpPad || !matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) || !matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr) - return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b); + return tensor::createPadHighOp(type, source, pad, nofold, loc, b); // Return the padded result if the padding values and sizes match. return sliceOp.source(); diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt index 9f57627c321fb..31167e6af908b 100644 --- a/mlir/lib/Dialect/Tensor/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(IR) add_subdirectory(Transforms) +add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt index 87aeaab6ca976..df2807f318e04 100644 --- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES TensorDialect.cpp TensorInferTypeOpInterfaceImpl.cpp TensorOps.cpp + TensorTilingInterfaceImpl.cpp ) add_mlir_dialect_library(MLIRTensor @@ -43,3 +44,20 @@ add_mlir_dialect_library(MLIRTensorInferTypeOpInterfaceImpl MLIRSupport MLIRTensor ) + +add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl + TensorTilingInterfaceImpl.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor + + LINK_LIBS PUBLIC + MLIRAffine + MLIRIR + MLIRLinalg + MLIRSCF + MLIRStandard + MLIRSupport + MLIRTensor + MLIRTilingInterface + ) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp index 588b635805893..bb7bd82f40a68 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp @@ -161,6 
+161,48 @@ struct ReifyExpandOrCollapseShapeOp } }; +namespace { + +struct ReifyPadOp + : public ReifyRankedShapedTypeOpInterface::ExternalModel { + LogicalResult + reifyResultShapes(Operation *op, OpBuilder &b, + ReifiedRankedShapedTypeDims &reifiedReturnShapes) const { + auto padOp = cast(op); + Location loc = padOp.getLoc(); + auto lowPad = padOp.getMixedLowPad(); + auto highPad = padOp.getMixedHighPad(); + SmallVector shapes; + for (auto dim : llvm::seq(0, padOp.getSourceType().getRank())) { + // Shape along each dimension is source dim + low pad + high pad. + SmallVector mapOperands; + mapOperands.push_back( + b.createOrFold(loc, padOp.source(), dim)); + AffineExpr expr = b.getAffineDimExpr(0); + unsigned numSymbols = 0; + auto addOpFoldResult = [&](OpFoldResult valueOrAttr) { + if (Value v = valueOrAttr.dyn_cast()) { + expr = expr + b.getAffineSymbolExpr(numSymbols++); + mapOperands.push_back(v); + return; + } + int64_t staticValue = + valueOrAttr.get().cast().getInt(); + expr = expr + staticValue; + }; + addOpFoldResult(lowPad[dim]); + addOpFoldResult(highPad[dim]); + shapes.push_back(applyMapToValues( + b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]); + } + reifiedReturnShapes.emplace_back(std::move(shapes)); + return success(); + } +}; + +} // namespace + void mlir::tensor::registerInferTypeOpInterfaceExternalModels( DialectRegistry ®istry) { registry @@ -169,4 +211,5 @@ void mlir::tensor::registerInferTypeOpInterfaceExternalModels( registry .addOpInterface>(); + registry.addOpInterface(); } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 613edde638683..42f57a9cf99bd 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -476,6 +476,7 @@ static LogicalResult verify(GenerateOp op) { // Ensure that the region yields an element of the right type. 
auto yieldOp = llvm::cast(op.body().getBlocks().front().getTerminator()); + if (yieldOp.value().getType() != resultTy.getElementType()) return op.emitOpError( "body must be terminated with a `yield` operation of the tensor " @@ -1482,6 +1483,258 @@ Value mlir::tensor::createCanonicalRankReducingInsertSliceOp(OpBuilder &b, sizes, strides); } +//===----------------------------------------------------------------------===// +// PadOp +//===----------------------------------------------------------------------===// + +// TODO: Replace custom directive with AllTypesMatch as soon as it +// supports optional types. +void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand, + Type typeToInfer, Type typeToInferFrom) {} + +ParseResult parseInferType(OpAsmParser &parser, + Optional optOperand, + Type &typeToInfer, Type typeToInferFrom) { + if (optOperand) + typeToInfer = typeToInferFrom; + return success(); +} + +static LogicalResult verify(PadOp op) { + auto sourceType = op.source().getType().cast(); + auto resultType = op.result().getType().cast(); + auto expectedType = PadOp::inferResultType( + sourceType, extractFromI64ArrayAttr(op.static_low()), + extractFromI64ArrayAttr(op.static_high())); + for (int i = 0, e = sourceType.getRank(); i < e; ++i) { + if (resultType.getDimSize(i) == expectedType.getDimSize(i)) + continue; + if (expectedType.isDynamicDim(i)) + continue; + return op.emitError("specified type ") + << resultType << " does not match the inferred type " + << expectedType; + } + + auto ®ion = op.region(); + unsigned rank = resultType.getRank(); + Block &block = region.front(); + if (block.getNumArguments() != rank) + return op.emitError("expected the block to have ") << rank << " arguments"; + + // Note: the number and type of yield values are checked in the YieldOp. 
+ for (const auto &en : llvm::enumerate(block.getArgumentTypes())) { + if (!en.value().isIndex()) + return op.emitOpError("expected block argument ") + << (en.index() + 1) << " to be an index"; + } + + // Ensure that the region yields an element of the right type. + auto yieldOp = llvm::cast(block.getTerminator()); + if (yieldOp.value().getType() != + op.getType().cast().getElementType()) + return op.emitOpError("expected yield type to match shape element type"); + + return success(); +} + +RankedTensorType PadOp::inferResultType(RankedTensorType sourceType, + ArrayRef staticLow, + ArrayRef staticHigh, + ArrayRef resultShape) { + unsigned rank = sourceType.getRank(); + assert(staticLow.size() == rank && "unexpected staticLow size mismatch"); + assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch"); + assert((resultShape.empty() || resultShape.size() == rank) && + "unexpected resultShape size mismatch"); + + SmallVector inferredShape; + for (auto i : llvm::seq(0, rank)) { + if (sourceType.isDynamicDim(i) || + staticLow[i] == ShapedType::kDynamicSize || + staticHigh[i] == ShapedType::kDynamicSize) { + inferredShape.push_back(resultShape.empty() ? 
ShapedType::kDynamicSize + : resultShape[i]); + } else { + int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i]; + assert((resultShape.empty() || size == resultShape[i] || + resultShape[i] == ShapedType::kDynamicSize) && + "mismatch between inferred shape and result shape"); + inferredShape.push_back(size); + } + } + + return RankedTensorType::get(inferredShape, sourceType.getElementType()); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Value source, + ArrayRef staticLow, ArrayRef staticHigh, + ValueRange low, ValueRange high, bool nofold, + ArrayRef attrs) { + auto sourceType = source.getType().cast(); + auto resultType = inferResultType(sourceType, staticLow, staticHigh); + build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow), + b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr()); + result.addAttributes(attrs); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Value source, + ValueRange low, ValueRange high, bool nofold, + ArrayRef attrs) { + auto sourceType = source.getType().cast(); + unsigned rank = sourceType.getRank(); + SmallVector staticVector(rank, ShapedType::kDynamicSize); + build(b, result, source, staticVector, staticVector, low, high, nofold, + attrs); +} + +void PadOp::build(OpBuilder &b, OperationState &result, Type resultType, + Value source, ArrayRef low, + ArrayRef high, bool nofold, + ArrayRef attrs) { + assert(resultType.isa()); + auto sourceType = source.getType().cast(); + SmallVector dynamicLow, dynamicHigh; + SmallVector staticLow, staticHigh; + // staticLow and staticHigh have full information of the padding config. + // This will grow staticLow and staticHigh with 1 value. If the config is + // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1 + // value as well. 
+ dispatchIndexOpFoldResults(low, dynamicLow, staticLow, + ShapedType::kDynamicSize); + dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh, + ShapedType::kDynamicSize); + if (!resultType) { + resultType = PadOp::inferResultType(sourceType, staticLow, staticHigh); + } + build(b, result, resultType, source, dynamicLow, dynamicHigh, + b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh), + nofold ? b.getUnitAttr() : UnitAttr()); + result.addAttributes(attrs); +} + +namespace { +// Folds tensor.pad when padding is static zeros and the attribute +// doesn't request otherwise. +struct FoldStaticZeroPadding : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad()) + return failure(); + if (padTensorOp.nofold()) + return failure(); + rewriter.replaceOpWithNewOp( + padTensorOp, padTensorOp.result().getType(), padTensorOp.source()); + return success(); + } +}; + +// Fold CastOp into PadOp when adding static information. 
+struct FoldSourceTensorCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + auto castOp = padTensorOp.source().getDefiningOp(); + if (!tensor::canFoldIntoConsumerOp(castOp)) + return failure(); + + auto newResultType = PadOp::inferResultType( + castOp.source().getType().cast(), + extractFromI64ArrayAttr(padTensorOp.static_low()), + extractFromI64ArrayAttr(padTensorOp.static_high()), + padTensorOp.getResultType().getShape()); + + if (newResultType == padTensorOp.getResultType()) { + rewriter.updateRootInPlace(padTensorOp, [&]() { + padTensorOp.sourceMutable().assign(castOp.source()); + }); + } else { + auto newOp = rewriter.create( + padTensorOp->getLoc(), newResultType, padTensorOp.source(), + padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(), + padTensorOp.static_high(), padTensorOp.nofold()); + BlockAndValueMapping mapper; + padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); + + rewriter.replaceOpWithNewOp( + padTensorOp, padTensorOp.getResultType(), newOp); + } + return success(); + } +}; + +// Fold CastOp using the result of PadOp back into the latter if it adds +// static information. 
+struct FoldTargetTensorCast : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(PadOp padTensorOp, + PatternRewriter &rewriter) const override { + if (!padTensorOp.result().hasOneUse()) + return failure(); + auto tensorCastOp = + dyn_cast(*padTensorOp->getUsers().begin()); + if (!tensorCastOp) + return failure(); + if (!tensor::preservesStaticInformation(padTensorOp.result().getType(), + tensorCastOp.dest().getType())) + return failure(); + + auto replacementOp = rewriter.create( + padTensorOp.getLoc(), tensorCastOp.dest().getType(), + padTensorOp.source(), padTensorOp.low(), padTensorOp.high(), + padTensorOp.static_low(), padTensorOp.static_high(), + padTensorOp.nofold()); + replacementOp.region().takeBody(padTensorOp.region()); + + rewriter.replaceOp(padTensorOp, replacementOp.result()); + rewriter.replaceOp(tensorCastOp, replacementOp.result()); + return success(); + } +}; +} // namespace + +void PadOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results + .add( + context); +} + +/// Return the padding value of the PadOp if it constant. In this context, +/// "constant" means an actual constant or "defined outside of the block". +/// +/// Values are considered constant in three cases: +/// - A ConstantLike value. +/// - A basic block argument from a different block. +/// - A value defined outside of the block. +/// +/// If the padding value is not constant, an empty Value is returned. +Value PadOp::getConstantPaddingValue() { + auto yieldOp = dyn_cast(getRegion().front().getTerminator()); + if (!yieldOp) + return {}; + Value padValue = yieldOp.value(); + // Check if yield value is a constant. + if (matchPattern(padValue, m_Constant())) + return padValue; + // Check if yield value is defined inside the PadOp block. + if (padValue.getParentBlock() == &getRegion().front()) + return {}; + // Else: Yield value defined outside of the PadOp block. 
+ return padValue; +} + +OpFoldResult PadOp::fold(ArrayRef) { + if (getResultType().hasStaticShape() && getResultType() == getSourceType() && + !nofold()) + return source(); + return {}; +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp new file mode 100644 index 0000000000000..d206dc2ce9e90 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -0,0 +1,279 @@ +//===- TensorTilingInterface.cpp - Tiling Interface models *- C++ ------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/Utils/Utils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Interfaces/TilingInterface.h" + +using namespace mlir; +using namespace mlir::tensor; + +namespace { + +struct PadOpTiling : public TilingInterface::ExternalModel { + + SmallVector getDestinationOperands(Operation *op, OpBuilder &b) const { + ReifiedRankedShapedTypeDims reifiedShapes; + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(op); + (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes); + + auto padOp = cast(op); + SmallVector mixedSizes = getAsOpFoldResult(reifiedShapes[0]); + Value initTensor = b.create( + op->getLoc(), mixedSizes, padOp.getResultType().getElementType()); + return {initTensor}; + } 
+ + SmallVector getLoopIteratorTypes(Operation *op) const { + auto padOp = cast(op); + SmallVector iteratorTypes(padOp.getResultType().getRank(), + getParallelIteratorTypeName()); + return iteratorTypes; + } + + SmallVector getIterationDomain(Operation *op, OpBuilder &b) const { + ReifiedRankedShapedTypeDims reifiedShapes; + ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface = + dyn_cast(op); + (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes); + + Location loc = op->getLoc(); + Value zero = b.create(loc, 0); + Value one = b.create(loc, 1); + // Initialize all the ranges to {zero, one, one}. All the `ub`s are + // overwritten. + SmallVector loopRanges(reifiedShapes[0].size(), {zero, one, one}); + for (const auto &ub : enumerate(reifiedShapes[0])) + loopRanges[ub.index()].size = ub.value(); + return loopRanges; + } + + SmallVector + getTiledImplementation(Operation *op, OpBuilder &b, ValueRange dest, + ArrayRef offsets, + ArrayRef sizes, + bool /*tileDestOperands*/) const { + auto padOp = cast(op); + // Only constant padding value supported. + Value padValue = padOp.getConstantPaddingValue(); + if (!padValue) + return {}; + + // Helper variables and functions for various arithmetic operations. These + // are used extensively for computing new offset/length and padding values. + Location loc = op->getLoc(); + AffineExpr dim0, dim1; + bindDims(b.getContext(), dim0, dim1); + // Add two integers. + auto addMap = AffineMap::get(2, 0, {dim0 + dim1}); + auto add = [&](Value v1, Value v2) { + return b.createOrFold(loc, addMap, ValueRange{v1, v2}); + }; + // Subtract two integers. + auto subMap = AffineMap::get(2, 0, {dim0 - dim1}); + auto sub = [&](Value v1, Value v2) { + return b.createOrFold(loc, subMap, ValueRange{v1, v2}); + }; + // Take the minimum of two integers. 
+ auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext()); + auto min = [&](Value v1, Value v2) { + return b.createOrFold(loc, idMap, ValueRange{v1, v2}); + }; + // Take the maximum of two integers. + auto max = [&](Value v1, Value v2) { + return b.createOrFold(loc, idMap, ValueRange{v1, v2}); + }; + // Zero index-typed integer. + auto zero = b.create(loc, 0); + + // Helper function for filling static/dynamic low/high padding indices + // vectors of PadOp. + auto appendIndex = [&](Value val, SmallVector &dynIndices, + SmallVector &staticIndices) { + if (auto constInt = getConstantIntValue(val)) { + staticIndices.push_back(*constInt); + } else { + staticIndices.push_back(ShapedType::kDynamicSize); + dynIndices.push_back(val); + } + }; + + // Compute new offsets, lengths, low padding, high padding. + SmallVector newOffsets, newLengths, newStrides; + SmallVector newLows, newHighs; + SmallVector staticNewLows, staticNewHighs; + // Set to true if the original data source is not read at all. + bool hasZeroLen = false; + // Same as hasZeroLen, but for dynamic dimension sizes. This condition + // is true if the original data source turns out to be unused at runtime. + Value dynHasZeroLenCond; + + int64_t rank = padOp.getSourceType().getRank(); + for (unsigned dim = 0; dim < rank; ++dim) { + auto low = + getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]); + bool hasLowPad = getConstantIntValue(low) != static_cast(0); + auto high = + getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]); + bool hasHighPad = getConstantIntValue(high) != static_cast(0); + auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]); + auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]); + auto srcSize = b.createOrFold(loc, padOp.source(), dim); + + // The new amount of low padding is `low - offset`. Except for the case + // where none of the low padding is read. In that case, the new amount of + // low padding is zero. 
+ // + // Optimization: If low = 0, then newLow = 0. + Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero; + appendIndex(newLow, newLows, staticNewLows); + + // Start reading the data from position `offset - low`. Since the original + // read may have started in the low padding zone, this value could be + // negative. Therefore, start reading from: + // + // max(offset - low, 0) + // + // The original read could also have started in the high padding zone. + // In that case, set the offset to the end of source tensor. The new + // ExtractSliceOp length will be zero in that case. (Effectively reading + // no data from the source.) + // + // Optimization: If low = 0, then the formula can be simplified. + Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize) + : min(offset, srcSize); + newOffsets.push_back(getAsOpFoldResult(newOffset)); + + // The original ExtractSliceOp was reading until position `offset + + // length`. Therefore, the corresponding position within the source tensor + // is: + // + // offset + length - low + // + // In case the original ExtractSliceOp stopped reading within the low + // padding zone, this value can be negative. In that case, the end + // position of the read should be zero. (Similar to newOffset.) + // + // The original read could also have stopped in the high padding zone. + // In that case, set the end positition of the read should be the end of + // the source tensor. (Similar to newOffset.) + // + // endLoc = min(max(offset - low + length, 0), srcSize) + // + // The new ExtractSliceOp length is `endLoc - newOffset`. + // + // Optimization: If low = 0, then the formula can be simplified. + Value endLoc = + hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize) + : min(add(offset, length), srcSize); + Value newLength = sub(endLoc, newOffset); + newLengths.push_back(getAsOpFoldResult(newLength)); + + // Check if newLength is zero. In that case, no SubTensorOp should be + // executed. 
+ if (auto newLengthInt = getConstantIntValue(newLength)) { + hasZeroLen |= *newLengthInt == 0; + } else { + Value check = b.create(loc, arith::CmpIPredicate::eq, + newLength, zero); + dynHasZeroLenCond = + dynHasZeroLenCond + ? b.create(loc, check, dynHasZeroLenCond) + : check; + } + + // The amount of high padding is simply the number of elements remaining, + // so that the result has the same length as the original ExtractSliceOp. + // As an optimization, if the original high padding is zero, then the new + // high padding must also be zero. + Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero; + appendIndex(newHigh, newHighs, staticNewHighs); + + // Only unit stride supported. + newStrides.push_back(b.getIndexAttr(1)); + } + + // The shape of the result can be obtained from the sizes passed in. + SmallVector dynDims; + SmallVector shape; + dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize); + RankedTensorType resultType = + RankedTensorType::get(shape, padOp.getResultType().getElementType()); + + // Insert cast to ensure that types match. (May be folded away.) + auto castResult = [&](Value val) -> Operation * { + auto castOp = b.create(loc, resultType, val); + return castOp; + }; + + // In cases where the original data source is unused: Emit a GenerateOp and + // do not generate a SliceOp. (The result shape of the SliceOp would + // have a dimension of size 0, the semantics of which is unclear.) + auto createGenerateOp = [&]() { + // Create GenerateOp. + auto generateOp = b.create( + loc, resultType, dynDims, + [&](OpBuilder &builder, Location gLoc, ValueRange indices) { + builder.create(gLoc, padValue); + }); + return castResult(generateOp); + }; + + // Emit a SliceOp and a PadOp. Should not be used in cases where + // the result shape of the new SliceOp has a zero dimension. + auto createPadTensorOfSubTensor = [&]() { + // Create pad_tensor(subtensor(x)). 
+ auto newSliceOp = b.create( + loc, padOp.source(), newOffsets, newLengths, newStrides); + auto newPadOp = b.create(loc, newSliceOp, staticNewLows, + staticNewHighs, newLows, newHighs); + + // Copy region to new PadOp. + BlockAndValueMapping bvm; + padOp.region().cloneInto(&newPadOp.getRegion(), bvm); + + // Cast result and return. + return castResult(newPadOp); + }; + + // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known + // that the original data source x is not used. + if (hasZeroLen) + return {createGenerateOp()}; + + // If there are dynamic dimensions: Generate an scf.if check to avoid + // creating SliceOps with result dimensions of size 0 at runtime. + if (dynHasZeroLenCond) { + auto result = b.create( + loc, resultType, dynHasZeroLenCond, + /*thenBuilder=*/ + [&](OpBuilder &b, Location loc) { + b.create(loc, createGenerateOp()->getResult(0)); + }, + /*elseBuilder=*/ + [&](OpBuilder &b, Location loc) { + b.create(loc, + createPadTensorOfSubTensor()->getResult(0)); + }); + return {result}; + } + return {createPadTensorOfSubTensor()}; + } +}; + +} // namespace + +void mlir::tensor::registerTilingOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addOpInterface(); +} diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt new file mode 100644 index 0000000000000..19a00b5bc6eb9 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt @@ -0,0 +1,12 @@ +add_mlir_dialect_library(MLIRTensorUtils + Utils.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor + + LINK_LIBS PUBLIC + MLIRAffine + MLIRArithmetic + MLIRIR + MLIRTensor +) diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp new file mode 100644 index 0000000000000..c7054cf50d060 --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -0,0 +1,54 @@ +//===- Utils.cpp - Utilities to support the Tensor dialect ----------------===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for the Tensor dialect. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Tensor/Utils/Utils.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" + +using namespace mlir; +using namespace mlir::tensor; + +PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad, + ArrayRef low, + ArrayRef high, bool nofold, + Location loc, OpBuilder &builder) { + auto padTensorOp = + builder.create(loc, type, source, low, high, nofold); + int rank = padTensorOp.getResultType().getRank(); + SmallVector blockArgTypes(rank, builder.getIndexType()); + SmallVector blockArgLocs(rank, loc); + auto ®ion = padTensorOp.region(); + // `builder.createBlock` changes the insertion point within the block. Create + // a guard to reset the insertion point of the builder after it is destroyed. 
+ OpBuilder::InsertionGuard guard(builder); + builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); + builder.create(loc, pad); + return padTensorOp; +} + +PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad, + bool nofold, Location loc, OpBuilder &b) { + SmallVector low, high; + auto rankedTensorType = type.cast(); + assert(rankedTensorType.hasStaticShape()); + for (const auto &en : enumerate(rankedTensorType.getShape())) { + AffineExpr d0; + bindDims(b.getContext(), d0); + auto dimOp = b.createOrFold(loc, source, en.index()); + Value paddingWidth = + makeComposedAffineApply(b, loc, en.value() - d0, {dimOp}); + high.push_back(paddingWidth); + low.push_back(b.createOrFold(loc, 0)); + } + return createPadScalarOp(type, source, pad, low, high, nofold, loc, b); +} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index cac5cb5d7eb22..bd08b1ae2be4d 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -153,8 +153,8 @@ func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-LABEL: @max_pool_padded func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] - // CHECK-DAG: linalg.yield [[CONST]] + // CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] + // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) @@ -206,7 +206,7 @@ func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { // Initial piece computes the sum of the pooling region, 
with appropriate padding. // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: [[CONST:%.+]] = arith.constant 0 // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) @@ -268,7 +268,7 @@ func @avg_pool_dyn(%arg0: tensor) -> (tensor) { // The calculations remain the same as above, only testing for dyn behavior // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[PAD:.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62] // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4] @@ -386,8 +386,8 @@ func @conv2d_dyn(%input: tensor, %weights: tensor<28x3x3x27xf32> // CHECK-LABEL: @conv2d_padded_f32 func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C0]] + // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield %[[C0]] // CHECK: linalg.conv_2d_nhwc_hwcf %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) return @@ -398,8 +398,8 @@ func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x // CHECK-LABEL: @conv2d_quant func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { // CHECK: %[[C22:.+]] = arith.constant -22 - // CHECK: linalg.pad_tensor %arg0 low[0, 
1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C22]] + // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield %[[C22]] // CHECK: linalg.conv_2d_nhwc_hwcf_q %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> return @@ -481,8 +481,8 @@ func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x // CHECK-LABEL: @depthwise_conv_quant func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[PADV:%.+]] = arith.constant -128 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield [[PADV]] + // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: tensor.yield [[PADV]] // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] // CHECK: [[CST0:%.+]] = arith.constant 0 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 452c04b3489cd..55b8bce54b1a2 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1158,9 +1158,9 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { // CHECK: ^bb0(%arg1: index, %arg2: index): - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32> %1 = 
"tosa.pad"(%arg0, %0) : (tensor<1x2xf32>, tensor<2x2xi32>) -> (tensor<4x9xf32>) return %1 : tensor<4x9xf32> @@ -1169,8 +1169,8 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // CHECK: [[CST:%.+]] = arith.constant 0 : i32 - // CHECK: linalg.pad_tensor - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.pad + // CHECK: tensor.yield [[CST]] %1 = "tosa.pad"(%arg0, %0) : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } @@ -1178,8 +1178,8 @@ func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // CHECK: [[CST:%.+]] = arith.constant 42 : i32 - // CHECK: linalg.pad_tensor - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.pad + // CHECK: tensor.yield [[CST]] %1 = "tosa.pad"(%arg0, %0) { quantization_info = { input_zp = 42 : i32}} : (tensor<1x2xi32>, tensor<2x2xi32>) -> (tensor<4x9xi32>) return %1 : tensor<4x9xi32> } @@ -1194,9 +1194,9 @@ func @pad_float_explicit(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index // CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32 - // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { + // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]] { // CHECK: ^bb0(%arg1: index, %arg2: index): - // CHECK: linalg.yield [[CST]] + // CHECK: tensor.yield [[CST]] // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32> %1 = arith.constant dense<42.0> : tensor %2 = "tosa.pad"(%arg0, %0, %1) : (tensor<1x2xf32>, tensor<2x2xi32>, tensor) -> (tensor<4x9xf32>) diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir 
index 45e722d9f74b6..80bd5f8363e3e 100644 --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -277,9 +277,9 @@ func @bufferize_tensor_collapse_shape(%arg0: tensor<4x5xf32>) -> tensor<20xf32> func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { + %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32> return %out : tensor<4x?x?x?xf32> } diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 4d844b3035261..44cb18f11d152 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -282,7 +282,7 @@ func @fold_init_tensor_with_slice // CHECK-NOT: linalg.fill // CHECK-NOT: linalg.matmul // CHECK-NOT: linalg.generic -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: return func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>, %arg2: tensor, %high : index) { @@ -296,146 +296,15 @@ func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>, ^bb(%3: i32) : linalg.yield %3 : i32 } -> tensor<7x7xi32> - %3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] { - ^bb0(%arg9: index, %arg10: index): - linalg.yield %cst : f32 + %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] { + ^bb0(%arg9: index, %arg10: index): + tensor.yield %cst : f32 } : tensor to tensor<2x4xf32> return } // ----- -// CHECK-LABEL: func @pad_tensor_same_static_shape( -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor -// CHECK: return %[[ARG0]] -func @pad_tensor_same_static_shape(%arg0: 
tensor<5x6xf32>, %a: index) - -> tensor<5x6xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[%a, 0] high[0, %a] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor<5x6xf32> to tensor<5x6xf32> - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape( -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK: %[[PAD:.*]] = linalg.pad_tensor -// CHECK: return %[[PAD]] -func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) - -> tensor<5x6xf32> { - %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 nofold low[%a, 0] high[0, %a] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor<5x6xf32> to tensor<5x6xf32> - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_after_cast_different_shape( -// CHECK-SAME: %[[INPUT:.*]]: tensor) -> tensor { -// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]] -// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] { -// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): -// CHECK: linalg.yield %[[CST]] : f32 -// CHECK: } : tensor to tensor -// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] : -// CHECK-SAME: tensor to tensor -// CHECK: return %[[DYNAMIC]] : tensor -// CHECK: } -func @pad_tensor_after_cast_different_shape(%arg0: tensor) - -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %dynamic = tensor.cast %arg0 : tensor to tensor - %padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst: f32 - } : tensor to tensor - return %padded: tensor -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_after_cast_same_shape( -// CHECK-SAME: %[[INPUT:.*]]: tensor, -// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor { -// CHECK: %[[CST:.*]] = 
arith.constant 0.000000e+00 : f32 -// CHECK: %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]] -// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] { -// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): -// CHECK: linalg.yield %[[CST]] : f32 -// CHECK: } : tensor to tensor -// CHECK: return %[[PADDED:.*]] : tensor -// CHECK: } -func @pad_tensor_after_cast_same_shape(%arg0: tensor, %padding : index) - -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - %dynamic = tensor.cast %arg0 : tensor to tensor - %padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst: f32 - } : tensor to tensor - return %padded: tensor -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_of_cast( -// CHECK-NOT: tensor.cast -// CHECK: linalg.pad_tensor -// CHECK: tensor<8x?xf32> to tensor<8x32xf32> -func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> { - %c0 = arith.constant 0 : index - %cst = arith.constant 0.000000e+00 : f32 - %0 = tensor.cast %t : tensor<8x?xf32> to tensor - %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %s] { - ^bb0(%arg9: index, %arg10: index): - linalg.yield %cst : f32 - } : tensor to tensor<8x32xf32> - return %1 : tensor<8x32xf32> -} - -// ----- - -// CHECK-LABEL: @cast_of_pad_more_static -func @cast_of_pad_more_static(%arg0: tensor, %padding: index) -> tensor<32x32xf32> { - %cst = arith.constant 0.000000e+00 : f32 - // CHECK: %[[PAD:.*]] = linalg.pad_tensor - // CHECK: tensor to tensor<32x32xf32> - %padded = linalg.pad_tensor %arg0 low[%padding, %padding] high[0, 0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor - // CHECK-NOT: tensor.cast - %casted = tensor.cast %padded : tensor to tensor<32x32xf32> - // CHECK: return %[[PAD]] - return %casted : tensor<32x32xf32> -} - -// ----- - -// CHECK-LABEL: @cast_of_pad_less_static -func 
@cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor { - %cst = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.pad_tensor - %padded = linalg.pad_tensor %arg0 low[%padding, %padding, %padding] high[0, 0, 0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %cst : f32 - } : tensor<32x?x?xf32> to tensor<32x?x?xf32> - // CHECK: %[[CAST:.*]] = tensor.cast - %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor - // CHECK: return %[[CAST]] - return %casted : tensor -} - -// ----- - func @propogate_casts(%arg0 : tensor, %arg1 : f32, %arg2 : index, %arg3 : index) -> tensor { %c0 = arith.constant 0 : index @@ -579,71 +448,6 @@ func @fold_tiled_loop_inputs(%A: memref<192xf32>, %A_tensor: tensor<192xf32>, // ----- -func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> { - %c0 = arith.constant 0 : index - %cst = arith.constant 0.0 : f32 - %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor - %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor<4x4xf32> - return %1 : tensor<4x4xf32> -} -// CHECK-LABEL: @tensor_pad_cast -// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32> -// CHECK: return %[[ARG0]] - -// ----- - -// CHECK-LABEL: func @fold_pad_tensor_source_cast( -// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32> -// CHECK-NOT: tensor.cast -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] -func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> { - %cst = arith.constant 0.0 : f32 - %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[0, 1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %cst : f32 - } : tensor to tensor<4x4xf32> - return %1 : tensor<4x4xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_static_zero_cast( -// CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK-NOT: linalg.pad_tensor -// CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor to 
tensor<2x3x4xf32> -// CHECK: return %[[RESULT]] -func @pad_static_zero_cast(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { - %c0 = arith.constant 0 : index - %0 = linalg.pad_tensor %arg0 low[0, %c0, 0] high[0, 0, %c0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3x4xf32> - - return %0 : tensor<2x3x4xf32> -} - -// ----- - -// CHECK-LABEL: func @pad_nofold_static_zero( -// CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK: %[[PAD:.*]] = linalg.pad_tensor -// CHECK: return %[[PAD]] -func @pad_nofold_static_zero(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { - %c0 = arith.constant 0 : index - %0 = linalg.pad_tensor %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3x4xf32> - - return %0 : tensor<2x3x4xf32> -} - -// ----- - func private @some_use(%i : index, %j : index) // CHECK-LABEL: func @init_canonicalize diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir index 7119db8f0ccd1..d698d63758e4c 100644 --- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir +++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir @@ -48,7 +48,7 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7 func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> { // Check the padding of the input operands has been hoisted out of the tile loop nest. - // CHECK-PAD-COUNT=2: linalg.pad_tensor %{{.*}} nofold + // CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold // CHECK-PAD: scf.for // Check CSE eliminates the duplicate min operations introduced by tiling. 
// CHECK-PAD: affine.min #[[MAP0]] diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir index 9dd1c1e1ef967..8b5e2a313d5a5 100644 --- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir @@ -9,9 +9,9 @@ // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { + %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32> return %0 : tensor<1x32x32x1xf32> } @@ -38,9 +38,9 @@ func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor func @generalize_pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> { %c0 = arith.constant 0 : index %cst = arith.constant 0.0 : f32 - %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { + %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1] { ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32> return %out : tensor<4x?x?x?xf32> } diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir index 566abcfbc39ec..416dfe37e93d0 100644 --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -18,7 +18,7 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>, // MATVEC: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = // MATVEC: %[[PIDX0:.*]] = affine.apply #[[DIV4]](%[[PIV0]]) // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] 
[4] - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -29,9 +29,9 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>, // MATVEC-DAG: %[[IDX0:.*]] = affine.apply #[[DIV4]](%[[IV0]]) // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> - %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %3 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the packed input vector. @@ -67,7 +67,7 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>, // MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]]) // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]] // MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]]) - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -80,13 +80,13 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>, // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor %4 = affine.apply #map1(%1) - %5 = linalg.pad_tensor %2 low[%c0, %c0] high[%c0, %4] { + %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4] { ^bb0(%arg5: index, %arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<24x?xf32> to tensor<24x5xf32> - %6 = linalg.pad_tensor %3 low[%c0] high[%4] { + %6 = tensor.pad %3 low[%c0] high[%4] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<5xf32> // Check matvec uses the packed input vector. 
@@ -127,7 +127,7 @@ func @dynamic_size(%arg0: tensor<24x?xf32>, // MATVEC: %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])[%[[D0]]] // MATVEC: %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]] // MATVEC: %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]]) - // MATVEC: %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]] + // MATVEC: %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]] // MATVEC: %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]] // MATVEC: scf.for %[[IV0:[0-9a-zA-Z]*]] = @@ -140,13 +140,13 @@ func @dynamic_size(%arg0: tensor<24x?xf32>, // MATVEC-DAG: %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]] %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor to tensor %5 = affine.apply #map1(%2) - %6 = linalg.pad_tensor %3 low[%c0, %c0] high[%c0, %5] { + %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5] { ^bb0(%arg5: index, %arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<24x?xf32> to tensor<24x4xf32> - %7 = linalg.pad_tensor %4 nofold low[%c0] high[%5] { + %7 = tensor.pad %4 nofold low[%c0] high[%5] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<4xf32> // Check matvec uses the packed input vector. @@ -174,13 +174,13 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>, // Check the non constant padding is not hoisted. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> - %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %3 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): %5 = arith.index_cast %arg3 : index to i32 %6 = arith.sitofp %5 : i32 to f32 - linalg.yield %6 : f32 + tensor.yield %6 : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. 
@@ -209,13 +209,13 @@ func @non_constant_op_padding(%arg0: tensor<24x12xf32>, // Check the non constant op padding is not hoisted. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[V0:.*]] = tensor.extract %[[ARG1]][%[[IV0]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]] - // MATVEC: linalg.yield %[[V0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]] + // MATVEC: tensor.yield %[[V0]] %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = tensor.extract %arg1[%arg3] : tensor<12xf32> - %4 = linalg.pad_tensor %2 nofold low[%c0] high[%c0] { + %4 = tensor.pad %2 nofold low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %3 : f32 + tensor.yield %3 : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -247,12 +247,12 @@ func @non_index_operand(%arg0: tensor<24x12xf32>, // Check the index_cast prevents hoisting due to its non index operand. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = arith.index_cast %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = arith.index_cast %arg3 : i32 to index - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -284,12 +284,12 @@ func @memory_effect(%arg0: tensor<24x12xf32>, // Check the load prevents hoisting due to its memory effect. 
// MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = memref.load %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = memref.load %arg3[%c0] : memref - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -321,15 +321,15 @@ func @index_result_loop(%arg0: tensor<24x12xf32>, // Check the unexpected operation with a region prevents hoisting. // MATVEC: %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]] // MATVEC: %[[IDX0:.*]] = scf.for {{.*}} step %[[ARG3]] - // MATVEC: %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]] + // MATVEC: %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]] %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32> %3 = scf.for %arg6 = %c0 to %c12 step %arg3 iter_args(%arg7 = %c0) -> (index) { %6 = arith.addi %arg3, %arg7 : index scf.yield %6 : index } - %4 = linalg.pad_tensor %2 nofold low[%3] high[%3] { + %4 = tensor.pad %2 nofold low[%3] high[%3] { ^bb0(%arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<4xf32> to tensor<4xf32> // Check matvec uses the padded input vector. @@ -361,7 +361,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>, // Check the second input operand is hoisted by two loop nests. 
// MATMUL: %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = // MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG1]] - // MATMUL: %[[T2:.*]] = linalg.pad_tensor %[[T1]] + // MATMUL: %[[T2:.*]] = tensor.pad %[[T1]] // MATMUL: scf.for %[[IV0:[0-9a-zA-Z]*]] = %0 = scf.for %arg3 = %c0 to %c12 step %c5 iter_args(%arg4 = %arg2) -> (tensor<12x24xf32>) { @@ -372,9 +372,9 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>, %3 = affine.apply #map1(%1) // Check the fused and padded fill op does not prevent hoisting. - %4 = linalg.pad_tensor %2 nofold low[%c0, %c0] high[%3, %c0] { + %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0] { ^bb0(%arg5: index, %arg6: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<5x24xf32> %5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32> %6 = tensor.extract_slice %5[0, 0] [%1, 24] [1, 1] : tensor<5x24xf32> to tensor @@ -382,7 +382,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>, // Check the first input operand is hoisted by one loop nest. 
// MATMUL: %[[T3:.*]] = scf.for %[[PIV1:[0-9a-z]+]] = // MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG0]] - // MATMUL: %[[T5:.*]] = linalg.pad_tensor %[[T4]] + // MATMUL: %[[T5:.*]] = tensor.pad %[[T4]] // MATMUL: scf.for %[[IV1:[0-9a-zA-Z]*]] = %7 = scf.for %arg5 = %c0 to %c6 step %c3 iter_args(%arg6 = %6) -> (tensor) { @@ -393,20 +393,20 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>, %9 = tensor.extract_slice %arg0[%arg3, %arg5] [%1, 3] [1, 1] : tensor<12x6xf32> to tensor %10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32> %11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor to tensor - %12 = linalg.pad_tensor %9 nofold low[%c0, %c0] high[%3, %c0] { + %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0] { ^bb0(%arg7: index, %arg8: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<5x3xf32> - %13 = linalg.pad_tensor %10 nofold low[%c0, %c0] high[%c0, %c0] { + %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0] { ^bb0(%arg7: index, %arg8: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<3x24xf32> to tensor<3x24xf32> // Check the output padding is not hoisted. - // MATMUL: %[[T8:.*]] = linalg.pad_tensor - %14 = linalg.pad_tensor %11 nofold low[%c0, %c0] high[%3, %c0] { + // MATMUL: %[[T8:.*]] = tensor.pad + %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0] { ^bb0(%arg7: index, %arg8: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<5x24xf32> // Check matmul uses the padded operands. 
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index b205e30213498..40defe47a1cb6 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -353,71 +353,6 @@ func @init_tensor_err(%arg0 : index) // ----- - -func @pad_result_type(%arg0: tensor, %arg1: index, %arg2: i32) -> tensor { - // expected-error @+1 {{specified type 'tensor' does not match the inferred type 'tensor}} - %0 = linalg.pad_tensor %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] { - ^bb0(%arg3: index, %arg4: index): - linalg.yield %arg2 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_number_of_block_args(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{expected the block to have 2 arguments}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index, %arg4: index): - linalg.yield %arg1 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_no_block(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_block_args(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{op expected block argument 1 to be an index}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: i32, %arg3: i32): - linalg.yield %arg1 : i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_num_yields(%arg0: tensor, %arg1: i32) -> tensor { - // expected-error @+3 {{op expected single yield operand (got 2)}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - linalg.yield %arg1, %arg1 : i32, i32 - } : tensor to tensor - return %0 : tensor -} - -// ----- - -func @pad_yield_type(%arg0: tensor, %arg1: i8) -> tensor { - // expected-error @+3 {{op expected yield type 
to match shape element type}} - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - linalg.yield %arg1 : i8 - } : tensor to tensor - return %0 : tensor -} - -// ----- - func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32) { %0 = linalg.init_tensor [%arg0, %arg1] : tensor diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir index 0e4c62447e507..c6a3b1eed30f1 100644 --- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir @@ -6,9 +6,9 @@ func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> { %cst = arith.constant 0.000000e+00 : f32 %0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32> - %1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0] { + %1 = tensor.pad %0 low[1, 1, 1, 2] high[0, 2, 2, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32> %2 = bufferization.to_memref %1 : memref<2x31x31x3xf32> return %2 : memref<2x31x31x3xf32> @@ -25,9 +25,9 @@ func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3x // CHECK-LABEL: func @pad_tensor_no_memrefs func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> { %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2] { + %0 = tensor.pad %arg0 low[1, 2, 2] high[0, 2, 2] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28xf32> to tensor<2x32x32xf32> return %0 : tensor<2x32x32xf32> } @@ -43,9 +43,9 @@ func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> { // CHECK-LABEL: func @pad_tensor_detailed func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { %cst = arith.constant 0.000000e+00 : f32 - %0 
= linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { + %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32> return %0 : tensor<1x32x32x1xf32> } diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir index 31163f5a6be8c..36879b7254a7e 100644 --- a/mlir/test/Dialect/Linalg/pad.mlir +++ b/mlir/test/Dialect/Linalg/pad.mlir @@ -31,10 +31,10 @@ func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>, // Check statically sized matmul inputs with partially divisible sizes are padded. // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]] - // MATMUL: %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold + // MATMUL: %[[T3:.*]] = tensor.pad %[[T0]] nofold // MATMUL-SAME: [%[[C0]], %[[C0]]] // MATMUL-SAME: [%[[C0]], %[[V0]] - // MATMUL: %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold + // MATMUL: %[[T4:.*]] = tensor.pad %[[T1]] nofold // Check the statically sized matmul output with fully divisible sizes is not padded. // MATMUL: %[[T5:.*]] = linalg.matmul @@ -74,7 +74,7 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>, // Check the statically sized matmul output with partially divisible sizes is padded. // MATMUL: %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]] - // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] low + // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]] low // MATMUL-SAME: [%[[C0]], %[[C0]]] // MATMUL-SAME: [%[[C0]], %[[V0]] @@ -137,11 +137,11 @@ func @dynamic_sizes(%arg0: tensor, // Check all matmul operands are padded. 
// MATMUL: %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]] // MATMUL: %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]] - // MATMUL: %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold + // MATMUL: %[[T3:.*]] = tensor.pad %{{.*}} nofold // MATMUL-SAME: [%[[C0]], %[[C0]]] // MATMUL-SAME: [%[[V0]], %[[V1]] - // MATMUL: %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold - // MATMUL: %[[T5:.*]] = linalg.pad_tensor %{{.*}} low + // MATMUL: %[[T4:.*]] = tensor.pad %{{.*}} nofold + // MATMUL: %[[T5:.*]] = tensor.pad %{{.*}} low // Check the dynamic matmul has been erased. // MATMUL-NOT: = linalg.matmul {{.*}} tensor @@ -172,7 +172,7 @@ func @pad_multiple(%arg0: tensor<64x64xf32>, %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Check both fill operations are padded by the same pad tensor operation. - // FILL: %[[T0:.*]] = linalg.pad_tensor + // FILL: %[[T0:.*]] = tensor.pad // FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]]) // FILL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]) // FILL: = tensor.extract_slice %[[T2]] @@ -197,20 +197,20 @@ func @compose_padding(%arg0: tensor<64x64xf32>, // MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] // MATMUL-SAME: [0, 0] // MATMUL-SAME: [%[[SIZE]], %[[SIZE]]] - // MATMUL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] + // MATMUL: %[[T1:.*]] = tensor.pad %[[T0]] // MATMUL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]] // MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %3 = linalg.fill(%cst, %2) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %3[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // 
Check there are no additional pad tensor operations. - // MATMUL-NOT: linalg.pad_tensor + // MATMUL-NOT: tensor.pad // Check the matmul directly uses the result of the fill operation. // MATMUL: %[[T4:.*]] = linalg.matmul ins(%[[T3]] @@ -233,16 +233,16 @@ func @different_padding_values(%arg0: tensor<64x64xf32>, %cst = arith.constant 42.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different padding values prevent composing the paddings (42.0 vs. 0.0). // MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -258,16 +258,16 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>, %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different dynamic sizes prevent composing the paddings (%iv0 vs %size). 
// MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -283,16 +283,16 @@ func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>, %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<64x64xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32> %3 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor // Different dynamic ranks prevent composing the paddings ([%size, %size, 1] vs [%size, %size]). // MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %4 = linalg.matmul ins(%3, %3 : tensor, tensor) outs(%3 : tensor) -> tensor return %4 : tensor @@ -308,16 +308,16 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>, %cst = arith.constant 0.0 : f32 %size = affine.min #map0()[%iv0] %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor - %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0] { + %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0] { ^bb0(%arg3: index, %arg4: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<62x62xf32> %2 = linalg.fill(%cst, %1) : f32, tensor<62x62xf32> -> tensor<62x62xf32> %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor // Different static sizes prevent composing the paddings (62 vs 64 derived from #map0). 
// MATMUL: = linalg.fill - // MATMUL: = linalg.pad_tensor + // MATMUL: = tensor.pad // MATMUL: = linalg.matmul %5 = linalg.matmul ins(%4, %4 : tensor, tensor) outs(%4 : tensor) -> tensor return %5 : tensor @@ -336,7 +336,7 @@ func @scalar_operand(%arg0: f32, %0 = affine.min #map0()[%iv0] // FILL: %[[T0:.*]] = tensor.extract_slice %[[ARG1]] - // FILL: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold + // FILL: %[[T1:.*]] = tensor.pad %[[T0]] nofold %1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32> // Check only the fill output operand is padded. @@ -361,8 +361,8 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>, %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor // Check the matmul inputs are padded despite the missing slice for the static output. - // MATMUL: %[[T0:.*]] = linalg.pad_tensor - // MATMUL: %[[T1:.*]] = linalg.pad_tensor + // MATMUL: %[[T0:.*]] = tensor.pad + // MATMUL: %[[T1:.*]] = tensor.pad // MATMUL: = linalg.matmul ins(%[[T0]], %[[T1]] // MATMUL-SAME: outs(%[[ARG2]] %3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32> @@ -414,8 +414,8 @@ func @static_input_padding_only(%arg0: tensor<24x12xf32>, %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32> // Check the matmul inputs are padded despite the failure to compute a padding value for the static output. 
- // INPUTS-ONLY: %[[T1:.*]] = linalg.pad_tensor - // INPUTS-ONLY: %[[T2:.*]] = linalg.pad_tensor + // INPUTS-ONLY: %[[T1:.*]] = tensor.pad + // INPUTS-ONLY: %[[T2:.*]] = tensor.pad // INPUTS-ONLY: = linalg.matmul ins(%[[T1]], %[[T2]] // INPUTS-ONLY-SAME: outs(%[[T0]] %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> @@ -465,7 +465,7 @@ func @rank_reducing(%arg0: tensor<1x64x1x64xf32>, %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32> // Check the fill is padded despite the rank-reducing slice operation. - // FILL: %[[T0:.*]] = linalg.pad_tensor + // FILL: %[[T0:.*]] = tensor.pad // FILL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]]) // FILL-SAME: tensor<1x64x64xf32> // FILL: = tensor.extract_slice %[[T1]] diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir index 7f6bd150f3de9..90e6381f6f16a 100644 --- a/mlir/test/Dialect/Linalg/pad_fusion.mlir +++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir @@ -15,9 +15,9 @@ func @dynamic_pad_fusion(%arg0 : tensor, %arg1 : index, %arg2 : index, %1 = arith.mulf %arg6, %arg6 : f32 linalg.yield %1 : f32 } -> tensor - %1 = linalg.pad_tensor %0 low [%arg1, %arg2] high [%arg3, %arg4] { + %1 = tensor.pad %0 low [%arg1, %arg2] high [%arg3, %arg4] { ^bb0(%arg6: index, %arg7 : index): - linalg.yield %arg5 : f32 + tensor.yield %arg5 : f32 } : tensor to tensor return %1 : tensor } @@ -64,9 +64,9 @@ func @mixed_pad_fusion(%arg0 : tensor, %arg1 : index, %arg2 : index, %1 = arith.mulf %arg4, %arg4 : f32 linalg.yield %1 : f32 } -> tensor<42x?xf32> - %1 = linalg.pad_tensor %0 low [3, %arg1] high [4, %arg2] { + %1 = tensor.pad %0 low [3, %arg1] high [4, %arg2] { ^bb0(%arg4: index, %arg5 : index): - linalg.yield %arg3 : f32 + tensor.yield %arg3 : f32 } : tensor<42x?xf32> to tensor<49x?xf32> return %1 : tensor<49x?xf32> } diff --git 
a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir index 9e0a672252104..27f014ed66147 100644 --- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir +++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir @@ -253,9 +253,9 @@ func @dim_of_pad_op(%arg0 : tensor<2x?x?xf32>, %arg1 : index, %arg2 : index, %c3 = arith.constant 3 : index %c4 = arith.constant 4 : index %c5 = arith.constant 5 : index - %0 = linalg.pad_tensor %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] { + %0 = tensor.pad %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] { ^bb0(%arg4: index, %arg5: index, %arg6: index): - linalg.yield %arg3 : f32 + tensor.yield %arg3 : f32 } : tensor<2x?x?xf32> to tensor %1 = tensor.dim %0, %c0 : tensor %2 = tensor.dim %0, %c1 : tensor diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 9e8dfb292032f..337b7c0ad2b7e 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -15,77 +15,6 @@ // CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> // CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)> -func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, - %pad_value: f32) -> tensor<6x?x?x?xf32> { - %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - return %0 : tensor<6x?x?x?xf32> -} -// CHECK-LABEL: func @pad_dynamic -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[2, %[[LOW]], 3, 3] -// CHECK-SAME: high[3, 3, %[[HIGH]], 2] -// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - 
-// ----- - -func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> { - %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] { - ^bb0(%arg1 : index, %arg2 : index): - linalg.yield %pad_value : f32 - } : tensor<3x4xf32> to tensor<6x9xf32> - return %0 : tensor<6x9xf32> -} -// CHECK-LABEL: func @pad_static -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] low[1, 2] high[2, 3] -// CHECK: : tensor<3x4xf32> to tensor<6x9xf32> - -// ----- - -func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index, - %pad_value: f32) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor<2x3xf32> to tensor - return %0 : tensor -} -// CHECK-LABEL: func @pad_asymmetrical -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[0, 0] -// CHECK-SAME: high[%[[UB0]], %[[UB1]]] -// CHECK: : tensor<2x3xf32> to tensor - -// ----- - -func @pad_to_static_size(%arg0: tensor, %ub0: index, %ub1: index, - %pad_value: f32) -> tensor<2x3xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] { - ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 - } : tensor to tensor<2x3xf32> - return %0 : tensor<2x3xf32> -} -// CHECK-LABEL: func @pad_to_static_size -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] -// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] -// CHECK: linalg.pad_tensor %[[ARG0]] -// CHECK-SAME: low[0, 0] -// CHECK-SAME: high[%[[UB0]], %[[UB1]]] -// CHECK: : tensor to tensor<2x3xf32> - -// ----- - func @views(%arg0: index) { %c0 = arith.constant 0 : index %0 = arith.muli %arg0, %arg0 : index diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir index a8e26baa2bded..64bb9d1ea9eff 100644 --- 
a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir +++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir @@ -6,9 +6,9 @@ // CHECK: return %[[RESULT]] func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x1xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32> return %1 : tensor<2x1xf32> @@ -18,16 +18,16 @@ func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_high_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x4xf32> func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32> return %1 : tensor<2x4xf32> @@ -37,16 +37,16 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_low_pad_only // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<2x3xf32> func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<2x3xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { 
^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32> return %1 : tensor<2x3xf32> @@ -56,16 +56,16 @@ func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_low_pad_only_2 // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-NOT: tensor.extract_slice // CHECK: %[[RESULT:.*]] = tensor.generate // CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<1x3xf32> func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<1x3xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32> return %1 : tensor<1x3xf32> @@ -75,16 +75,16 @@ func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_mixed_data_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3] -// CHECK: linalg.yield %[[PAD]] +// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[1, 3] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : 
tensor<4x5xf32> to tensor<11x13xf32> %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -94,16 +94,16 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_mixed_data_low_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32> -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0] -// CHECK: linalg.yield %[[PAD]] +// CHECK: %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[1, 3] high[0, 0] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<3x4xf32> func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] { + %0 = tensor.pad %arg0 low[3, 7] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<14x20xf32> %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -113,15 +113,15 @@ func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @static_mixed_data_low_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor -// CHECK: %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3] -// CHECK: linalg.yield %[[PAD]] +// CHECK-NOT: tensor.pad +// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]] low[1, 1] high[2, 3] +// CHECK: tensor.yield %[[PAD]] // CHECK: return %[[RESULT]] : tensor<7x9xf32> func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) -> tensor<7x9xf32> { - %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] { + %0 = tensor.pad %arg0 low[2, 3] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield 
%pad : f32 + tensor.yield %pad : f32 } : tensor<4x5xf32> to tensor<13x16xf32> %1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32> return %1 : tensor<7x9xf32> @@ -131,7 +131,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK-LABEL: @dynamic_high_pad // CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) { @@ -139,14 +139,14 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32) // CHECK: scf.yield %[[GEN]] // CHECK: } else { // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor -// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] +// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] // CHECK: scf.yield %[[PADTENSOR]] // CHECK: } // CHECK: return %[[RESULT]] func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tensor<3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[%h1, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor to tensor<3x4xf32> return %1 : tensor<3x4xf32> @@ -156,7 +156,7 @@ func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tenso // CHECK-LABEL: @dynamic_extract_size // CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor) { @@ -164,14 +164,14 @@ func @dynamic_high_pad(%arg0 : tensor, %h1: index, %pad : f32) -> tenso // CHECK: scf.yield %[[GEN]] // 
CHECK: } else { // CHECK: %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor to tensor -// CHECK: %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] +// CHECK: %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3] // CHECK: scf.yield %[[PADTENSOR]] // CHECK: } // CHECK: return %[[RESULT]] func @dynamic_extract_size(%arg0 : tensor, %s1: index, %pad : f32) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[2, 4] [%s1, 4] [1, 1] : tensor to tensor return %1 : tensor @@ -184,14 +184,14 @@ func @dynamic_extract_size(%arg0 : tensor, %s1: index, %pad : f32) -> t // CHECK: tensor.generate // CHECK: else // CHECK: %[[SLICE:.*]] = tensor.extract_slice -// CHECK: linalg.pad_tensor %[[SLICE]] low[0, 0] +// CHECK: tensor.pad %[[SLICE]] low[0, 0] func @dynamic_zero_low_padding(%arg0 : tensor, %pad : f32, %o1 : index, %o2 : index, %s1 : index, %s2 : index) -> tensor { - %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] { + %0 = tensor.pad %arg0 low[0, 0] high[7, 8] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor to tensor return %1 : tensor @@ -204,14 +204,14 @@ func @dynamic_zero_low_padding(%arg0 : tensor, %pad : f32, // CHECK: tensor.generate // CHECK: else // CHECK: %[[SLICE:.*]] = tensor.extract_slice -// CHECK: linalg.pad_tensor %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0] +// CHECK: tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0] func @dynamic_zero_high_padding(%arg0 : tensor, %pad : f32, %o1 : index, %o2 : index, %s1 : index, %s2 : index) -> tensor { - %0 = linalg.pad_tensor %arg0 low[7, 8] high[0, 0] { + %0 = tensor.pad %arg0 low[7, 8] high[0, 0] { 
^bb0(%arg1: index, %arg2: index): - linalg.yield %pad : f32 + tensor.yield %pad : f32 } : tensor to tensor %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor to tensor return %1 : tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir index b60c8c466f154..ac94261a153f0 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -288,7 +288,7 @@ func @conv_tensors_dynamic(%input: tensor, %filter: tensor, %large_input: tensor<64 %d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32> %d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32> - %pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] { + %pad = tensor.pad %small_input low[4, 60] high[2, 67] { ^bb0(%arg0: index, %arg1: index): - linalg.yield %zero : f32 + tensor.yield %zero : f32 } : tensor<58x1xf32> to tensor<64x128xf32> %fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32> diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir index a83793544078c..a8dfdd940673a 100644 --- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir +++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir @@ -23,7 +23,7 @@ // TILE2: tensor.generate // TILE2: else // TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] +// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] // TILE2: return %[[RESULT]] @@ -43,15 +43,15 @@ // TILE1: tensor.generate // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, 
%{{.*}}] high[{{.*}}, {{.*}}] // TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] // TILE1: return %[[RESULT]] func @dynamic_pad_tensor(%input_tensor: tensor, %pad_value: f32) -> tensor { - %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor to tensor return %0 : tensor } @@ -71,7 +71,7 @@ func @dynamic_pad_tensor(%input_tensor: tensor, // TILE2: tensor.generate // TILE2: else // TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] +// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] // TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] // TILE2: return %[[RESULT]] @@ -86,15 +86,15 @@ func @dynamic_pad_tensor(%input_tensor: tensor, // TILE1: tensor.generate // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] // TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1] // TILE1: return %[[RESULT]] func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, %pad_value: f32) -> tensor<15x16xf32> { - %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<7x9xf32> to tensor<15x16xf32> return %0 : tensor<15x16xf32> } @@ -112,7 +112,7 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, // TILE1: scf.yield %[[GEN]] : tensor<14x3xf32> // TILE1: else // TILE1: %[[SLICE:.*]] = tensor.extract_slice 
%arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32> -// TILE1: %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}] +// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}] // TILE1: scf.yield %[[PAD]] : tensor<14x3xf32> // TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32> // TILE1: scf.yield %[[R3]] : tensor<14x15xf32> @@ -120,9 +120,9 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>, %output_tensor: tensor<14x15xf32>, %pad_value: f32) -> tensor<14x15xf32> { - %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] { + %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<7x9xf32> to tensor<14x15xf32> return %0 : tensor<14x15xf32> } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index ee3ec0019a840..c9f50af28ef27 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -537,7 +537,7 @@ func @matmul_tensors( // CHECK-LABEL: func @pad_static( // CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32> @@ -547,9 +547,9 @@ func @matmul_tensors( // CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32> // CHECK: return %[[RESULT]] func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] { + %0 = tensor.pad %arg0 
low[0, 0, 2] high[0, 1, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<2x?x2xf32> to tensor<2x3x4xf32> return %0 : tensor<2x3x4xf32> } @@ -558,7 +558,7 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> // CHECK-LABEL: func @pad_static_source( // CHECK-SAME: %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32> @@ -568,9 +568,9 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32> // CHECK: return %[[WRITE]] func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> { - %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] { + %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { ^bb0(%arg1: index, %arg2: index, %arg3: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<2x5x2xf32> to tensor<2x6x4xf32> return %0 : tensor<2x6x4xf32> } @@ -579,7 +579,7 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6 // CHECK-LABEL: func @pad_static_dynamic( // CHECK-SAME: %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index @@ -596,9 +596,9 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6 // CHECK: return %[[RESULT]] func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: 
index, %pad_value: f32) -> tensor<6x?x?x?xf32> { - %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { + %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - linalg.yield %pad_value : f32 + tensor.yield %pad_value : f32 } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> return %0 : tensor<6x?x?x?xf32> } @@ -607,7 +607,7 @@ func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, // CHECK-LABEL: func @pad_and_transfer_read // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 // CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> @@ -616,9 +616,9 @@ func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 %c6 = arith.constant 6.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] { + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { ^bb0(%arg1: index, %arg2: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<10x13xf32> %1 = vector.transfer_read %0[%c0, %c0], %c6 : tensor<10x13xf32>, vector<7x9xf32> @@ -631,7 +631,7 @@ func private @make_vector() -> vector<7x9xf32> // CHECK-LABEL: func @pad_and_transfer_write_static // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> // CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> @@ -640,9 +640,9 @@ func @pad_and_transfer_write_static( %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = 
linalg.pad_tensor %arg0 low[0, 0] high[5, 7] { + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { ^bb0(%arg2: index, %arg3: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<10x13xf32> %1 = call @make_vector() : () -> vector<7x9xf32> %2 = vector.transfer_write %1, %0[%c0, %c0] @@ -657,7 +657,7 @@ func private @make_vector() -> vector<7x9xf32> // CHECK-LABEL: func @pad_and_transfer_write_dynamic_static // CHECK-SAME: %[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> @@ -669,9 +669,9 @@ func @pad_and_transfer_write_dynamic_static( %c5 = arith.constant 5.0 : f32 %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] : tensor to tensor - %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] { + %0 = tensor.pad %s low[0, 0] high[%padding, 7] { ^bb0(%arg2: index, %arg3: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor to tensor %1 = call @make_vector() : () -> vector<7x9xf32> %2 = vector.transfer_write %1, %0[%c0, %c0] @@ -686,7 +686,7 @@ func private @make_vector() -> tensor<12x13xf32> // CHECK-LABEL: func @pad_and_insert_slice_source // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 // CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> @@ -697,9 +697,9 @@ func @pad_and_insert_slice_source( %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0] high[2, 3] { + %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { ^bb0(%arg2: index, %arg3: index): - linalg.yield %c5 : f32 + 
tensor.yield %c5 : f32 } : tensor<5x6xf32> to tensor<7x9xf32> %1 = call @make_vector() : () -> tensor<12x13xf32> %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> @@ -717,9 +717,9 @@ func private @make_vector() -> tensor<12x13xf32> func @pad_and_insert_slice_dest( %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[0, 0, 0] high[0, 7, 7] { + %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] { ^bb0(%arg2: index, %arg3: index, %arg4: index): - linalg.yield %c5 : f32 + tensor.yield %c5 : f32 } : tensor<1x5x6xf32> to tensor<1x12x13xf32> %1 = call @make_vector() : () -> tensor<12x13xf32> %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> @@ -730,7 +730,7 @@ func @pad_and_insert_slice_dest( // CHECK-LABEL: func @pad_tensor_non_const_pad_value // CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: linalg.pad_tensor +// CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index @@ -743,14 +743,14 @@ func @pad_and_insert_slice_dest( func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { %c0 = arith.constant 0 : index %c5 = arith.constant 5.0 : f32 - %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] { + %0 = tensor.pad %arg0 low[3, 4] high[4, 3] { ^bb0(%arg1: index, %arg2: index): %i1 = arith.index_cast %arg1 : index to i32 %i2 = arith.index_cast %arg2 : index to i32 %f1 = arith.sitofp %i1 : i32 to f32 %f2 = arith.sitofp %i2 : i32 to f32 %m = arith.mulf %f1, %f2 : f32 - linalg.yield %m : f32 + tensor.yield %m : f32 } : tensor<5x6xf32> to tensor<12x13xf32> return %0 : tensor<12x13xf32> } diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 82f880d098fdc..10d39132a1126 100644 --- 
a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -982,3 +982,199 @@ func @fold_rank() -> (index) { // CHECK-NEXT: return [[C3]] return %rank_0 : index } + +// ----- + +// CHECK-LABEL: func @pad_tensor_same_static_shape( +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK: return %[[ARG0]] +func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) + -> tensor<5x6xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.pad %arg0 low[%a, 0] high[0, %a] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor<5x6xf32> to tensor<5x6xf32> + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape( +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK: %[[PAD:.*]] = tensor.pad +// CHECK: return %[[PAD]] +func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index) + -> tensor<5x6xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.pad %arg0 nofold low[%a, 0] high[0, %a] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor<5x6xf32> to tensor<5x6xf32> + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_after_cast_different_shape( +// CHECK-SAME: %[[INPUT:.*]]: tensor) -> tensor { +// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]] +// CHECK-SAME: low[0, 0, 1, 1] high[0, 0, 1, 1] { +// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): +// CHECK: tensor.yield %[[CST]] : f32 +// CHECK: } : tensor to tensor +// CHECK: %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] : +// CHECK-SAME: tensor to tensor +// CHECK: return %[[DYNAMIC]] : tensor +// CHECK: } +func @pad_tensor_after_cast_different_shape(%arg0: tensor) + -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %dynamic = tensor.cast %arg0 : tensor to tensor + %padded = 
tensor.pad %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst: f32 + } : tensor to tensor + return %padded: tensor +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_after_cast_same_shape( +// CHECK-SAME: %[[INPUT:.*]]: tensor, +// CHECK-SAME: %[[PADDING:.*]]: index) -> tensor { +// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[PADDED:.*]] = tensor.pad %[[INPUT]] +// CHECK-SAME: low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1] { +// CHECK: ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index): +// CHECK: tensor.yield %[[CST]] : f32 +// CHECK: } : tensor to tensor +// CHECK: return %[[PADDED:.*]] : tensor +// CHECK: } +func @pad_tensor_after_cast_same_shape(%arg0: tensor, %padding : index) + -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + %dynamic = tensor.cast %arg0 : tensor to tensor + %padded = tensor.pad %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %cst: f32 + } : tensor to tensor + return %padded: tensor +} + +// ----- + +// CHECK-LABEL: func @pad_tensor_of_cast( +// CHECK-NOT: tensor.cast +// CHECK: tensor.pad +// CHECK: tensor<8x?xf32> to tensor<8x32xf32> +func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.cast %t : tensor<8x?xf32> to tensor + %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %s] { + ^bb0(%arg9: index, %arg10: index): + tensor.yield %cst : f32 + } : tensor to tensor<8x32xf32> + return %1 : tensor<8x32xf32> +} + +// ----- + +// CHECK-LABEL: @cast_of_pad_more_static +func @cast_of_pad_more_static(%arg0: tensor, %padding: index) -> tensor<32x32xf32> { + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: %[[PAD:.*]] = tensor.pad + // CHECK: tensor to tensor<32x32xf32> + %padded = tensor.pad %arg0 
low[%padding, %padding] high[0, 0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor + // CHECK-NOT: tensor.cast + %casted = tensor.cast %padded : tensor to tensor<32x32xf32> + // CHECK: return %[[PAD]] + return %casted : tensor<32x32xf32> +} + +// ----- + +// CHECK-LABEL: @cast_of_pad_less_static +func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: tensor.pad + %padded = tensor.pad %arg0 low[%padding, %padding, %padding] high[0, 0, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %cst : f32 + } : tensor<32x?x?xf32> to tensor<32x?x?xf32> + // CHECK: %[[CAST:.*]] = tensor.cast + %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor + // CHECK: return %[[CAST]] + return %casted : tensor +} + +// ----- + +func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor + %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %c0] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor<4x4xf32> + return %1 : tensor<4x4xf32> +} +// CHECK-LABEL: @tensor_pad_cast +// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32> +// CHECK: return %[[ARG0]] + +// ----- + +// CHECK-LABEL: func @fold_pad_tensor_source_cast( +// CHECK-SAME: %[[ARG0:.*]]: tensor<4x?xf32> +// CHECK-NOT: tensor.cast +// CHECK: %[[RESULT:.*]] = tensor.pad %[[ARG0]] +func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> { + %cst = arith.constant 0.0 : f32 + %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor + %1 = tensor.pad %0 low[0, 0] high[0, 1] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %cst : f32 + } : tensor to tensor<4x4xf32> + return %1 : tensor<4x4xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_static_zero_cast( +// CHECK-SAME: %[[ARG0:.*]]: tensor +// CHECK-NOT: tensor.pad +// 
CHECK: %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor to tensor<2x3x4xf32> +// CHECK: return %[[RESULT]] +func @pad_static_zero_cast(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { + %c0 = arith.constant 0 : index + %0 = tensor.pad %arg0 low[0, %c0, 0] high[0, 0, %c0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor to tensor<2x3x4xf32> + + return %0 : tensor<2x3x4xf32> +} + +// ----- + +// CHECK-LABEL: func @pad_nofold_static_zero( +// CHECK-SAME: %[[ARG0:.*]]: tensor +// CHECK: %[[PAD:.*]] = tensor.pad +// CHECK: return %[[PAD]] +func @pad_nofold_static_zero(%arg0: tensor, %pad_value: f32) -> tensor<2x3x4xf32> { + %c0 = arith.constant 0 : index + %0 = tensor.pad %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor to tensor<2x3x4xf32> + + return %0 : tensor<2x3x4xf32> +} diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir index 8cdab35fb5e20..cec4718595a45 100644 --- a/mlir/test/Dialect/Tensor/invalid.mlir +++ b/mlir/test/Dialect/Tensor/invalid.mlir @@ -317,3 +317,58 @@ func @illegal_num_offsets(%arg0 : tensor, %arg1 : tensor, %0 = tensor.insert_slice %arg0 into %arg1[0, 0] [%arg2, %arg3] [1, 1] : tensor into tensor return } + +// ----- + + +func @pad_result_type(%arg0: tensor, %arg1: index, %arg2: i32) -> tensor { + // expected-error @+1 {{specified type 'tensor' does not match the inferred type 'tensor}} + %0 = tensor.pad %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] { + ^bb0(%arg3: index, %arg4: index): + tensor.yield %arg2 : i32 + } : tensor to tensor + return %0 : tensor +} + +// ----- + +func @pad_number_of_block_args(%arg0: tensor, %arg1: i32) -> tensor { + // expected-error @+1 {{expected the block to have 2 arguments}} + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %arg1 : i32 + } : tensor to tensor + return 
%0 : tensor +} + +// ----- + +func @pad_no_block(%arg0: tensor, %arg1: i32) -> tensor { + // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}} + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + } : tensor to tensor + return %0 : tensor +} + +// ----- + +func @pad_block_args(%arg0: tensor, %arg1: i32) -> tensor { + // expected-error @+1 {{op expected block argument 1 to be an index}} + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + ^bb0(%arg2: i32, %arg3: i32): + tensor.yield %arg1 : i32 + } : tensor to tensor + return %0 : tensor +} + +// ----- + +func @pad_yield_type(%arg0: tensor, %arg1: i8) -> tensor { + // expected-error @+1 {{op expected yield type to match shape element type}} + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %arg1 : i8 + } : tensor to tensor + return %0 : tensor +} + diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir index d461dffeb6d5b..a76a18d190b57 100644 --- a/mlir/test/Dialect/Tensor/ops.mlir +++ b/mlir/test/Dialect/Tensor/ops.mlir @@ -176,3 +176,77 @@ func @rank(%t : tensor<4x4x?xf32>) { %1 = tensor.rank %t : tensor<4x4x?xf32> return } + +// ----- + +func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, + %pad_value: f32) -> tensor<6x?x?x?xf32> { + %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { + ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): + tensor.yield %pad_value : f32 + } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> + return %0 : tensor<6x?x?x?xf32> +} +// CHECK-LABEL: func @pad_dynamic +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]] +// CHECK: tensor.pad %[[ARG0]] +// CHECK-SAME: low[2, %[[LOW]], 3, 3] +// CHECK-SAME: high[3, 3, %[[HIGH]], 2] +// CHECK: : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> + +// ----- + +func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> 
tensor<6x9xf32> { + %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { + ^bb0(%arg1 : index, %arg2 : index): + tensor.yield %pad_value : f32 + } : tensor<3x4xf32> to tensor<6x9xf32> + return %0 : tensor<6x9xf32> +} +// CHECK-LABEL: func @pad_static +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] +// CHECK: tensor.pad %[[ARG0]] low[1, 2] high[2, 3] +// CHECK: : tensor<3x4xf32> to tensor<6x9xf32> + +// ----- + +func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index, + %pad_value: f32) -> tensor { + %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor<2x3xf32> to tensor + return %0 : tensor +} +// CHECK-LABEL: func @pad_asymmetrical +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] +// CHECK: tensor.pad %[[ARG0]] +// CHECK-SAME: low[0, 0] +// CHECK-SAME: high[%[[UB0]], %[[UB1]]] +// CHECK: : tensor<2x3xf32> to tensor + +// ----- + +func @pad_to_static_size(%arg0: tensor, %ub0: index, %ub1: index, + %pad_value: f32) -> tensor<2x3xf32> { + %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor to tensor<2x3xf32> + return %0 : tensor<2x3xf32> +} +// CHECK-LABEL: func @pad_to_static_size +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]] +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]] +// CHECK: tensor.pad %[[ARG0]] +// CHECK-SAME: low[0, 0] +// CHECK-SAME: high[%[[UB0]], %[[UB1]]] +// CHECK: : tensor to tensor<2x3xf32> + +// ----- + diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir index 58a4b7630dcc1..d840491b89984 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir @@ -21,9 +21,9 @@ func 
@init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor to tensor<2xf32> %10 = tensor.cast %9 : tensor<2xf32> to tensor - %11 = linalg.pad_tensor %10 low[%c0] high[%c0] { + %11 = tensor.pad %10 low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<2xf32> %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor scf.yield %12 : tensor @@ -38,9 +38,9 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor to tensor<2xf32> %10 = tensor.cast %9 : tensor<2xf32> to tensor - %11 = linalg.pad_tensor %10 low[%c0] high[%c0] { + %11 = tensor.pad %10 low[%c0] high[%c0] { ^bb0(%arg5: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor to tensor<2xf32> %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor scf.yield %12 : tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir index 05b7e1a7d2cac..ced7a49073b37 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir @@ -13,9 +13,9 @@ func @main() { %offset = arith.constant 2 : index %cst = arith.constant 2.3 : f32 %c0 = arith.constant 0 : index - %out = linalg.pad_tensor %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] { + %out = tensor.pad %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset] { ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index): - linalg.yield %cst : f32 + tensor.yield %cst : f32 } : tensor<1x?x3xf32> to tensor<1x?x?xf32> %unranked = tensor.cast %out: tensor<1x?x?xf32> to tensor<*xf32> call @print_memref_f32(%unranked) : (tensor<*xf32>) -> () diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 76d983d3892e4..eef99f82fcb3f 100644 --- 
a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -42,6 +42,7 @@ struct TestLinalgTransforms memref::MemRefDialect, scf::SCFDialect, StandardOpsDialect, + linalg::LinalgDialect, vector::VectorDialect, gpu::GPUDialect>(); // clang-format on @@ -549,20 +550,20 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) { funcOp.getContext(), LinalgTransformationFilter() .addOpFilter()); - populatePadTensorOpVectorizationPatterns(patterns); + populatePadOpVectorizationPatterns(patterns); populateConvolutionVectorizationPatterns(patterns); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } static void applyPadTensorToGenericPatterns(FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); - patterns.add(funcOp.getContext()); + patterns.add(funcOp.getContext()); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } static void applyGeneralizePadTensorPatterns(FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); - patterns.add(funcOp.getContext()); + patterns.add(funcOp.getContext()); (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cd4d2e195d03a..e6b4654097d2c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4280,6 +4280,7 @@ td_library( ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", + ":TilingInterfaceTdFiles", ":ViewLikeInterfaceTdFiles", ], ) @@ -4336,6 +4337,7 @@ cc_library( ":StandardOps", ":Support", ":TensorOpsIncGen", + ":TilingInterface", ":ViewLikeInterface", "//llvm:Support", ], @@ -4356,6 +4358,38 @@ cc_library( ], ) +cc_library( + name = "TensorTilingInterfaceImpl", + srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"], + hdrs = 
["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"], + includes = ["include"], + deps = [ + ":Affine", + ":IR", + ":LinalgOps", + ":SCFDialect", + ":StandardOps", + ":TensorDialect", + ":TilingInterface", + "//llvm:Support", + ], +) + +cc_library( + name = "TensorUtils", + srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"], + hdrs = ["include/mlir/Dialect/Tensor/Utils/Utils.h"], + includes = ["include"], + deps = [ + ":Affine", + ":ArithmeticDialect", + ":IR", + ":Support", + ":TensorDialect", + "//llvm:Support", + ], +) + gentbl_cc_library( name = "TensorPassIncGen", strip_include_prefix = "include", @@ -5634,6 +5668,7 @@ cc_library( ":StandardToSPIRV", ":TensorDialect", ":TensorInferTypeOpInterfaceImpl", + ":TensorTilingInterfaceImpl", ":TensorTransforms", ":TosaDialect", ":TosaToLinalg", @@ -6913,6 +6948,7 @@ cc_library( ":Support", ":TensorBufferizableOpInterfaceImpl", ":TensorDialect", + ":TensorUtils", ":TransformUtils", ":VectorBufferizableOpInterfaceImpl", ":VectorOps", @@ -6952,7 +6988,6 @@ cc_library( deps = [ ":IR", ":Support", - ":TensorDialect", ":TilingInterfaceIncGen", ":ViewLikeInterface", "//llvm:Support", @@ -7255,6 +7290,7 @@ cc_library( ":SCFDialect", ":StandardOps", ":TensorDialect", + ":TensorUtils", ":TosaDialect", ":Transforms", ], From 3c90ae5d0b7137315e1c60734bc32b4356f987d4 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Fri, 21 Jan 2022 20:34:17 +0100 Subject: [PATCH 206/946] Revert "[flang] Update tco tool pipline and add translation to LLVM IR" This reverts commit 68db0e25df4b1edaa2c6080eb88453ab01ea01d3. 
--- .../include/flang/Optimizer/CodeGen/CodeGen.h | 10 +- .../include/flang/Optimizer/Support/InitFIR.h | 12 +- flang/include/flang/Tools/CLOptions.inc | 160 ------------------ flang/lib/Optimizer/CodeGen/CodeGen.cpp | 37 ---- flang/lib/Optimizer/Support/CMakeLists.txt | 1 - flang/lib/Optimizer/Support/InitFIR.cpp | 20 --- flang/test/Fir/basic-program.fir | 12 -- flang/tools/tco/CMakeLists.txt | 20 +-- flang/tools/tco/tco.cpp | 45 +---- 9 files changed, 15 insertions(+), 302 deletions(-) delete mode 100644 flang/include/flang/Tools/CLOptions.inc delete mode 100644 flang/lib/Optimizer/Support/InitFIR.cpp delete mode 100644 flang/test/Fir/basic-program.fir diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h index 939d6aebb524d..1bd31b207859a 100644 --- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h +++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h @@ -12,8 +12,6 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassRegistry.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" #include namespace fir { @@ -38,13 +36,9 @@ std::unique_ptr> createFirTargetRewritePass( /// Convert FIR to the LLVM IR dialect std::unique_ptr createFIRToLLVMPass(); -using LLVMIRLoweringPrinter = - std::function; /// Convert the LLVM IR dialect to LLVM-IR proper -std::unique_ptr createLLVMDialectToLLVMPass( - llvm::raw_ostream &output, - LLVMIRLoweringPrinter printer = - [](llvm::Module &m, llvm::raw_ostream &out) { m.print(out, nullptr); }); +std::unique_ptr +createLLVMDialectToLLVMPass(llvm::raw_ostream &output); // declarative passes #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index 2e8c1685a06f7..e78967de2a383 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -13,6 +13,7 @@ #ifndef 
FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H +#include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "mlir/Conversion/Passes.h" #include "mlir/Dialect/Affine/Passes.h" @@ -34,19 +35,11 @@ namespace fir::support { #define FLANG_DIALECT_LIST \ FLANG_NONCODEGEN_DIALECT_LIST, FIRCodeGenDialect, mlir::LLVM::LLVMDialect -inline void registerNonCodegenDialects(mlir::DialectRegistry ®istry) { - registry.insert(); -} - /// Register all the dialects used by flang. inline void registerDialects(mlir::DialectRegistry ®istry) { registry.insert(); } -inline void loadNonCodegenDialects(mlir::MLIRContext &context) { - context.loadDialect(); -} - /// Forced load of all the dialects used by flang. Lowering is not an MLIR /// pass, but a producer of FIR and MLIR. It is therefore a requirement that the /// dialects be preloaded to be able to build the IR. @@ -82,9 +75,6 @@ inline void registerMLIRPassesForFortranTools() { mlir::registerConvertAffineToStandardPass(); } -/// Register the interfaces needed to lower to LLVM IR. -void registerLLVMTranslation(mlir::MLIRContext &context); - } // namespace fir::support #endif // FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc deleted file mode 100644 index 1c85075d5cc17..0000000000000 --- a/flang/include/flang/Tools/CLOptions.inc +++ /dev/null @@ -1,160 +0,0 @@ -//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// This file defines some shared command-line options that can be used when -/// debugging the test tools. This file must be included into the tool. 
- -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" -#include "flang/Optimizer/CodeGen/CodeGen.h" -#include "flang/Optimizer/Transforms/Passes.h" -#include "llvm/Support/CommandLine.h" - -#define DisableOption(DOName, DOOption, DODescription) \ - static llvm::cl::opt disable##DOName("disable-" DOOption, \ - llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \ - llvm::cl::Hidden) - -/// Shared option in tools to control whether dynamically sized array -/// allocations should always be on the heap. -static llvm::cl::opt dynamicArrayStackToHeapAllocation( - "fdynamic-heap-array", - llvm::cl::desc("place all array allocations of dynamic size on the heap"), - llvm::cl::init(false), llvm::cl::Hidden); - -/// Shared option in tools to set a maximum value for the number of elements in -/// a compile-time sized array that can be allocated on the stack. -static llvm::cl::opt arrayStackAllocationThreshold( - "fstack-array-size", - llvm::cl::desc( - "place all array allocations more than elements on the heap"), - llvm::cl::init(~static_cast(0)), llvm::cl::Hidden); - -namespace { -/// Optimizer Passes -DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass"); -DisableOption(FirAvc, "avc", "array value copy analysis and transformation"); -DisableOption( - FirMao, "memory-allocation-opt", "memory allocation optimization"); - -/// CodeGen Passes -#if !defined(FLANG_EXCLUDE_CODEGEN) -DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen"); -DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target"); -DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect"); -DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM"); -#endif - -/// Generic for adding a pass to the pass manager if it is not disabled. 
-template -void addPassConditionally( - mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { - if (!disabled) - pm.addPass(ctor()); -} - -template -void addNestedPassConditionally( - mlir::PassManager &pm, llvm::cl::opt &disabled, F ctor) { - if (!disabled) - pm.addNestedPass(ctor()); -} - -} // namespace - -namespace fir { - -static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) { - mlir::GreedyRewriteConfig config; - config.enableRegionSimplification = false; - pm.addPass(mlir::createCanonicalizerPass(config)); -} - -inline void addCfgConversionPass(mlir::PassManager &pm) { - addNestedPassConditionally( - pm, disableCfgConversion, fir::createFirToCfgPass); -} - -inline void addAVC(mlir::PassManager &pm) { - addNestedPassConditionally( - pm, disableFirAvc, fir::createArrayValueCopyPass); -} - -#if !defined(FLANG_EXCLUDE_CODEGEN) -inline void addCodeGenRewritePass(mlir::PassManager &pm) { - addPassConditionally( - pm, disableCodeGenRewrite, fir::createFirCodeGenRewritePass); -} - -inline void addTargetRewritePass(mlir::PassManager &pm) { - addPassConditionally(pm, disableTargetRewrite, []() { - return fir::createFirTargetRewritePass(fir::TargetRewriteOptions{}); - }); -} - -inline void addFIRToLLVMPass(mlir::PassManager &pm) { - addPassConditionally(pm, disableFirToLlvmIr, fir::createFIRToLLVMPass); -} - -inline void addLLVMDialectToLLVMPass( - mlir::PassManager &pm, llvm::raw_ostream &output) { - addPassConditionally(pm, disableLlvmIrToLlvm, - [&]() { return fir::createLLVMDialectToLLVMPass(output); }); -} -#endif - -/// Create a pass pipeline for running default optimization passes for -/// incremental conversion of FIR. 
-/// -/// \param pm - MLIR pass manager that will hold the pipeline definition -inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) { - // simplify the IR - mlir::GreedyRewriteConfig config; - config.enableRegionSimplification = false; - fir::addAVC(pm); - pm.addNestedPass(fir::createCharacterConversionPass()); - pm.addPass(mlir::createCanonicalizerPass(config)); - - // The default inliner pass adds the canonicalizer pass with the default - // configuration. Create the inliner pass with tco config. - llvm::StringMap pipelines; - pm.addPass( - mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline)); - pm.addPass(mlir::createCSEPass()); - - // convert control flow to CFG form - fir::addCfgConversionPass(pm); - pm.addPass(mlir::createLowerToCFGPass()); - - pm.addPass(mlir::createCanonicalizerPass(config)); -} - -#if !defined(FLANG_EXCLUDE_CODEGEN) -inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm) { - pm.addNestedPass(fir::createAbstractResultOptPass()); - fir::addCodeGenRewritePass(pm); - fir::addTargetRewritePass(pm); - fir::addFIRToLLVMPass(pm); -} - -/// Create a pass pipeline for lowering from MLIR to LLVM IR -/// -/// \param pm - MLIR pass manager that will hold the pipeline definition -inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm) { - // Add default optimizer pass pipeline. - fir::createDefaultFIROptimizerPassPipeline(pm); - - // Add codegen pass pipeline. 
- fir::createDefaultFIRCodeGenPassPipeline(pm); -} -#undef FLANG_EXCLUDE_CODEGEN -#endif - -} // namespace fir diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 40d6d2017b2fa..be2e7cde916df 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -23,7 +23,6 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" #include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/ArrayRef.h" #define DEBUG_TYPE "flang-codegen" @@ -3306,44 +3305,8 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase { } } }; - -/// Lower from LLVM IR dialect to proper LLVM-IR and dump the module -struct LLVMIRLoweringPass - : public mlir::PassWrapper> { - using Printer = fir::LLVMIRLoweringPrinter; - LLVMIRLoweringPass(raw_ostream &output, Printer p) - : output{output}, printer{p} {} - - mlir::ModuleOp getModule() { return getOperation(); } - - void runOnOperation() override final { - auto *ctx = getModule().getContext(); - auto optName = getModule().getName(); - llvm::LLVMContext llvmCtx; - if (auto llvmModule = mlir::translateModuleToLLVMIR( - getModule(), llvmCtx, optName ? 
*optName : "FIRModule")) { - printer(*llvmModule, output); - return; - } - - mlir::emitError(mlir::UnknownLoc::get(ctx), "could not emit LLVM-IR\n"); - signalPassFailure(); - } - -private: - raw_ostream &output; - Printer printer; -}; - } // namespace std::unique_ptr fir::createFIRToLLVMPass() { return std::make_unique(); } - -std::unique_ptr -fir::createLLVMDialectToLLVMPass(raw_ostream &output, - fir::LLVMIRLoweringPrinter printer) { - return std::make_unique(output, printer); -} diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt index 779e20711513e..30a163de9ccaf 100644 --- a/flang/lib/Optimizer/Support/CMakeLists.txt +++ b/flang/lib/Optimizer/Support/CMakeLists.txt @@ -2,7 +2,6 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_flang_library(FIRSupport FIRContext.cpp - InitFIR.cpp InternalNames.cpp KindMapping.cpp diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp deleted file mode 100644 index baa1336d9ca02..0000000000000 --- a/flang/lib/Optimizer/Support/InitFIR.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===-- Optimizer/Support/InitFIR.cpp -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "flang/Optimizer/Support/InitFIR.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" - -void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) { - mlir::DialectRegistry registry; - // Register OpenMP dialect interface here as well. - mlir::registerOpenMPDialectTranslation(registry); - // Register LLVM-IR dialect interface. 
- registerLLVMDialectTranslation(registry); - context.appendDialectRegistry(registry); -} diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir deleted file mode 100644 index b417a6148d39b..0000000000000 --- a/flang/test/Fir/basic-program.fir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: tco %s | FileCheck %s -// REQUIRES: shell - -// Check that tco is working with a basic test. - -func @_QQmain() { - return -} - -// CHECK: ; ModuleID = 'FIRModule' -// CHECK-LABEL: define void @_QQmain() -// CHECK: ret void diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index a64b9c59bd02a..1a9c5ac72f153 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -1,25 +1,13 @@ -set(LLVM_LINK_COMPONENTS - AllTargetsAsmParsers - AllTargetsCodeGens - AllTargetsDescs - AllTargetsInfos -) -llvm_map_components_to_libnames(llvm_libs ${LLVM_LINK_COMPONENTS}) - -add_flang_tool(tco tco.cpp) -llvm_update_compile_flags(tco) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) -target_link_libraries(tco PRIVATE + +set(LIBS FIRCodeGen FIRDialect FIRSupport FIRTransforms - FIRBuilder ${dialect_libs} MLIRIR MLIRLLVMIR - MLIRLLVMToLLVMIRTranslation - MLIRTargetLLVMIRExport MLIRPass MLIRStandardToLLVM MLIRTransforms @@ -30,5 +18,7 @@ target_link_libraries(tco PRIVATE MLIRStandardToLLVM MLIRSupport MLIRVectorToLLVM - ${llvm_libs} ) + +add_flang_tool(tco tco.cpp) +target_link_libraries(tco PRIVATE ${LIBS}) diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp index 2bb3b27e7eb63..8f2c283bc82f9 100644 --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -11,14 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "flang/Optimizer/CodeGen/CodeGen.h" -#include "flang/Optimizer/Support/FIRContext.h" #include "flang/Optimizer/Support/InitFIR.h" -#include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Support/KindMapping.h" -#include 
"flang/Optimizer/Transforms/Passes.h" -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" -#include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Parser.h" @@ -31,13 +25,11 @@ #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -/// list of program return codes static cl::opt inputFilename(cl::Positional, cl::desc(""), cl::init("-")); @@ -50,14 +42,8 @@ static cl::opt emitFir("emit-fir", cl::desc("Parse and pretty-print the input"), cl::init(false)); -static cl::opt targetTriple("target", - cl::desc("specify a target triple"), - cl::init("native")); - -#include "flang/Tools/CLOptions.inc" - static void printModuleBody(mlir::ModuleOp mod, raw_ostream &output) { - for (auto &op : *mod.getBody()) + for (auto &op : mod.getBody()->without_terminator()) output << op << '\n'; } @@ -79,8 +65,6 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { mlir::DialectRegistry registry; fir::support::registerDialects(registry); mlir::MLIRContext context(registry); - fir::support::loadDialects(context); - fir::support::registerLLVMTranslation(context); auto owningRef = mlir::parseSourceFile(sourceMgr, &context); if (!owningRef) { @@ -96,31 +80,21 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { ToolOutputFile out(outputFilename, ec, sys::fs::OF_None); // run passes - fir::KindMapping kindMap{&context}; - fir::setTargetTriple(*owningRef, targetTriple); - fir::setKindMapping(*owningRef, kindMap); - mlir::PassManager pm(&context, mlir::OpPassManager::Nesting::Implicit); - pm.enableVerifier(/*verifyPasses=*/true); + mlir::PassManager pm{&context}; mlir::applyPassManagerCLOptions(pm); if (emitFir) { // parse the input and pretty-print it back out // -emit-fir intentionally disables all the passes - } else if 
(passPipeline.hasAnyOccurrences()) { - auto errorHandler = [&](const Twine &msg) { - mlir::emitError(mlir::UnknownLoc::get(pm.getContext())) << msg; - return mlir::failure(); - }; - if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler))) - return mlir::failure(); } else { - fir::createMLIRToLLVMPassPipeline(pm); - fir::addLLVMDialectToLLVMPass(pm, out.os()); + // TODO: Actually add passes when added to FIR code base + // add all the passes + // the user can disable them individually } // run the pass manager if (mlir::succeeded(pm.run(*owningRef))) { // passes ran successfully, so keep the output - if (emitFir || passPipeline.hasAnyOccurrences()) + if (emitFir) printModuleBody(*owningRef, out.os()); out.keep(); return mlir::success(); @@ -133,13 +107,8 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) { } int main(int argc, char **argv) { - [[maybe_unused]] InitLLVM y(argc, argv); fir::support::registerMLIRPassesForFortranTools(); - fir::registerOptCodeGenPasses(); - fir::registerOptTransformPasses(); - InitializeAllTargets(); - mlir::registerAsmPrinterCLOptions(); - mlir::registerMLIRContextCLOptions(); + [[maybe_unused]] InitLLVM y(argc, argv); mlir::registerPassManagerCLOptions(); mlir::PassPipelineCLParser passPipe("", "Compiler passes to run"); cl::ParseCommandLineOptions(argc, argv, "Tilikum Crossing Optimizer\n"); From 48132bb1e437f213e7a183a6a69e7589abe33af6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 21 Jan 2022 11:30:49 -0800 Subject: [PATCH 207/946] [RISCV] Simplify interface to combineMUL_VLToVWMUL. NFC Instead of passing the both the SDNode* and 2 of the operands in two different orders, just pass the SDNode * and a bool to indicate which operand order to test. While there rename to combineMUL_VLToVWMUL_VL. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4aba42b014d17..e1f1c49094424 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7242,9 +7242,14 @@ static SDValue performANY_EXTENDCombine(SDNode *N, // Try to form VWMUL or VWMULU. // FIXME: Support VWMULSU. -static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1, - SelectionDAG &DAG) { +static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG, + bool Commute) { assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode"); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Commute) + std::swap(Op0, Op1); + bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL; bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL; if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse()) @@ -7887,15 +7892,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, } break; } - case RISCVISD::MUL_VL: { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG)) + case RISCVISD::MUL_VL: + if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false)) return V; - if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG)) - return V; - return SDValue(); - } + // Mul is commutative. + return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true); case ISD::STORE: { auto *Store = cast(N); SDValue Val = Store->getValue(); From d6e2c95d2252f479d5bc7a74df70f90eba94b34d Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Fri, 21 Jan 2022 18:00:33 +0100 Subject: [PATCH 208/946] [libc++] Use addressof in unordered_map. This addresses the usage of `operator&` in ``. (Note there are still more headers with the same issue.) 
Reviewed By: #libc, Quuxplusone, ldionne Differential Revision: https://reviews.llvm.org/D117393 --- libcxx/include/__hash_table | 40 +++++++-------- libcxx/include/unordered_map | 21 ++++---- ...rator.operators.addressof.compile.pass.cpp | 49 +++++++++++++++++++ .../assign_move.addressof.compile.pass.cpp | 42 ++++++++++++++++ .../move.addressof.compile.pass.cpp | 33 +++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 36 ++++++++++++++ .../emplace_hint.addressof.compile.pass.cpp | 30 ++++++++++++ ...rase_const_iter.addressof.compile.pass.cpp | 27 ++++++++++ .../erase_range.addressof.compile.pass.cpp | 27 ++++++++++ ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++ ...ible_value_type.addressof.compile.pass.cpp | 28 +++++++++++ ...alue_value_type.addressof.compile.pass.cpp | 28 +++++++++++ ...ry_emplace_hint.addressof.compile.pass.cpp | 40 +++++++++++++++ .../swap.addressof.compile.pass.cpp | 29 +++++++++++ .../move.addressof.compile.pass.cpp | 33 +++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 36 ++++++++++++++ .../emplace_hint.addressof.compile.pass.cpp | 30 ++++++++++++ 17 files changed, 527 insertions(+), 30 deletions(-) create mode 100644 libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp create 
mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 6b682ab27c6c3..adc732cffb015 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -308,9 +308,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_iterator& operator=(const __hash_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; } return *this; @@ -406,7 +406,7 @@ public: : __node_(__x.__node_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, &__x); + __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); #endif } @@ -415,7 +415,7 @@ public: __hash_const_iterator(const __hash_const_iterator& __i) : __node_(__i.__node_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -427,9 +427,9 @@ public: 
_LIBCPP_INLINE_VISIBILITY __hash_const_iterator& operator=(const __hash_const_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; } return *this; @@ -523,7 +523,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -535,9 +535,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_local_iterator& operator=(const __hash_local_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -655,7 +655,7 @@ public: __bucket_count_(__x.__bucket_count_) { #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__iterator_copy(this, &__x); + __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); #endif } @@ -666,7 +666,7 @@ public: __bucket_(__i.__bucket_), __bucket_count_(__i.__bucket_count_) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); } _LIBCPP_INLINE_VISIBILITY @@ -678,9 +678,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i) { - if (this != &__i) + if (this != _VSTD::addressof(__i)) { - __get_db()->__iterator_copy(this, &__i); + __get_db()->__iterator_copy(this, _VSTD::addressof(__i)); __node_ = __i.__node_; __bucket_ = __i.__bucket_; __bucket_count_ = __i.__bucket_count_; @@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( __u.size() = 0; } #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, 
_Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); if (__p != end() && key_eq()(*__p, __cp->__value_)) @@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...); @@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered container erase(iterator) called with an iterator not" " referring to this container"); _LIBCPP_DEBUG_ASSERT(__p != end(), @@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); - 
_LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this, "unordered container::erase(iterator, iterator) called with an iterator not" " referring to this container"); for (const_iterator __p = __first; __first != __last; __p = __first) @@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = __u.__p1_.first().__ptr(); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 73edadab20990..361db707d2461 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -519,6 +519,7 @@ template #include <__functional/is_transparent.h> #include <__hash_table> #include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> #include <__node_handle> #include <__utility/forward.h> #include @@ -1186,7 +1187,7 @@ public: {return __table_.__insert_unique(__x);} iterator insert(const_iterator __p, const value_type& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not " "referring to this unordered_map"); ((void)__p); @@ -1207,7 +1208,7 @@ public: {return __table_.__insert_unique(_VSTD::move(__x));} iterator insert(const_iterator __p, value_type&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1225,7 +1226,7 @@ public: 
_LIBCPP_INLINE_VISIBILITY iterator insert(const_iterator __p, _Pp&& __x) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::insert(const_iterator, value_type&&) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1241,7 +1242,7 @@ public: template _LIBCPP_INLINE_VISIBILITY iterator emplace_hint(const_iterator __p, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered_map"); ((void)__p); @@ -1273,7 +1274,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this, "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1284,7 +1285,7 @@ public: _LIBCPP_INLINE_VISIBILITY iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args) { - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this, + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this, "unordered_map::try_emplace(const_iterator, key, args...) 
called with an iterator not" " referring to this unordered_map"); ((void)__h); @@ -1692,7 +1693,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -1712,7 +1713,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2468,7 +2469,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -2489,7 +2490,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..856b78293a107 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// Validate the constructors of the (const)(_local)_iterator classes to be +// properly guarded against ADL-hijacking operator&. 
+ +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +template +void test() { + FromIterator from; + ToIterator copy(from); + copy = from; + + ToIterator move(std::move(from)); + from = FromIterator(); + move = std::move(from); +} + +void test() { + { + using I = std::unordered_map::iterator; + using CI = std::unordered_map::const_iterator; + test(); + test(); + test(); + } + { + using IL = std::unordered_map::local_iterator; + using CIL = std::unordered_map::const_local_iterator; + test(); + test(); + test(); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..6dbd7aaea2a8e --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map& operator=(unordered_map&&) +// noexcept( +// allocator_type::propagate_on_container_move_assignment::value && +// is_nothrow_move_assignable::value && +// is_nothrow_move_assignable::value && +// is_nothrow_move_assignable::value); + +// Validate whether the container can be move-assigned with an ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + { + std::unordered_map mo; + std::unordered_map m; + m = std::move(mo); + } + { + std::unordered_map mo; + std::unordered_map m; + m = std::move(mo); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..e36c6525d631b --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map(unordered_map&& u) +// noexcept( +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map mo; + std::unordered_map m(std::move(mo)); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..1fec0ee5d0f4b --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// unordered_map(unordered_map&& u, const allocator_type& a); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +#include "test_allocator.h" +#include "min_allocator.h" + +void test() { + using A = test_allocator>; + using C = std::unordered_map, + std::equal_to, A>; + + C mo; + C m(std::move(mo), A()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..58ddefd8cfbfc --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template +// iterator emplace_hint(const_iterator position, Args&&... 
args); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.emplace_hint(m.cbegin()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..1461f2499baad --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator erase(const_iterator p) + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.erase(m.cbegin()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..5f342f7b2152f --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator erase(const_iterator first, const_iterator last) + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + m.erase(m.cbegin(), m.cend()); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..db1805e7d7e63 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator insert(const_iterator p, const value_type& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + const std::pair v; + m.insert(m.cend(), v); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..530b826b61e78 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template ::value>::type> +// pair insert(P&& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test(std::unordered_map& m) { m.insert(m.cend(), *m.begin()); } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..80219cb193edd --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// iterator insert(const_iterator hint, value_type&& obj); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test(std::unordered_map& m) { + m.insert(m.cend(), std::pair{}); +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..2c667374d4fe8 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_map + +// template +// iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args); +// template +// iterator try_emplace(const_iterator hint, key_type&& k, Args&&... 
args); +// template + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m; + { + const operator_hijacker k; + m.try_emplace(m.cend(), k); + } + { + operator_hijacker k; + m.try_emplace(m.cend(), std::move(k)); + } +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..f5b5f516d42b5 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// void swap(unordered_map& c) +// noexcept(allocator_traits::is_always_equal::value && +// noexcept(swap(declval(), declval())) && +// noexcept(swap(declval(), declval()))); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_map m1; + std::unordered_map m2; + std::swap(m1, m2); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..73b19f35e2048 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,33 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// unordered_multimap(unordered_multimap&&) +// noexcept( +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_multimap mo; + std::unordered_multimap m(std::move(mo)); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..6419a03666d65 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// unordered_multimap(unordered_map&& u, const allocator_type& a); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +#include "test_allocator.h" +#include "min_allocator.h" + +void test() { + using A = test_allocator>; + using C = std::unordered_multimap, + std::equal_to, A>; + + C mo; + C m(std::move(mo), A()); +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..5e23b73cf34b3 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator>> +// class unordered_multimap + +// template +// iterator emplace_hint(const_iterator position, Args&&... 
args); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_multimap m; + m.emplace_hint(m.cbegin()); +} From 4d0a18d06e8ea6ee38efd53e9febf7cefe7d3925 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:56:25 -0800 Subject: [PATCH 209/946] [mlir][sparse] Adding assertions for overhead storage types Fixes https://bugs.llvm.org/show_bug.cgi?id=52314 aka https://github.com/llvm/llvm-project/issues/51656 Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D117597 --- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 50 +++++++++++++++---- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 9087ed1885bfa..20cd1b53d31b4 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -254,25 +255,28 @@ class SparseTensorStorage : public SparseTensorStorageBase { bool allDense = true; uint64_t sz = 1; for (uint64_t r = 0; r < rank; r++) { + assert(sizes[r] > 0 && "Dimension size zero has trivial storage"); sz *= sizes[r]; if (sparsity[r] == DimLevelType::kCompressed) { pointers[r].reserve(sz + 1); indices[r].reserve(sz); sz = 1; allDense = false; + // Prepare the pointer structure. We cannot use `addPointer` + // here, because `isCompressedDim` won't work until after this + // preparation has been done. + pointers[r].push_back(0); } else { assert(sparsity[r] == DimLevelType::kDense && "singleton not yet supported"); } } - // Prepare sparse pointer structures for all dimensions. 
- for (uint64_t r = 0; r < rank; r++) - if (sparsity[r] == DimLevelType::kCompressed) - pointers[r].push_back(0); // Then assign contents from coordinate scheme tensor if provided. if (tensor) { - // Lexicographically sort the tensor, to ensure precondition of `fromCOO`. + // Ensure both preconditions of `fromCOO`. + assert(tensor->getSizes() == sizes && "Tensor size mismatch"); tensor->sort(); + // Now actually insert the `elements`. const std::vector> &elements = tensor->getElements(); uint64_t nnz = elements.size(); values.reserve(nnz); @@ -403,10 +407,33 @@ class SparseTensorStorage : public SparseTensorStorageBase { } private: + /// Appends the next free position of `indices[d]` to `pointers[d]`. + /// Thus, when called after inserting the last element of a segment, + /// it will append the position where the next segment begins. + inline void addPointer(uint64_t d) { + assert(isCompressedDim(d)); // Entails `d < getRank()`. + uint64_t p = indices[d].size(); + assert(p <= std::numeric_limits

::max() && + "Pointer value is too large for the P-type"); + pointers[d].push_back(p); // Here is where we convert to `P`. + } + + /// Appends the given index to `indices[d]`. + inline void addIndex(uint64_t d, uint64_t i) { + assert(isCompressedDim(d)); // Entails `d < getRank()`. + assert(i <= std::numeric_limits::max() && + "Index value is too large for the I-type"); + indices[d].push_back(i); // Here is where we convert to `I`. + } + /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the pointers and /// indices arrays under the given per-dimension dense/sparse annotations. - /// Precondition: the `elements` must be lexicographically sorted. + /// + /// Preconditions: + /// (1) the `elements` must be lexicographically sorted. + /// (2) the indices of every element are valid for `sizes` (equal rank + /// and pointwise less-than). void fromCOO(const std::vector> &elements, uint64_t lo, uint64_t hi, uint64_t d) { // Once dimensions are exhausted, insert the numerical values. @@ -426,7 +453,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { seg++; // Handle segment in interval for sparse or dense dimension. if (isCompressedDim(d)) { - indices[d].push_back(i); + addIndex(d, i); } else { // For dense storage we must fill in all the zero values between // the previous element (when last we ran this for-loop) and the @@ -441,7 +468,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } // Finalize the sparse pointer structure at this dimension. if (isCompressedDim(d)) { - pointers[d].push_back(indices[d].size()); + addPointer(d); } else { // For dense storage we must fill in all the zero values after // the last element. 
@@ -479,7 +506,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { if (d == getRank()) { values.push_back(0); } else if (isCompressedDim(d)) { - pointers[d].push_back(indices[d].size()); + addPointer(d); } else { for (uint64_t full = 0, sz = sizes[d]; full < sz; full++) endDim(d + 1); @@ -493,7 +520,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { for (uint64_t i = 0; i < rank - diff; i++) { uint64_t d = rank - i - 1; if (isCompressedDim(d)) { - pointers[d].push_back(indices[d].size()); + addPointer(d); } else { for (uint64_t full = idx[d] + 1, sz = sizes[d]; full < sz; full++) endDim(d + 1); @@ -508,7 +535,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { for (uint64_t d = diff; d < rank; d++) { uint64_t i = cursor[d]; if (isCompressedDim(d)) { - indices[d].push_back(i); + addIndex(d, i); } else { for (uint64_t full = top; full < i; full++) endDim(d + 1); @@ -532,6 +559,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// Returns true if dimension is compressed. inline bool isCompressedDim(uint64_t d) const { + assert(d < getRank()); return (!pointers[d].empty()); } From cb8b94f6efa9a1b434afd9906e87e918ffe762dd Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 21 Jan 2022 20:01:06 +0000 Subject: [PATCH 210/946] [AArch64] Add extra tests useful in testing hadd. 
NFC --- llvm/test/CodeGen/AArch64/arm64-vhadd.ll | 427 +++++++++++++++++++++++ 1 file changed, 427 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll index 712bd16870237..d692d6b2f8bea 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -817,6 +817,433 @@ define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { } +define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) nounwind { +; CHECK-LABEL: hadd8_sext_asr: +; CHECK: // %bb.0: +; CHECK-NEXT: shl.4h v0, v0, #8 +; CHECK-NEXT: shl.4h v1, v1, #8 +; CHECK-NEXT: sshr.4h v0, v0, #8 +; CHECK-NEXT: ssra.4h v0, v1, #8 +; CHECK-NEXT: sshr.4h v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = sext <4 x i8> %src1 to <4 x i16> + %zextsrc2 = sext <4 x i8> %src2 to <4 x i16> + %add = add <4 x i16> %zextsrc1, %zextsrc2 + %resulti8 = ashr <4 x i16> %add, + ret <4 x i16> %resulti8 +} + +define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) nounwind { +; CHECK-LABEL: hadd8_zext_asr: +; CHECK: // %bb.0: +; CHECK-NEXT: bic.4h v0, #255, lsl #8 +; CHECK-NEXT: bic.4h v1, #255, lsl #8 +; CHECK-NEXT: add.4h v0, v0, v1 +; CHECK-NEXT: ushr.4h v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i8> %src1 to <4 x i16> + %zextsrc2 = zext <4 x i8> %src2 to <4 x i16> + %add = add <4 x i16> %zextsrc1, %zextsrc2 + %resulti8 = ashr <4 x i16> %add, + ret <4 x i16> %resulti8 +} + +define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) nounwind { +; CHECK-LABEL: hadd8_sext_lsr: +; CHECK: // %bb.0: +; CHECK-NEXT: shl.4h v0, v0, #8 +; CHECK-NEXT: shl.4h v1, v1, #8 +; CHECK-NEXT: sshr.4h v0, v0, #8 +; CHECK-NEXT: ssra.4h v0, v1, #8 +; CHECK-NEXT: ushr.4h v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = sext <4 x i8> %src1 to <4 x i16> + %zextsrc2 = sext <4 x i8> %src2 to <4 x i16> + %add = add <4 x i16> %zextsrc1, %zextsrc2 + %resulti8 = lshr <4 x i16> %add, + ret <4 x i16> %resulti8 +} + +define <4 x 
i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) nounwind { +; CHECK-LABEL: hadd8_zext_lsr: +; CHECK: // %bb.0: +; CHECK-NEXT: bic.4h v0, #255, lsl #8 +; CHECK-NEXT: bic.4h v1, #255, lsl #8 +; CHECK-NEXT: add.4h v0, v0, v1 +; CHECK-NEXT: ushr.4h v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i8> %src1 to <4 x i16> + %zextsrc2 = zext <4 x i8> %src2 to <4 x i16> + %add = add <4 x i16> %zextsrc1, %zextsrc2 + %resulti8 = lshr <4 x i16> %add, + ret <4 x i16> %resulti8 +} + + + +define void @testLowerToSHADD8b_c(<8 x i8> %src1, <8 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD8b_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v1, #10 +; CHECK-NEXT: saddw.8h v0, v1, v0 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <8 x i8> %src1 to <8 x i16> + %add = add <8 x i16> %sextsrc1, + %resulti16 = lshr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + store <8 x i8> %result, <8 x i8>* %dest, align 8 + ret void +} + +define void @testLowerToSHADD4h_c(<4 x i16> %src1, <4 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD4h_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: saddw.4s v0, v1, v0 +; CHECK-NEXT: shrn.4h v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <4 x i16> %src1 to <4 x i32> + %add = add <4 x i32> %sextsrc1, + %resulti16 = lshr <4 x i32> %add, + %result = trunc <4 x i32> %resulti16 to <4 x i16> + store <4 x i16> %result, <4 x i16>* %dest, align 8 + ret void +} + +define void @testLowerToSHADD2s_c(<2 x i32> %src1, <2 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD2s_c: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: dup.2d v1, x8 +; CHECK-NEXT: saddw.2d v0, v1, v0 +; CHECK-NEXT: shrn.2s v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <2 x i32> %src1 to <2 x i64> + %add = add <2 x i64> %sextsrc1, + %resulti16 = lshr <2 x i64> %add, + %result = trunc <2 x i64> %resulti16 
to <2 x i32> + store <2 x i32> %result, <2 x i32>* %dest, align 8 + ret void +} + +define void @testLowerToSHADD16b_c(<16 x i8> %src1, <16 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD16b_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v1, #10 +; CHECK-NEXT: saddw.8h v2, v1, v0 +; CHECK-NEXT: saddw2.8h v0, v1, v0 +; CHECK-NEXT: shrn.8b v1, v2, #1 +; CHECK-NEXT: shrn2.16b v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <16 x i8> %src1 to <16 x i16> + %add = add <16 x i16> %sextsrc1, + %resulti16 = lshr <16 x i16> %add, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + store <16 x i8> %result, <16 x i8>* %dest, align 16 + ret void +} + +define void @testLowerToSHADD8h_c(<8 x i16> %src1, <8 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD8h_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: saddw.4s v2, v1, v0 +; CHECK-NEXT: saddw2.4s v0, v1, v0 +; CHECK-NEXT: shrn.4h v1, v2, #1 +; CHECK-NEXT: shrn2.8h v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <8 x i16> %src1 to <8 x i32> + %add = add <8 x i32> %sextsrc1, + %resulti16 = lshr <8 x i32> %add, + %result = trunc <8 x i32> %resulti16 to <8 x i16> + store <8 x i16> %result, <8 x i16>* %dest, align 16 + ret void +} + +define void @testLowerToSHADD4s_c(<4 x i32> %src1, <4 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToSHADD4s_c: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: dup.2d v1, x8 +; CHECK-NEXT: saddw.2d v2, v1, v0 +; CHECK-NEXT: saddw2.2d v0, v1, v0 +; CHECK-NEXT: shrn.2s v1, v2, #1 +; CHECK-NEXT: shrn2.4s v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %sextsrc1 = sext <4 x i32> %src1 to <4 x i64> + %add = add <4 x i64> %sextsrc1, + %resulti16 = lshr <4 x i64> %add, + %result = trunc <4 x i64> %resulti16 to <4 x i32> + store <4 x i32> %result, <4 x i32>* %dest, align 16 + ret void +} + +define void @testLowerToUHADD8b_c(<8 x i8> %src1, <8 x i8>* %dest) nounwind { +; CHECK-LABEL: 
testLowerToUHADD8b_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v1, #10 +; CHECK-NEXT: uaddw.8h v0, v1, v0 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <8 x i8> %src1 to <8 x i16> + %add = add <8 x i16> %zextsrc1, + %resulti16 = lshr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + store <8 x i8> %result, <8 x i8>* %dest, align 8 + ret void +} + +define void @testLowerToUHADD4h_c(<4 x i16> %src1, <4 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToUHADD4h_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: uaddw.4s v0, v1, v0 +; CHECK-NEXT: shrn.4h v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i16> %src1 to <4 x i32> + %add = add <4 x i32> %zextsrc1, + %resulti16 = lshr <4 x i32> %add, + %result = trunc <4 x i32> %resulti16 to <4 x i16> + store <4 x i16> %result, <4 x i16>* %dest, align 8 + ret void +} + +define void @testLowerToUHADD2s_c(<2 x i32> %src1, <2 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToUHADD2s_c: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: dup.2d v1, x8 +; CHECK-NEXT: uaddw.2d v0, v1, v0 +; CHECK-NEXT: shrn.2s v0, v0, #1 +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <2 x i32> %src1 to <2 x i64> + %add = add <2 x i64> %zextsrc1, + %resulti16 = lshr <2 x i64> %add, + %result = trunc <2 x i64> %resulti16 to <2 x i32> + store <2 x i32> %result, <2 x i32>* %dest, align 8 + ret void +} + +define void @testLowerToUHADD16b_c(<16 x i8> %src1, <16 x i8>* %dest) nounwind { +; CHECK-LABEL: testLowerToUHADD16b_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v1, #10 +; CHECK-NEXT: uaddw.8h v2, v1, v0 +; CHECK-NEXT: uaddw2.8h v0, v1, v0 +; CHECK-NEXT: shrn.8b v1, v2, #1 +; CHECK-NEXT: shrn2.16b v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <16 x i8> %src1 to <16 x i16> + %add = add <16 x i16> %zextsrc1, + %resulti16 = lshr <16 x i16> %add, + %result = trunc 
<16 x i16> %resulti16 to <16 x i8> + store <16 x i8> %result, <16 x i8>* %dest, align 16 + ret void +} + +define void @testLowerToUHADD8h_c(<8 x i16> %src1, <8 x i16>* %dest) nounwind { +; CHECK-LABEL: testLowerToUHADD8h_c: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: uaddw.4s v2, v1, v0 +; CHECK-NEXT: uaddw2.4s v0, v1, v0 +; CHECK-NEXT: shrn.4h v1, v2, #1 +; CHECK-NEXT: shrn2.8h v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> + %add = add <8 x i32> %zextsrc1, + %resulti16 = lshr <8 x i32> %add, + %result = trunc <8 x i32> %resulti16 to <8 x i16> + store <8 x i16> %result, <8 x i16>* %dest, align 16 + ret void +} + +define void @testLowerToUHADD4s_c(<4 x i32> %src1, <4 x i32>* %dest) nounwind { +; CHECK-LABEL: testLowerToUHADD4s_c: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: dup.2d v1, x8 +; CHECK-NEXT: uaddw.2d v2, v1, v0 +; CHECK-NEXT: uaddw2.2d v0, v1, v0 +; CHECK-NEXT: shrn.2s v1, v2, #1 +; CHECK-NEXT: shrn2.4s v1, v0, #1 +; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ret + %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> + %add = add <4 x i64> %zextsrc1, + %resulti16 = lshr <4 x i64> %add, + %result = trunc <4 x i64> %resulti16 to <4 x i32> + store <4 x i32> %result, <4 x i32>* %dest, align 16 + ret void +} + + +define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind { +; CHECK-LABEL: andmaskv8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v2, #7 +; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: uaddw.8h v0, v0, v1 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = and <8 x i16> %src1, + %zextsrc2 = zext <8 x i8> %src2 to <8 x i16> + %add = add <8 x i16> %zextsrc1, %zextsrc2 + %resulti16 = lshr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + ret <8 x i8> %result +} + +define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind { +; CHECK-LABEL: andmaskv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v3, #7 
+; CHECK-NEXT: and.16b v0, v0, v3 +; CHECK-NEXT: and.16b v1, v1, v3 +; CHECK-NEXT: uaddw.8h v0, v0, v2 +; CHECK-NEXT: uaddw2.8h v1, v1, v2 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: shrn2.16b v0, v1, #1 +; CHECK-NEXT: ret + %zextsrc1 = and <16 x i16> %src1, + %zextsrc2 = zext <16 x i8> %src2 to <16 x i16> + %add = add <16 x i16> %zextsrc1, %zextsrc2 + %resulti16 = lshr <16 x i16> %add, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + ret <16 x i8> %result +} + +define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) nounwind { +; CHECK-LABEL: andmask2v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v4, #7 +; CHECK-NEXT: movi.8h v5, #3 +; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: and.16b v2, v2, v5 +; CHECK-NEXT: and.16b v1, v1, v4 +; CHECK-NEXT: and.16b v3, v3, v5 +; CHECK-NEXT: add.8h v0, v0, v2 +; CHECK-NEXT: add.8h v1, v1, v3 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: shrn2.16b v0, v1, #1 +; CHECK-NEXT: ret + %zextsrc1 = and <16 x i16> %src1, + %zextsrc2 = and <16 x i16> %src2, + %add = add <16 x i16> %zextsrc1, %zextsrc2 + %resulti16 = lshr <16 x i16> %add, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + ret <16 x i8> %result +} + +define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) nounwind { +; CHECK-LABEL: andmask2v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v2, #7 +; CHECK-NEXT: bic.8h v1, #255, lsl #8 +; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: add.8h v0, v0, v1 +; CHECK-NEXT: shrn.8b v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = and <8 x i16> %src1, + %zextsrc2 = and <8 x i16> %src2, + %add = add <8 x i16> %zextsrc1, %zextsrc2 + %resulti16 = lshr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + ret <8 x i8> %result +} + +define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) nounwind { +; CHECK-LABEL: andmask3v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.8h v2, #7 +; CHECK-NEXT: bic.8h v1, #254, lsl #8 +; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: add.8h v0, v0, 
v1 +; CHECK-NEXT: ushr.8h v0, v0, #1 +; CHECK-NEXT: ret + %zextsrc1 = and <8 x i16> %src1, + %zextsrc2 = and <8 x i16> %src2, + %add = add <8 x i16> %zextsrc1, %zextsrc2 + %resulti16 = lshr <8 x i16> %add, + ret <8 x i16> %resulti16 +} + +define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind { +; CHECK-LABEL: sextmaskv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll.8h v3, v2, #0 +; CHECK-NEXT: sshr.8h v1, v1, #11 +; CHECK-NEXT: ssra.8h v3, v0, #11 +; CHECK-NEXT: saddw2.8h v1, v1, v2 +; CHECK-NEXT: shrn.8b v0, v3, #1 +; CHECK-NEXT: shrn2.16b v0, v1, #1 +; CHECK-NEXT: ret + %sextsrc1 = ashr <16 x i16> %src1, + %sextsrc2 = sext <16 x i8> %src2 to <16 x i16> + %add = add <16 x i16> %sextsrc1, %sextsrc2 + %resulti16 = ashr <16 x i16> %add, + %result = trunc <16 x i16> %resulti16 to <16 x i8> + ret <16 x i8> %result +} + +define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind { +; CHECK-LABEL: sextmaskv8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll.8h v1, v1, #0 +; CHECK-NEXT: ssra.8h v1, v0, #11 +; CHECK-NEXT: shrn.8b v0, v1, #1 +; CHECK-NEXT: ret + %sextsrc1 = ashr <8 x i16> %src1, + %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> + %add = add <8 x i16> %sextsrc1, %sextsrc2 + %resulti16 = ashr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + ret <8 x i8> %result +} + +define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind { +; CHECK-LABEL: sextmask2v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll.8h v1, v1, #0 +; CHECK-NEXT: ssra.8h v1, v0, #8 +; CHECK-NEXT: shrn.8b v0, v1, #1 +; CHECK-NEXT: ret + %sextsrc1 = ashr <8 x i16> %src1, + %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> + %add = add <8 x i16> %sextsrc1, %sextsrc2 + %resulti16 = ashr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + ret <8 x i8> %result +} + +define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind { +; CHECK-LABEL: sextmask3v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshll.8h v1, v1, #0 +; 
CHECK-NEXT: usra.8h v1, v0, #7 +; CHECK-NEXT: shrn.8b v0, v1, #1 +; CHECK-NEXT: ret + %sextsrc1 = ashr <8 x i16> %src1, + %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> + %add = add <8 x i16> %sextsrc1, %sextsrc2 + %resulti16 = ashr <8 x i16> %add, + %result = trunc <8 x i16> %resulti16 to <8 x i8> + ret <8 x i8> %result +} + + declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone From f18fcdabda7271d386d24de71302907cc3f0fe4b Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Thu, 8 Apr 2021 23:31:12 -0700 Subject: [PATCH 211/946] [BOLT][NFC] Expand auto types pt.2 Summary: Expand autos where it may lead to differences in the BOLT binary. Test Plan: NFC Reviewers: maksfb Reviewed By: maks FBD27673231 --- bolt/lib/Core/BinaryFunction.cpp | 4 ++-- bolt/lib/Passes/IndirectCallPromotion.cpp | 12 ++++++------ bolt/lib/Profile/YAMLProfileWriter.cpp | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 9d55a837f2a3c..18f521edff05f 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3613,13 +3613,13 @@ void BinaryFunction::insertBasicBlocks( std::vector> &&NewBBs, const bool UpdateLayout, const bool UpdateCFIState, const bool RecomputeLandingPads) { - const auto StartIndex = Start ? getIndex(Start) : -1; + const int64_t StartIndex = Start ? 
getIndex(Start) : -1LL; const size_t NumNewBlocks = NewBBs.size(); BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks, nullptr); - auto I = StartIndex + 1; + int64_t I = StartIndex + 1; for (std::unique_ptr &BB : NewBBs) { assert(!BasicBlocks[I]); BasicBlocks[I++] = BB.release(); diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 188655b8b7004..4f1604177ab8d 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -281,7 +281,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, Inst.getOperand(0).getReg() == BC.MRI->getProgramCounter()) return Targets; - auto ICSP = BC.MIB->tryGetAnnotationAs( + const auto ICSP = BC.MIB->tryGetAnnotationAs( Inst, "CallProfile"); if (ICSP) { for (const IndirectCallProfile &CSP : ICSP.get()) { @@ -938,7 +938,7 @@ size_t IndirectCallPromotion::canPromoteCallsite( // If we have no targets (or no calls), skip this callsite. if (Targets.empty() || !NumCalls) { if (opts::Verbosity >= 1) { - const auto InstIdx = &Inst - &(*BB.begin()); + const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ " << InstIdx << " in " << BB.getName() << ", calls = " << NumCalls << ", targets empty or NumCalls == 0.\n"; @@ -985,7 +985,7 @@ size_t IndirectCallPromotion::canPromoteCallsite( if (TopNFrequency == 0 || TopNFrequency < opts::IndirectCallPromotionMispredictThreshold) { if (opts::Verbosity >= 1) { - const auto InstIdx = &Inst - &(*BB.begin()); + const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ " << InstIdx << " in " << BB.getName() << ", calls = " << NumCalls << ", top N mis. 
frequency " << format("%.1f", TopNFrequency) @@ -1034,7 +1034,7 @@ size_t IndirectCallPromotion::canPromoteCallsite( if (TopNMispredictFrequency < opts::IndirectCallPromotionMispredictThreshold) { if (opts::Verbosity >= 1) { - const auto InstIdx = &Inst - &(*BB.begin()); + const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ " << InstIdx << " in " << BB.getName() << ", calls = " << NumCalls << ", top N mispredict frequency " @@ -1064,7 +1064,7 @@ void IndirectCallPromotion::printCallsiteInfo( BinaryContext &BC = BB.getFunction()->getBinaryContext(); const bool IsTailCall = BC.MIB->isTailCall(Inst); const bool IsJumpTable = BB.getFunction()->getJumpTable(Inst); - const auto InstIdx = &Inst - &(*BB.begin()); + const ptrdiff_t InstIdx = &Inst - &(*BB.begin()); outs() << "BOLT-INFO: ICP candidate branch info: " << *BB.getFunction() << " @ " << InstIdx << " in " << BB.getName() @@ -1219,7 +1219,7 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) { for (unsigned Idx = 0; Idx < BB->size(); ++Idx) { MCInst &Inst = BB->getInstructionAtIndex(Idx); - const auto InstIdx = &Inst - &(*BB->begin()); + const ptrdiff_t InstIdx = &Inst - &(*BB->begin()); const bool IsTailCall = BC.MIB->isTailCall(Inst); const bool HasIndirectCallProfile = BC.MIB->hasAnnotation(Inst, "CallProfile"); diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index d8d69759c22bf..ddbcd5939adaf 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -59,7 +59,7 @@ void convert(const BinaryFunction &BF, CSI.Offset = *Offset - BB->getInputOffset(); if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) { - auto ICSP = BC.MIB->tryGetAnnotationAs( + const auto ICSP = BC.MIB->tryGetAnnotationAs( Instr, "CallProfile"); if (!ICSP) continue; From 5a654b01133fe5f5b2b4088495f829bb8c34e12d Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Wed, 19 
Jan 2022 20:20:55 -0800 Subject: [PATCH 212/946] [BOLT] Make ICP target selection (more) deterministic Summary: Break ties by selecting targets with lower addresses. Reviewers: maksfb FBD33677001 --- bolt/lib/Passes/IndirectCallPromotion.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 4f1604177ab8d..4104cc9ce3078 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -292,17 +292,18 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB, } } - // Sort by target count, number of indices in case of jump table, and - // mispredicts. We prioritize targets with high count, small number of - // indices and high mispredicts + // Sort by target count, number of indices in case of jump table, and + // mispredicts. We prioritize targets with high count, small number of indices + // and high mispredicts. Break ties by selecting targets with lower addresses. 
std::stable_sort(Targets.begin(), Targets.end(), [](const Callsite &A, const Callsite &B) { if (A.Branches != B.Branches) return A.Branches > B.Branches; - else if (A.JTIndices.size() != B.JTIndices.size()) + if (A.JTIndices.size() != B.JTIndices.size()) return A.JTIndices.size() < B.JTIndices.size(); - else + if (A.Mispreds != B.Mispreds) return A.Mispreds > B.Mispreds; + return A.To.Addr < B.To.Addr; }); // Remove non-symbol targets From f8c7fb499be6c2712be828ffe7378802dfd645a0 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 29 Oct 2021 17:27:38 -0700 Subject: [PATCH 213/946] [BOLT][NFC] Reduce includes with include-what-you-use Summary: Removed redundant includes with IWYU Test Plan: ninja bolt Reviewers: maksfb FBD32043568 --- bolt/include/bolt/Core/BinaryContext.h | 3 +-- bolt/include/bolt/Core/BinaryData.h | 1 - bolt/include/bolt/Core/DebugData.h | 3 --- bolt/include/bolt/Passes/AllocCombiner.h | 2 -- bolt/include/bolt/Rewrite/DWARFRewriter.h | 3 +-- bolt/lib/Core/BinaryFunction.cpp | 1 - bolt/lib/Core/DebugData.cpp | 7 ++++--- bolt/lib/Core/JumpTable.cpp | 1 - bolt/lib/Passes/IndirectCallPromotion.cpp | 1 - bolt/lib/Passes/ThreeWayBranch.cpp | 2 -- bolt/lib/Rewrite/DWARFRewriter.cpp | 4 +--- bolt/lib/Rewrite/MachORewriteInstance.cpp | 2 -- bolt/lib/Rewrite/RewriteInstance.cpp | 2 -- bolt/lib/Utils/Utils.cpp | 1 - 14 files changed, 7 insertions(+), 26 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index abcd2c3692055..c626af3a897d6 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -28,13 +28,12 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include 
"llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorOr.h" diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h index 831f968bbfaea..01e1538f8a95e 100644 --- a/bolt/include/bolt/Core/BinaryData.h +++ b/bolt/include/bolt/Core/BinaryData.h @@ -16,7 +16,6 @@ #define BOLT_CORE_BINARY_DATA_H #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 832123fde38a0..761614c00f619 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -14,7 +14,6 @@ #ifndef BOLT_CORE_DEBUG_DATA_H #define BOLT_CORE_DEBUG_DATA_H -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCDwarf.h" @@ -33,8 +32,6 @@ namespace llvm { -class DWARFAbbreviationDeclarationSet; - namespace bolt { class BinaryContext; diff --git a/bolt/include/bolt/Passes/AllocCombiner.h b/bolt/include/bolt/Passes/AllocCombiner.h index 810c265714193..21f1aa73d343e 100644 --- a/bolt/include/bolt/Passes/AllocCombiner.h +++ b/bolt/include/bolt/Passes/AllocCombiner.h @@ -13,8 +13,6 @@ namespace llvm { namespace bolt { -class DataflowInfoManager; -class FrameAnalysis; class AllocCombinerPass : public BinaryFunctionPass { /// Stats aggregating variables diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h index 55dda5d71fc63..d75ae8ade8516 100644 --- a/bolt/include/bolt/Rewrite/DWARFRewriter.h +++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h @@ -10,11 +10,10 @@ #define BOLT_REWRITE_DWARF_REWRITER_H #include "bolt/Core/DebugData.h" -#include "bolt/Rewrite/RewriteInstance.h" #include -#include #include #include +#include #include #include diff --git a/bolt/lib/Core/BinaryFunction.cpp 
b/bolt/lib/Core/BinaryFunction.cpp index 18f521edff05f..0b175a8b47e9b 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -28,7 +28,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 396bf8aebf52f..fde6227a02d6c 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -11,11 +11,10 @@ //===----------------------------------------------------------------------===// #include "bolt/Core/DebugData.h" -#include "bolt/Core/BinaryBasicBlock.h" -#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryContext.h" #include "bolt/Utils/Utils.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/LEB128.h" @@ -32,6 +31,8 @@ extern llvm::cl::opt Verbosity; } // namespace opts namespace llvm { +class MCSymbol; + namespace bolt { const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0}; diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp index a3c273b890da3..91c73f67b87ee 100644 --- a/bolt/lib/Core/JumpTable.cpp +++ b/bolt/lib/Core/JumpTable.cpp @@ -14,7 +14,6 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/BinarySection.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #define DEBUG_TYPE "bolt" diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp index 4104cc9ce3078..dd1cc2024e8de 100644 --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -13,7 +13,6 @@ #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/BinaryFunctionCallGraph.h" #include 
"bolt/Passes/DataflowInfoManager.h" -#include "bolt/Utils/CommandLineOpts.h" #include "llvm/Support/CommandLine.h" #define DEBUG_TYPE "ICP" diff --git a/bolt/lib/Passes/ThreeWayBranch.cpp b/bolt/lib/Passes/ThreeWayBranch.cpp index 5ee2b66a9fd5c..445faba888ae5 100644 --- a/bolt/lib/Passes/ThreeWayBranch.cpp +++ b/bolt/lib/Passes/ThreeWayBranch.cpp @@ -12,8 +12,6 @@ #include "bolt/Passes/ThreeWayBranch.h" -#include - using namespace llvm; namespace llvm { diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp index 531f82499fc09..c44e548463cf7 100644 --- a/bolt/lib/Rewrite/DWARFRewriter.cpp +++ b/bolt/lib/Rewrite/DWARFRewriter.cpp @@ -11,6 +11,7 @@ #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/DebugData.h" #include "bolt/Core/ParallelUtilities.h" +#include "bolt/Rewrite/RewriteInstance.h" #include "bolt/Utils/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -20,11 +21,8 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp index e04c22282104c..c823349d8fd18 100644 --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -19,10 +19,8 @@ #include "bolt/Rewrite/ExecutableFileMemoryManager.h" #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h" #include "bolt/Utils/Utils.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ToolOutputFile.h" diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp 
b/bolt/lib/Rewrite/RewriteInstance.cpp index 73288a32315be..934f79749e88d 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -35,10 +35,8 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/TargetRegistry.h" diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp index f94d286bc87b8..3d18da5d836d7 100644 --- a/bolt/lib/Utils/Utils.cpp +++ b/bolt/lib/Utils/Utils.cpp @@ -13,7 +13,6 @@ #include "bolt/Utils/Utils.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" From 2f9f9afa4e1281b4ac7c8ad36860a4e35e6f5070 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Fri, 21 Jan 2022 12:22:50 -0800 Subject: [PATCH 214/946] [mlir] Add polynomial approximation for atan and atan2 Implement a Taylor series approximation for atan and add an atan2 lowering that uses atan's approximation. This includes tests for edge cases and tests for each quadrant. 
Reviewed By: NatashaKnk Differential Revision: https://reviews.llvm.org/D115682 --- .../Transforms/PolynomialApproximation.cpp | 134 +++++++++++++++++- .../Math/polynomial-approximation.mlir | 82 +++++++++++ .../math-polynomial-approx.mlir | 118 +++++++++++++++ 3 files changed, 331 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp index 9931e89647bcf..7d04ae7e3d34f 100644 --- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp +++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp @@ -278,6 +278,133 @@ Value makePolynomialCalculation(ImplicitLocOpBuilder &builder, } } // namespace +//----------------------------------------------------------------------------// +// AtanOp approximation. +//----------------------------------------------------------------------------// + +namespace { +struct AtanApproximation : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(math::AtanOp op, + PatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +AtanApproximation::matchAndRewrite(math::AtanOp op, + PatternRewriter &rewriter) const { + auto operand = op.getOperand(); + if (!getElementTypeOrSelf(operand).isF32()) + return rewriter.notifyMatchFailure(op, "unsupported operand type"); + + ArrayRef shape = vectorShape(op.getOperand()); + + ImplicitLocOpBuilder builder(op->getLoc(), rewriter); + auto one = broadcast(builder, f32Cst(builder, 1.0f), shape); + + // Remap the problem over [0.0, 1.0] by looking at the absolute value and + // handling the symmetry. + Value abs = builder.create(operand); + Value reciprocal = builder.create(one, abs); + Value compare = + builder.create(arith::CmpFPredicate::OLT, abs, reciprocal); + Value x = builder.create(compare, abs, reciprocal); + + // Perform the Taylor series approximation for atan over the range + // [-1.0, 1.0]. 
+ auto n1 = broadcast(builder, f32Cst(builder, 0.14418283), shape); + auto n2 = broadcast(builder, f32Cst(builder, -0.34999234), shape); + auto n3 = broadcast(builder, f32Cst(builder, -0.01067831), shape); + auto n4 = broadcast(builder, f32Cst(builder, 1.00209986), shape); + + Value p = builder.create(x, n1, n2); + p = builder.create(x, p, n3); + p = builder.create(x, p, n4); + p = builder.create(x, p); + + // Remap the solution over [0.0, 1.0] to [0.0, inf] + auto half_pi = broadcast(builder, f32Cst(builder, 1.57079632679f), shape); + Value sub = builder.create(half_pi, p); + Value select = builder.create(compare, p, sub); + + // Correct for signing of the input. + rewriter.replaceOpWithNewOp(op, select, operand); + return success(); +} + +//----------------------------------------------------------------------------// +// Atan2Op approximation. +//----------------------------------------------------------------------------// + +namespace { +struct Atan2Approximation : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(math::Atan2Op op, + PatternRewriter &rewriter) const final; +}; +} // namespace + +LogicalResult +Atan2Approximation::matchAndRewrite(math::Atan2Op op, + PatternRewriter &rewriter) const { + auto y = op.getOperand(0); + auto x = op.getOperand(1); + if (!getElementTypeOrSelf(x).isF32()) + return rewriter.notifyMatchFailure(op, "unsupported operand type"); + + ImplicitLocOpBuilder builder(op->getLoc(), rewriter); + ArrayRef shape = vectorShape(op.getResult()); + + // Compute atan in the valid range. + auto div = builder.create(y, x); + auto atan = builder.create(div); + + // Determine what the atan would be for a 180 degree rotation.
+ auto zero = broadcast(builder, f32Cst(builder, 0.0f), shape); + auto pi = broadcast(builder, f32Cst(builder, 3.14159265359f), shape); + auto add_pi = builder.create(atan, pi); + auto sub_pi = builder.create(atan, pi); + auto atan_gt = + builder.create(arith::CmpFPredicate::OGT, atan, zero); + auto flipped_atan = builder.create(atan_gt, sub_pi, add_pi); + + // Determine whether to directly use atan or use the 180 degree flip + auto x_gt = builder.create(arith::CmpFPredicate::OGT, x, zero); + Value result = builder.create(x_gt, atan, flipped_atan); + + // Handle x = 0, y > 0 + Value x_zero = + builder.create(arith::CmpFPredicate::OEQ, x, zero); + Value y_gt = + builder.create(arith::CmpFPredicate::OGT, y, zero); + Value is_half_pi = builder.create(x_zero, y_gt); + auto half_pi = broadcast(builder, f32Cst(builder, 1.57079632679f), shape); + result = builder.create(is_half_pi, half_pi, result); + + // Handle x = 0, y < 0 + Value y_lt = + builder.create(arith::CmpFPredicate::OLT, y, zero); + Value is_negative_half_pi_pi = builder.create(x_zero, y_lt); + auto negative_half_pi_pi = + broadcast(builder, f32Cst(builder, -1.57079632679), shape); + result = builder.create(is_negative_half_pi_pi, negative_half_pi_pi, + result); + + // Handle x = 0, y = 0; + Value y_zero = + builder.create(arith::CmpFPredicate::OEQ, y, zero); + Value is_nan = builder.create(x_zero, y_zero); + Value cst_nan = broadcast(builder, f32FromBits(builder, 0x7fc00000), shape); + result = builder.create(is_nan, cst_nan, result); + + rewriter.replaceOp(op, result); + return success(); +} + //----------------------------------------------------------------------------// // TanhOp approximation. 
//----------------------------------------------------------------------------// @@ -1074,9 +1201,10 @@ RsqrtApproximation::matchAndRewrite(math::RsqrtOp op, void mlir::populateMathPolynomialApproximationPatterns( RewritePatternSet &patterns, const MathPolynomialApproximationOptions &options) { - patterns.add, + patterns.add, SinAndCosApproximation>( patterns.getContext()); if (options.enableAvx2) diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir index f388b84d83c8e..a40cc9c6f037a 100644 --- a/mlir/test/Dialect/Math/polynomial-approximation.mlir +++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir @@ -507,3 +507,85 @@ func @rsqrt_vector_2x16xf32(%arg0: vector<2x16xf32>) -> vector<2x16xf32> { %0 = math.rsqrt %arg0 : vector<2x16xf32> return %0 : vector<2x16xf32> } + +// CHECK-LABEL: @atan_scalar +// CHECK-DAG: %[[ONE:.+]] = arith.constant 1.000000e+00 +// CHECK-DAG: %[[N1:.+]] = arith.constant 0.144182831 +// CHECK-DAG: %[[N2:.+]] = arith.constant -0.349992335 +// CHECK-DAG: %[[N3:.+]] = arith.constant -0.0106783099 +// CHECK-DAG: %[[N4:.+]] = arith.constant 1.00209987 +// CHECK-DAG: %[[HALF_PI:.+]] = arith.constant 1.57079637 +// CHECK-DAG: %[[ABS:.+]] = math.abs %arg0 +// CHECK-DAG: %[[DIV:.+]] = arith.divf %cst, %[[ABS]] +// CHECK-DAG: %[[CMP:.+]] = arith.cmpf olt, %[[ABS]], %[[DIV]] +// CHECK-DAG: %[[SEL:.+]] = select %[[CMP]], %[[ABS]], %[[DIV]] +// CHECK-DAG: %[[P0:.+]] = math.fma %[[SEL]], %[[N1]], %[[N2]] +// CHECK-DAG: %[[P1:.+]] = math.fma %[[SEL]], %[[P0]], %[[N3]] +// CHECK-DAG: %[[P2:.+]] = math.fma %[[SEL]], %[[P1]], %[[N4]] +// CHECK-DAG: %[[P3:.+]] = arith.mulf %[[SEL]], %[[P2]] +// CHECK-DAG: %[[SUB:.+]] = arith.subf %[[HALF_PI]], %[[P3]] +// CHECK-DAG: %[[EST:.+]] = select %[[CMP]], %[[P3]], %[[SUB]] +// CHECK-DAG: %[[RES:.+]] = math.copysign %[[EST]], %arg0 +// CHECK: return %[[RES]] +func @atan_scalar(%arg0: f32) -> f32 { + %0 = math.atan %arg0 : f32 + return %0 : f32 +} 
+ + +// CHECK-LABEL: @atan2_scalar + +// ATan approximation: +// CHECK-DAG: %[[ONE:.+]] = arith.constant 1.000000e+00 +// CHECK-DAG: %[[N1:.+]] = arith.constant 0.144182831 +// CHECK-DAG: %[[N2:.+]] = arith.constant -0.349992335 +// CHECK-DAG: %[[N3:.+]] = arith.constant -0.0106783099 +// CHECK-DAG: %[[N4:.+]] = arith.constant 1.00209987 +// CHECK-DAG: %[[HALF_PI:.+]] = arith.constant 1.57079637 +// CHECK-DAG: %[[RATIO:.+]] = arith.divf %arg0, %arg1 +// CHECK-DAG: %[[ABS:.+]] = math.abs %[[RATIO]] +// CHECK-DAG: %[[DIV:.+]] = arith.divf %cst, %[[ABS]] +// CHECK-DAG: %[[CMP:.+]] = arith.cmpf olt, %[[ABS]], %[[DIV]] +// CHECK-DAG: %[[SEL:.+]] = select %[[CMP]], %[[ABS]], %[[DIV]] +// CHECK-DAG: %[[P0:.+]] = math.fma %[[SEL]], %[[N1]], %[[N2]] +// CHECK-DAG: %[[P1:.+]] = math.fma %[[SEL]], %[[P0]], %[[N3]] +// CHECK-DAG: %[[P2:.+]] = math.fma %[[SEL]], %[[P1]], %[[N4]] +// CHECK-DAG: %[[P3:.+]] = arith.mulf %[[SEL]], %[[P2]] +// CHECK-DAG: %[[SUB:.+]] = arith.subf %[[HALF_PI]], %[[P3]] +// CHECK-DAG: %[[EST:.+]] = select %[[CMP]], %[[P3]], %[[SUB]] +// CHECK-DAG: %[[ATAN:.+]] = math.copysign %[[EST]], %[[RATIO]] + +// Handle the case of x < 0: +// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00 +// CHECK-DAG: %[[PI:.+]] = arith.constant 3.14159274 +// CHECK-DAG: %[[ADD_PI:.+]] = arith.addf %[[ATAN]], %[[PI]] +// CHECK-DAG: %[[SUB_PI:.+]] = arith.subf %[[ATAN]], %[[PI]] +// CHECK-DAG: %[[CMP_ATAN:.+]] = arith.cmpf ogt, %[[ATAN]], %[[ZERO]] +// CHECK-DAG: %[[ATAN_ADJUST:.+]] = select %[[CMP_ATAN]], %[[SUB_PI]], %[[ADD_PI]] +// CHECK-DAG: %[[X_NEG:.+]] = arith.cmpf ogt, %arg1, %[[ZERO]] +// CHECK-DAG: %[[ATAN_EST:.+]] = select %[[X_NEG]], %[[ATAN]], %[[ATAN_ADJUST]] + +// Handle PI / 2 edge case: +// CHECK-DAG: %[[X_ZERO:.+]] = arith.cmpf oeq, %arg1, %[[ZERO]] +// CHECK-DAG: %[[Y_POS:.+]] = arith.cmpf ogt, %arg0, %[[ZERO]] +// CHECK-DAG: %[[IS_HALF_PI:.+]] = arith.andi %[[X_ZERO]], %[[Y_POS]] +// CHECK-DAG: %[[EDGE1:.+]] = select %[[IS_HALF_PI]], %[[HALF_PI]], 
%[[ATAN_EST]] + +// Handle -PI / 2 edge case: +// CHECK-DAG: %[[NEG_HALF_PI:.+]] = arith.constant -1.57079637 +// CHECK-DAG: %[[Y_NEG:.+]] = arith.cmpf olt, %arg0, %[[ZERO]] +// CHECK-DAG: %[[IS_NEG_HALF_PI:.+]] = arith.andi %[[X_ZERO]], %[[Y_NEG]] +// CHECK-DAG: %[[EDGE2:.+]] = select %[[IS_NEG_HALF_PI]], %[[NEG_HALF_PI]], %[[EDGE1]] + +// Handle Nan edgecase: +// CHECK-DAG: %[[Y_ZERO:.+]] = arith.cmpf oeq, %arg0, %[[ZERO]] +// CHECK-DAG: %[[X_Y_ZERO:.+]] = arith.andi %[[X_ZERO]], %[[Y_ZERO]] +// CHECK-DAG: %[[NAN:.+]] = arith.constant 0x7FC00000 +// CHECK-DAG: %[[EDGE3:.+]] = select %[[X_Y_ZERO]], %[[NAN]], %[[EDGE2]] +// CHECK: return %[[EDGE3]] + +func @atan2_scalar(%arg0: f32, %arg1: f32) -> f32 { + %0 = math.atan2 %arg0, %arg1 : f32 + return %0 : f32 +} + diff --git a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir index b3c41057fa302..5a41d56dd42bd 100644 --- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir +++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir @@ -371,6 +371,122 @@ func @cos() { return } +// -------------------------------------------------------------------------- // +// Atan. 
+// -------------------------------------------------------------------------- // + +func @atan() { + // CHECK: -0.785184 + %0 = arith.constant -1.0 : f32 + %atan_0 = math.atan %0 : f32 + vector.print %atan_0 : f32 + + // CHECK: 0.785184 + %1 = arith.constant 1.0 : f32 + %atan_1 = math.atan %1 : f32 + vector.print %atan_1 : f32 + + // CHECK: -0.463643 + %2 = arith.constant -0.5 : f32 + %atan_2 = math.atan %2 : f32 + vector.print %atan_2 : f32 + + // CHECK: 0.463643 + %3 = arith.constant 0.5 : f32 + %atan_3 = math.atan %3 : f32 + vector.print %atan_3 : f32 + + // CHECK: 0 + %4 = arith.constant 0.0 : f32 + %atan_4 = math.atan %4 : f32 + vector.print %atan_4 : f32 + + // CHECK: -1.10715 + %5 = arith.constant -2.0 : f32 + %atan_5 = math.atan %5 : f32 + vector.print %atan_5 : f32 + + // CHECK: 1.10715 + %6 = arith.constant 2.0 : f32 + %atan_6 = math.atan %6 : f32 + vector.print %atan_6 : f32 + + return +} + + +// -------------------------------------------------------------------------- // +// Atan2. 
+// -------------------------------------------------------------------------- // + +func @atan2() { + %zero = arith.constant 0.0 : f32 + %one = arith.constant 1.0 : f32 + %two = arith.constant 2.0 : f32 + %neg_one = arith.constant -1.0 : f32 + %neg_two = arith.constant -2.0 : f32 + + // CHECK: 0 + %atan2_0 = math.atan2 %zero, %one : f32 + vector.print %atan2_0 : f32 + + // CHECK: 1.5708 + %atan2_1 = math.atan2 %one, %zero : f32 + vector.print %atan2_1 : f32 + + // CHECK: 3.14159 + %atan2_2 = math.atan2 %zero, %neg_one : f32 + vector.print %atan2_2 : f32 + + // CHECK: -1.5708 + %atan2_3 = math.atan2 %neg_one, %zero : f32 + vector.print %atan2_3 : f32 + + // CHECK: nan + %atan2_4 = math.atan2 %zero, %zero : f32 + vector.print %atan2_4 : f32 + + // CHECK: 1.10715 + %atan2_5 = math.atan2 %two, %one : f32 + vector.print %atan2_5 : f32 + + // CHECK: 2.03444 + %x6 = arith.constant -1.0 : f32 + %y6 = arith.constant 2.0 : f32 + %atan2_6 = math.atan2 %two, %neg_one : f32 + vector.print %atan2_6 : f32 + + // CHECK: -2.03444 + %atan2_7 = math.atan2 %neg_two, %neg_one : f32 + vector.print %atan2_7 : f32 + + // CHECK: -1.10715 + %atan2_8 = math.atan2 %neg_two, %one : f32 + vector.print %atan2_8 : f32 + + // CHECK: 0.463643 + %atan2_9 = math.atan2 %one, %two : f32 + vector.print %atan2_9 : f32 + + // CHECK: 2.67795 + %x10 = arith.constant -2.0 : f32 + %y10 = arith.constant 1.0 : f32 + %atan2_10 = math.atan2 %one, %neg_two : f32 + vector.print %atan2_10 : f32 + + // CHECK: -2.67795 + %x11 = arith.constant -2.0 : f32 + %y11 = arith.constant -1.0 : f32 + %atan2_11 = math.atan2 %neg_one, %neg_two : f32 + vector.print %atan2_11 : f32 + + // CHECK: -0.463643 + %atan2_12 = math.atan2 %neg_one, %two : f32 + vector.print %atan2_12 : f32 + + return +} + func @main() { call @tanh(): () -> () @@ -382,5 +498,7 @@ func @main() { call @expm1(): () -> () call @sin(): () -> () call @cos(): () -> () + call @atan() : () -> () + call @atan2() : () -> () return } From 
0d9cc6995401e629f63b1e43e2e6e8bf73c4edd7 Mon Sep 17 00:00:00 2001 From: Daniel Thornburgh Date: Fri, 21 Jan 2022 19:26:52 +0000 Subject: [PATCH 215/946] [Support] Update missed tests with lazy caching behavior. Fixes test failures created by https://reviews.llvm.org/D117589. Reviewed By: zhuhan0 Differential Revision: https://reviews.llvm.org/D117915 --- llvm/test/tools/gold/X86/cache.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/gold/X86/cache.ll b/llvm/test/tools/gold/X86/cache.ll index 5ab5563025587..fd3e7ba876830 100644 --- a/llvm/test/tools/gold/X86/cache.ll +++ b/llvm/test/tools/gold/X86/cache.ll @@ -8,8 +8,8 @@ ; RUN: --plugin-opt=cache-dir=%t.cache \ ; RUN: -o %t3.o %t2.o %t.o -; We should just get the timestamp file -; RUN: ls %t.cache | count 1 +; Since nothing was added to the cache, there shouldn't be a timestamp file yet. +; RUN: not ls %t.cache ; Verify that enabling caching is working with module with hash. From cd4e600f5f5cedd092c8ff19c208897034494f3d Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 21 Jan 2022 21:00:39 +0000 Subject: [PATCH 216/946] [Sema] Warn about printf %n on Android and Fuchsia The `printf` specifier `%n` is not supported on Android's libc and will soon be removed from Fuchsia's Reviewed By: enh Differential Revision: https://reviews.llvm.org/D117611 --- clang/include/clang/AST/FormatString.h | 3 +- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/lib/AST/OSLog.cpp | 4 +- clang/lib/AST/PrintfFormatString.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 24 ++++-- clang/test/FixIt/format.m | 16 +++- clang/test/Sema/format-strings.c | 84 ++++++++++++++++++- 7 files changed, 122 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h index 5a407b9261922..d7933382f13d6 100644 --- a/clang/include/clang/AST/FormatString.h +++ b/clang/include/clang/AST/FormatString.h @@ -726,7 +726,8 @@ class FormatStringHandler { virtual 
bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) { + unsigned specifierLen, + const TargetInfo &Target) { return true; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 19ce0ffcec51d..88e430d8eb09f 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9369,6 +9369,9 @@ def warn_printf_ObjCflags_without_ObjCConversion: Warning< def warn_printf_invalid_objc_flag: Warning< "'%0' is not a valid object format flag">, InGroup; +def warn_printf_narg_not_supported : Warning< + "'%%n' specifier not supported on this platform">, + InGroup; def warn_scanf_scanlist_incomplete : Warning< "no closing ']' for '%%[' in scanf format string">, InGroup; diff --git a/clang/lib/AST/OSLog.cpp b/clang/lib/AST/OSLog.cpp index 094c0102854b1..4cc5def0651f7 100644 --- a/clang/lib/AST/OSLog.cpp +++ b/clang/lib/AST/OSLog.cpp @@ -56,8 +56,8 @@ class OSLogFormatStringHandler } bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, - const char *StartSpecifier, - unsigned SpecifierLen) override { + const char *StartSpecifier, unsigned SpecifierLen, + const TargetInfo &) override { if (!FS.consumesDataArgument() && FS.getConversionSpecifier().getKind() != clang::analyze_format_string::ConversionSpecifier::PrintErrno) diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp index a286db3b9b9f1..c6c41abc7e9a5 100644 --- a/clang/lib/AST/PrintfFormatString.cpp +++ b/clang/lib/AST/PrintfFormatString.cpp @@ -428,7 +428,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, continue; // We have a format specifier. Pass it to the callback. 
if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), - I - FSR.getStart())) + I - FSR.getStart(), Target)) return true; } assert(I == E && "Format string not exhausted"); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 27653464110aa..e2b78fa212b81 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -499,7 +499,8 @@ class EstimateSizeFormatHandler 1 /* null byte always written by sprintf */) {} bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, - const char *, unsigned SpecifierLen) override { + const char *, unsigned SpecifierLen, + const TargetInfo &) override { const size_t FieldWidth = computeFieldWidth(FS); const size_t Precision = computePrecision(FS); @@ -8909,8 +8910,8 @@ class CheckPrintfHandler : public CheckFormatHandler { void handleInvalidMaskType(StringRef MaskType) override; bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, - const char *startSpecifier, - unsigned specifierLen) override; + const char *startSpecifier, unsigned specifierLen, + const TargetInfo &Target) override; bool checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, const char *StartSpecifier, unsigned SpecifierLen, @@ -9169,11 +9170,9 @@ bool CheckPrintfHandler::checkForCStrMembers( return false; } -bool -CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier - &FS, - const char *startSpecifier, - unsigned specifierLen) { +bool CheckPrintfHandler::HandlePrintfSpecifier( + const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, + unsigned specifierLen, const TargetInfo &Target) { using namespace analyze_format_string; using namespace analyze_printf; @@ -9305,6 +9304,15 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier } } + const llvm::Triple &Triple = Target.getTriple(); + if (CS.getKind() == ConversionSpecifier::nArg && + (Triple.isAndroid() || Triple.isOSFuchsia())) { + 
EmitFormatDiagnostic(S.PDiag(diag::warn_printf_narg_not_supported), + getLocationOfByte(CS.getStart()), + /*IsStringLocation*/ false, + getSpecifierRange(startSpecifier, specifierLen)); + } + // Check for invalid use of field width if (!FS.hasValidFieldWidth()) { HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0, diff --git a/clang/test/FixIt/format.m b/clang/test/FixIt/format.m index 0d173846d0ada..af2d2ce797a49 100644 --- a/clang/test/FixIt/format.m +++ b/clang/test/FixIt/format.m @@ -241,8 +241,13 @@ void testSizeTypes() { // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%f" short x; +#if !defined(__ANDROID__) && !defined(__Fuchsia__) printf("%zn", &x); // expected-warning-re{{format specifies type 'ssize_t *' (aka '{{.+}}') but the argument has type 'short *'}} - // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted, +#else + printf("%zn", &x); // expected-warning-re{{format specifies type 'ssize_t *' (aka '{{.+}}') but the argument has type 'short *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} +#endif // !defined(__ANDROID__) && !defined(__Fuchsia__) + // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted, // see the comment in PrintfSpecifier::fixType in PrintfFormatString.cpp. } @@ -269,12 +274,21 @@ void testPtrDiffTypes() { // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%f" ptrdiff_t p3 = 0; +#if !defined(__ANDROID__) && !defined(__Fuchsia__) printf("%tn", &p3); // No warning. 
+#else + printf("%tn", &p3); // expected-warning{{'%n' specifier not supported on this platform}} +#endif // !defined(__ANDROID__) && !defined(__Fuchsia__) short x; +#if !defined(__ANDROID__) && !defined(__Fuchsia__) printf("%tn", &x); // expected-warning-re{{format specifies type 'ptrdiff_t *' (aka '{{.+}}') but the argument has type 'short *'}} // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted, // see the comment in PrintfSpecifier::fixType in PrintfFormatString.cpp. +#else + printf("%tn", &x); // expected-warning-re{{format specifies type 'ptrdiff_t *' (aka '{{.+}}') but the argument has type 'short *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} +#endif // !defined(__ANDROID__) && !defined(__Fuchsia__) } void testEnum() { diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c index bbe47636ebb7d..bb5c4c4d1de7f 100644 --- a/clang/test/Sema/format-strings.c +++ b/clang/test/Sema/format-strings.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs %s // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -fno-signed-char %s +// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s +// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s #include #include @@ -118,6 +120,8 @@ void check_conditional_literal(const char* s, int i) { printf(i ? 
"%i\n" : "%i %s %s\n", i, s); // expected-warning{{more '%' conversions than data arguments}} } +#if !defined(__ANDROID__) && !defined(__Fuchsia__) + void check_writeback_specifier() { int x; @@ -154,6 +158,45 @@ void check_writeback_specifier() // expected-note@-1{{did you mean to use 'll'?}} } +#else + +void check_writeback_specifier() +{ + int x; + printf("%n", &x); // expected-warning{{'%n' specifier not supported on this platform}} + + printf("%hhn", (signed char*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%hhn", (char*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%hhn", (unsigned char*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%hhn", (int*)0); // expected-warning{{format specifies type 'signed char *' but the argument has type 'int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%hn", (short*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%hn", (unsigned short*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%hn", (int*)0); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%n", (int*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%n", (unsigned int*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%n", (char*)0); // expected-warning{{format specifies type 'int *' but the argument has type 'char *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%ln", (long*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%ln", (unsigned long*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%ln", (int*)0); // expected-warning{{format specifies 
type 'long *' but the argument has type 'int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%lln", (long long*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%lln", (unsigned long long*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%lln", (int*)0); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%qn", (long long*)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%qn", (unsigned long long*)0); // expected-warning{{'%n' specifier not supported on this platform}} +} + +#endif // !defined(__ANDROID__) && !defined(__Fuchsia__) + void check_invalid_specifier(FILE* fp, char *buf) { printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}} @@ -386,14 +429,28 @@ void bug7377_bad_length_mod_usage() { // Bad flag usage printf("%#p", (void *) 0); // expected-warning{{flag '#' results in undefined behavior with 'p' conversion specifier}} printf("%0d", -1); // no-warning + printf("%-p", (void *) 0); // no-warning +#if !defined(__ANDROID__) && !defined(__Fuchsia__) printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}} printf("%-n", (int *) 0); // expected-warning{{flag '-' results in undefined behavior with 'n' conversion specifier}} - printf("%-p", (void *) 0); // no-warning +#else + printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + printf("%-n", (int *) 0); // expected-warning{{flag '-' results in undefined behavior with 'n' conversion specifier}} + // expected-warning@-1 {{'%n' specifier not supported on 
this platform}} +#endif // !defined(__ANDROID__) && !defined(__Fuchsia__) // Bad optional amount use printf("%.2c", 'a'); // expected-warning{{precision used with 'c' conversion specifier, resulting in undefined behavior}} +#if !defined(__ANDROID__) && !defined(__Fuchsia__) + printf("%1n", (int *) 0); // expected-warning{{field width used with 'n' conversion specifier, resulting in undefined behavior}} + printf("%.9n", (int *) 0); // expected-warning{{precision used with 'n' conversion specifier, resulting in undefined behavior}} +#else printf("%1n", (int *) 0); // expected-warning{{field width used with 'n' conversion specifier, resulting in undefined behavior}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} printf("%.9n", (int *) 0); // expected-warning{{precision used with 'n' conversion specifier, resulting in undefined behavior}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} +#endif // #if !defined(__ANDROID__) && !defined(__Fuchsia__) // Ignored flags printf("% +f", 1.23); // expected-warning{{flag ' ' is ignored when flag '+' is present}} @@ -644,6 +701,8 @@ void test14_zed(int *p) { test14_bar("%", "%d", p); // expected-warning{{incomplete format specifier}} } +#if !defined(__ANDROID__) && !defined(__Fuchsia__) + void test_qualifiers(volatile int *vip, const int *cip, const volatile int *cvip) { printf("%n", cip); // expected-warning{{format specifies type 'int *' but the argument has type 'const int *'}} @@ -660,6 +719,29 @@ void test_qualifiers(volatile int *vip, const int *cip, printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}} } +#else + +void test_qualifiers(volatile int *vip, const int *cip, + const volatile int *cvip) { + printf("%n", cip); // expected-warning{{format specifies type 'int *' but the argument has type 'const int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + 
printf("%n", cvip); // expected-warning{{format specifies type 'int *' but the argument has type 'const volatile int *'}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} + + printf("%n", vip); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%p", cip); // No warning. + printf("%p", cvip); // No warning. + + + typedef int* ip_t; + typedef const int* cip_t; + printf("%n", (ip_t)0); // expected-warning{{'%n' specifier not supported on this platform}} + printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}} + // expected-warning@-1 {{'%n' specifier not supported on this platform}} +} + +#endif // #if !defined(__ANDROID__) && !defined(__Fuchsia__) + #pragma GCC diagnostic ignored "-Wformat-nonliteral" #pragma GCC diagnostic warning "-Wformat-security" // From 0379459fc5860a9c34d5592d30f5834afbcd6b75 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 21 Jan 2022 12:31:41 -0800 Subject: [PATCH 217/946] [RISCV] Strengthen a SDTypeProfile. Fix formatting. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9745c13863823..28cb8fc413793 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -216,17 +216,17 @@ def riscv_zext_vl : SDNode<"RISCVISD::VZEXT_VL", SDT_RISCVVEXTEND_VL>; def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL", SDTypeProfile<1, 3, [SDTCisVec<0>, - SDTCisVec<1>, + SDTCisSameNumEltsAs<0, 1>, SDTCisSameNumEltsAs<0, 2>, SDTCVecEltisVT<2, i1>, SDTCisVT<3, XLenVT>]>>; def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>, - SDTCisSameNumEltsAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisSameNumEltsAs<1, 3>, - SDTCVecEltisVT<3, i1>, - SDTCisVT<4, XLenVT>]>; + SDTCisSameNumEltsAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisSameNumEltsAs<1, 3>, + SDTCVecEltisVT<3, i1>, + SDTCisVT<4, XLenVT>]>; def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>; From 4f8ea3c84f3de6ddb754ad339c4672c7e3b7fc74 Mon Sep 17 00:00:00 2001 From: Muiez Ahmed Date: Fri, 21 Jan 2022 16:18:46 -0500 Subject: [PATCH 218/946] [SystemZ][z/OS][NFC] Remove extra symbol --- libcxx/include/__locale | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 6181f2539475d..98445bd2d8f40 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -512,8 +512,7 @@ public: # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT #elif defined(__MVS__) - static const mask __regex_word = 0x8000; -# if defined(__NATIVE_ASCII_F)` +# if 
defined(__NATIVE_ASCII_F) typedef unsigned int mask; static const mask space = _ISSPACE_A; static const mask print = _ISPRINT_A; @@ -538,6 +537,7 @@ public: static const mask xdigit = __ISXDIGIT; static const mask blank = __ISBLANK; # endif + static const mask __regex_word = 0x8000; #else # error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE? #endif From d84d1135d80c1dead6564347943ba56eed5aac3b Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Wed, 17 Nov 2021 15:05:58 -0800 Subject: [PATCH 219/946] Emit swift5 reflection section data in dsym bundle generated by dsymutil in the Dwarf section. Add support for Swift reflection metadata to dsymutil. This patch adds support for copying Swift reflection metadata (__swift5_.* sections) from .o files to into the symbol-rich binary in the output .dSYM. The functionality is automatically enabled only if a .o file has reflection metadata sections and the binary doesn't. When copying dsymutil moves the section from the __TEXT segment to the __DWARF segment. 
rdar://76973336 https://reviews.llvm.org/D115007 --- llvm/include/llvm/BinaryFormat/Swift.def | 26 + llvm/include/llvm/BinaryFormat/Swift.h | 24 + llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 8 +- llvm/include/llvm/MC/MCContext.h | 10 +- llvm/include/llvm/MC/MCObjectFileInfo.h | 13 + llvm/include/llvm/Object/MachO.h | 4 + llvm/include/llvm/Object/ObjectFile.h | 6 + llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20 +- llvm/lib/MC/MCContext.cpp | 8 +- llvm/lib/MC/MCObjectFileInfo.cpp | 11 + llvm/lib/Object/MachOObjectFile.cpp | 12 + llvm/test/tools/dsymutil/Inputs/main.yaml | 886 ++++++++++++++++++ .../dsymutil/Inputs/reflection_metadata.yaml | 436 +++++++++ llvm/test/tools/dsymutil/Inputs/test.yaml | 254 +++++ llvm/test/tools/dsymutil/reflection-dump.test | 42 + llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 86 +- 16 files changed, 1835 insertions(+), 11 deletions(-) create mode 100644 llvm/include/llvm/BinaryFormat/Swift.def create mode 100644 llvm/include/llvm/BinaryFormat/Swift.h create mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml create mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml create mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml create mode 100644 llvm/test/tools/dsymutil/reflection-dump.test diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def new file mode 100644 index 0000000000000..39931bec70e57 --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/Swift.def @@ -0,0 +1,26 @@ +//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Macros for running through Swift enumerators. 
+// +//===----------------------------------------------------------------------===// + +#if !(defined HANDLE_SWIFT_SECTION) +#error "Missing macro definition of HANDLE_SWIFT_SECTION" +#endif + +#ifndef HANDLE_SWIFT_SECTION +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) +#endif + +HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd") +HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty") +HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn") +HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr") +HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf") +HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst") diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h new file mode 100644 index 0000000000000..63bbfe08c86d4 --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/Swift.h @@ -0,0 +1,24 @@ +//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#ifndef LLVM_BINARYFORMAT_SWIFT_H +#define LLVM_BINARYFORMAT_SWIFT_H + +namespace llvm { +namespace swift { + +enum Swift5ReflectionSectionKind { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND, +#include "llvm/BinaryFormat/Swift.def" +#undef HANDLE_SWIFT_SECTION + Unknown, + Last = Unknown +}; +} // end of namespace swift +} // end of namespace llvm + +#endif diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index 9a5c6bcaf83f3..8e845ee91b9f7 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -9,6 +9,7 @@ #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H #define LLVM_DWARFLINKER_DWARFSTREAMER_H +#include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/DWARFLinker/DWARFLinker.h" @@ -48,7 +49,7 @@ class DwarfStreamer : public DwarfEmitter { : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator), ErrorHandler(Error), WarningHandler(Warning) {} - bool init(Triple TheTriple); + bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName); /// Dump the file to the disk. void finish(); @@ -85,6 +86,11 @@ class DwarfStreamer : public DwarfEmitter { /// Emit the swift_ast section stored in \p Buffer. void emitSwiftAST(StringRef Buffer); + /// Emit the swift reflection section stored in \p Buffer. + void emitSwiftReflectionSection( + llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, + StringRef Buffer, uint32_t Alignment, uint32_t Size); + /// Emit debug_ranges for \p FuncRange by translating the /// original \p Entries. 
void emitRangesEntries( diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 88d86d5b675ac..d2307d6922780 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -80,6 +80,10 @@ namespace llvm { private: Environment Env; + /// The name of the Segment where Swift5 Reflection Section data will be + /// outputted + StringRef Swift5ReflectionSegmentName; + /// The triple for this object. Triple TT; @@ -399,13 +403,17 @@ namespace llvm { const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI, const SourceMgr *Mgr = nullptr, MCTargetOptions const *TargetOpts = nullptr, - bool DoAutoReset = true); + bool DoAutoReset = true, + StringRef Swift5ReflSegmentName = {}); MCContext(const MCContext &) = delete; MCContext &operator=(const MCContext &) = delete; ~MCContext(); Environment getObjectFileType() const { return Env; } + const StringRef &getSwift5ReflectionSegmentName() const { + return Swift5ReflectionSegmentName; + } const Triple &getTargetTriple() const { return TT; } const SourceMgr *getSourceManager() const { return SrcMgr; } diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 5e0cccaba77fa..1b4804db783b4 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/VersionTuple.h" @@ -228,6 +229,10 @@ class MCObjectFileInfo { MCSection *ReadOnly8Section = nullptr; MCSection *ReadOnly16Section = nullptr; + // Swift5 Reflection Data Sections + std::array + Swift5ReflectionSections = {}; + public: void initMCObjectFileInfo(MCContext &MCCtx, bool PIC, bool LargeCodeModel = false); @@ -423,6 +428,14 @@ class MCObjectFileInfo { bool isPositionIndependent() const { return PositionIndependent; } + // Swift5 Reflection Data 
Sections + MCSection *getSwift5ReflectionSection( + llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) { + return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown + ? Swift5ReflectionSections[ReflSectionKind] + : nullptr; + } + private: bool PositionIndependent = false; MCContext *Ctx = nullptr; diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index ede742c47f971..09b6454bb0c14 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -22,6 +22,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" @@ -583,6 +584,9 @@ class MachOObjectFile : public ObjectFile { StringRef mapDebugSectionName(StringRef Name) const override; + llvm::swift::Swift5ReflectionSectionKind + mapReflectionSectionNameToEnumValue(StringRef SectionName) const override; + bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 12704b1fc88e7..29919db772f0e 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -18,6 +18,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/SymbolicFile.h" @@ -290,6 +291,11 @@ class ObjectFile : public SymbolicFile { virtual void getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const = 0; + virtual llvm::swift::Swift5ReflectionSectionKind + mapReflectionSectionNameToEnumValue(StringRef SectionName) const { + return llvm::swift::Swift5ReflectionSectionKind::Unknown; + }; + Expected getSymbolValue(DataRefImpl Symb) 
const; public: diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 1ab6ead3b5f66..7f9b9a9bc793c 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -27,7 +27,8 @@ namespace llvm { -bool DwarfStreamer::init(Triple TheTriple) { +bool DwarfStreamer::init(Triple TheTriple, + StringRef Swift5ReflectionSegmentName) { std::string ErrorStr; std::string TripleName; StringRef Context = "dwarf streamer init"; @@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) { if (!MSTI) return error("no subtarget info for target " + TripleName, Context), false; - MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get())); - MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false)); + MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr, + nullptr, true, Swift5ReflectionSegmentName)); + MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false)); MC->setObjectFileInfo(MOFI.get()); MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); @@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) { MS->emitBytes(Buffer); } +void DwarfStreamer::emitSwiftReflectionSection( + llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer, + uint32_t Alignment, uint32_t Size) { + MCSection *ReflectionSection = + MOFI->getSwift5ReflectionSection(ReflSectionKind); + if (ReflectionSection == nullptr) + return; + ReflectionSection->setAlignment(Align(Alignment)); + MS->SwitchSection(ReflectionSection); + MS->emitBytes(Buffer); +} + /// Emit the debug_range section contents for \p FuncRange by /// translating the original \p Entries. 
The debug_range section /// format is totally trivial, consisting just of pairs of address diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 7f639e9c408fe..eafcee1e0607b 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &, MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCSubtargetInfo *msti, const SourceMgr *mgr, MCTargetOptions const *TargetOpts, - bool DoAutoReset) - : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr), - DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti), - Symbols(Allocator), UsedNames(Allocator), + bool DoAutoReset, StringRef Swift5ReflSegmentName) + : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple), + SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler), + MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator), InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), AutoReset(DoAutoReset), TargetOptions(TargetOpts) { diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index d7f85f793c55f..77b0b0ee687cb 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -299,6 +299,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { RemarksSection = Ctx->getMachOSection( "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + // The architecture of dsymutil makes it very difficult to copy the Swift + // reflection metadata sections into the __TEXT segment, so dsymutil creates + // these sections in the __DWARF segment instead. 
+ if (!Ctx->getSwift5ReflectionSegmentName().empty()) { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ + Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] = \ + Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(), \ + MACHO, 0, SectionKind::getMetadata()); +#include "llvm/BinaryFormat/Swift.def" + } + TLSExtraDataSection = TLSTLVSection; } diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 42e257516f4e0..83bc74ff31c40 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -4765,3 +4766,14 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) { Path.str().c_str()); return ObjectPaths; } + +llvm::swift::Swift5ReflectionSectionKind +MachOObjectFile::mapReflectionSectionNameToEnumValue( + StringRef SectionName) const { +#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ + .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND) + return StringSwitch(SectionName) +#include "llvm/BinaryFormat/Swift.def" + .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown); +#undef HANDLE_SWIFT_SECTION +} diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml new file mode 100644 index 0000000000000..d81931d7861c3 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/main.yaml @@ -0,0 +1,886 @@ +# How to generate this file: +# 1. First take a swift file and run xcrun swiftc -g -v test.swift +# reflection_metadata.swift, make sure the two swift files are in a short path +# like /tmp/ + +# 2. Now you can see what the driver does, generate the object files in the +# tmp directory and link them to create the input binary + +# 3. 
Run obj2yaml on the input binary to create a yaml file and strip out the +# swift5 reflection sections from the load commands in the text segment + +# 4. I ran delta to reduce this file. + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 18 + sizeofcmds: 2848 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 952 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 11 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100003EB0 + size: 336 + offset: 0x3EB0 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 392 + segname: __DATA_CONST + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 4 + flags: 16 + Sections: + - sectname: __got + segname: __DATA_CONST + addr: 0x100004000 + size: 48 + offset: 0x4000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x6 + reserved1: 0x11 + reserved2: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 392 + segname: __DATA + vmaddr: 4295000064 + vmsize: 16384 + fileoff: 32768 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 4 + flags: 0 + Sections: + - sectname: __la_symbol_ptr + segname: __DATA + addr: 0x100008000 + size: 384 + offset: 0x8088 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295016448 + vmsize: 32768 + fileoff: 49152 + filesize: 23584 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 49152 + rebase_size: 64 + bind_off: 49216 + bind_size: 216 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 49432 
+ lazy_bind_size: 600 + export_off: 50032 + export_size: 1000 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 51136 + nsyms: 638 + stroff: 61504 + strsize: 11232 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 560 + iextdefsym: 560 + nextdefsym: 52 + iundefsym: 612 + nundefsym: 26 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 61344 + nindirectsyms: 40 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + - cmd: LC_UUID + cmdsize: 24 + uuid: AA0A51FA-8B29-3A7B-85AA-FA6A457B2211 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 786432 + sdk: 786688 + ntools: 1 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 9376 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 14942208 + compatibility_version: 65536 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 85917696 + compatibility_version: 65536 + - cmd: LC_LOAD_DYLIB + cmdsize: 64 + dylib: + name: 24 + timestamp: 2 + current_version: 85196845 + compatibility_version: 65536 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 51032 + datasize: 104 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 51136 + datasize: 0 +LinkEditData: + NameList: + - n_strx: 2355 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294976208 + - n_strx: 2398 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976224 + - n_strx: 2440 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976240 + - n_strx: 2479 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976256 + - n_strx: 2509 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294976272 + - n_strx: 2570 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976320 + - n_strx: 2590 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294976512 + - n_strx: 2635 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 
4294976576 + - n_strx: 2683 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294976608 + - n_strx: 2731 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976640 + - n_strx: 2751 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976656 + - n_strx: 2775 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976704 + - n_strx: 2791 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294976720 + - n_strx: 2814 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976752 + - n_strx: 2838 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976768 + - n_strx: 2873 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294976784 + - n_strx: 2906 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294976832 + - n_strx: 2926 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294977104 + - n_strx: 2946 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294977200 + - n_strx: 2966 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294977376 + - n_strx: 3008 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977392 + - n_strx: 3049 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977408 + - n_strx: 3087 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977424 + - n_strx: 3116 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294977440 + - n_strx: 3176 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977488 + - n_strx: 3201 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977504 + - n_strx: 3232 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977552 + - n_strx: 3270 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977648 + - n_strx: 3318 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294977664 + - n_strx: 3364 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294978352 + - n_strx: 3411 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294978464 + - n_strx: 3447 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294978688 + - n_strx: 3506 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294978832 + - n_strx: 3567 + n_type: 
0xE + n_sect: 1 + n_desc: 0 + n_value: 4294978944 + - n_strx: 3587 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979024 + - n_strx: 3607 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979056 + - n_strx: 3627 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979136 + - n_strx: 3647 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294979232 + - n_strx: 3666 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979264 + - n_strx: 3686 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979328 + - n_strx: 3706 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979536 + - n_strx: 3726 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979856 + - n_strx: 3746 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979872 + - n_strx: 3766 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979888 + - n_strx: 3786 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979920 + - n_strx: 3814 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294979936 + - n_strx: 3842 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294980240 + - n_strx: 3871 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294980288 + - n_strx: 3898 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294980320 + - n_strx: 3927 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294980368 + - n_strx: 3951 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294980384 + - n_strx: 3982 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294980448 + - n_strx: 4001 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294980464 + - n_strx: 4032 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294980512 + - n_strx: 4060 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294980800 + - n_strx: 4088 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294981120 + - n_strx: 4116 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294981136 + - n_strx: 4144 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 4294981152 + - n_strx: 4172 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294981184 + - 
n_strx: 4208 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294981248 + - n_strx: 4225 + n_type: 0x1E + n_sect: 1 + n_desc: 128 + n_value: 4294981280 + - n_strx: 4253 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294981328 + - n_strx: 4276 + n_type: 0x1E + n_sect: 1 + n_desc: 0 + n_value: 4294981376 + - n_strx: 4294 + n_type: 0x1E + n_sect: 5 + n_desc: 128 + n_value: 4294981764 + - n_strx: 4306 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294981824 + - n_strx: 4322 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294981952 + - n_strx: 4349 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294981960 + - n_strx: 4387 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294981968 + - n_strx: 4423 + n_type: 0x1E + n_sect: 5 + n_desc: 0 + n_value: 4294982160 + - n_strx: 4474 + n_type: 0xE + n_sect: 5 + n_desc: 0 + n_value: 4294982352 + - n_strx: 4503 + n_type: 0xE + n_sect: 5 + n_desc: 0 + n_value: 4294982448 + - n_strx: 4530 + n_type: 0x1E + n_sect: 5 + n_desc: 128 + n_value: 4294982464 + - n_strx: 4558 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982466 + - n_strx: 4571 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982470 + - n_strx: 4608 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982476 + - n_strx: 4639 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982498 + - n_strx: 4666 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982506 + - n_strx: 4691 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982510 + - n_strx: 4727 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982516 + - n_strx: 4758 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982522 + - n_strx: 4790 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982528 + - n_strx: 4820 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982534 + - n_strx: 4859 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982540 + - n_strx: 4902 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982554 + - n_strx: 4945 + 
n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 4294982564 + - n_strx: 4986 + n_type: 0x1E + n_sect: 6 + n_desc: 128 + n_value: 0 + - n_strx: 5987 + n_type: 0x66 + n_sect: 3 + n_desc: 1 + n_value: 1638431181 + - n_strx: 7104 + n_type: 0x66 + n_sect: 3 + n_desc: 1 + n_value: 1638431191 + StringTable: + - ' ' + - '_$s4main10MyProtocolMp' + - '_$s4main10MyProtocolTL' + - '_$s4main11ConformanceV5innerSivM' + - '_$s4main11ConformanceV5innerSivg' + - '_$s4main11ConformanceV5innerSivpMV' + - '_$s4main11ConformanceV5innerSivpfi' + - '_$s4main11ConformanceV5innerSivs' + - '_$s4main11ConformanceVAA10MyProtocolAAMc' + - '_$s4main11ConformanceVAA10MyProtocolAAWP' + - '_$s4main11ConformanceVMa' + - '_$s4main11ConformanceVMn' + - '_$s4main11ConformanceVN' + - '_$s4main12Conformance2V5innerSivM' + - '_$s4main12Conformance2V5innerSivg' + - '_$s4main12Conformance2V5innerSivpMV' + - '_$s4main12Conformance2V5innerSivpfi' + - '_$s4main12Conformance2V5innerSivs' + - '_$s4main12Conformance2VAA10MyProtocolAAMc' + - '_$s4main12Conformance2VAA10MyProtocolAAWP' + - '_$s4main12Conformance2VMa' + - '_$s4main12Conformance2VMn' + - '_$s4main12Conformance2VN' + - '_$s4main13MyGenericEnumOMa' + - '_$s4main13MyGenericEnumOMn' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc' + - '_$s4main14MyGenericClassCMa' + - '_$s4main14MyGenericClassCMn' + - '_$s4main14MyGenericClassCfD' + - '_$s4main14MyGenericClassCfd' + - '_$s4main15MyGenericStructVMa' + - '_$s4main15MyGenericStructVMn' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF' + - '_$s4main6MyEnumOMa' + - '_$s4main6MyEnumOMn' + - '_$s4main6MyEnumON' + - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC' + - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq' + - 
'_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc' + - '_$s4main7MyClassCMa' + - '_$s4main7MyClassCMm' + - '_$s4main7MyClassCMn' + - '_$s4main7MyClassCN' + - '_$s4main7MyClassCfD' + - '_$s4main7MyClassCfd' + - '_$s4main8MyStructVMa' + - '_$s4main8MyStructVMn' + - '_$s4main8MyStructVN' + - '_$s5Inner4main10MyProtocolPTl' + - __mh_execute_header + - _main + - '_$sBi64_WV' + - '_$sBoWV' + - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC' + - '_$sSSN' + - '_$sSaMa' + - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF' + - '_$ss5print_9separator10terminatoryypd_S2StF' + - '_$sypN' + - '_$sytWV' + - '_OBJC_CLASS_$__TtCs12_SwiftObject' + - '_OBJC_METACLASS_$__TtCs12_SwiftObject' + - __objc_empty_cache + - _objc_opt_self + - _swift_allocObject + - _swift_allocateGenericClassMetadata + - _swift_allocateGenericValueMetadata + - _swift_bridgeObjectRelease + - _swift_checkMetadataState + - _swift_deallocClassInstance + - _swift_deallocObject + - _swift_getAssociatedTypeWitness + - _swift_getGenericMetadata + - _swift_initClassMetadata2 + - _swift_release + - _swift_retain + - dyld_stub_binder + - '_$s4main12Conformance2V5innerSivM.resume.0' + - '_$s4main12Conformance2V5innerACSi_tcfcfA_' + - '_$s4main12Conformance2V5innerACSi_tcfC' + - '_$s4main12Conformance2VACycfC' + - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' + - '_$s4main3AppVAAyyFZ' + - '_$ss27_finalizeUninitializedArrayySayxGABnlF' + - '_$ss5print_9separator10terminatoryypd_S2StFfA0_' + - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' + - '_$s4main3AppVACycfC' + - '_$s4main3AppV5$mainyyFZ' + - '_$s4main3AppVMa' + - '_$sSa12_endMutationyyF' + - '_$s4main7MyClassC1iSivg' + - '_$s4main7MyClassC2msAA0B6StructVvg' + - '_$s4main7MyClassC2meAA0B4EnumOvg' + - '_$s4main6MyEnumOWOy' + - '_$s4main6MyEnumOWOe' + - '_$s4main6MyEnumOWOh' + - '_$s4main11ConformanceV5innerSivM.resume.0' + - '_$s4main11ConformanceV5innerACSi_tcfcfA_' + - '_$s4main11ConformanceV5innerACSi_tcfC' + - 
'_$s4main11ConformanceVACycfC' + - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW' + - '_$s4main8MyStructVACycfC' + - '_$s4main14MyGenericClassC1txvg' + - '_$s4main14MyGenericClassC1i5InnerQzvg' + - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg' + - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg' + - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh' + - '_$s4main15MyGenericStructVACyxGycfC' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA' + - '_$s4main6MyEnumOwCP' + - '_$s4main6MyEnumOwxx' + - '_$s4main6MyEnumOwcp' + - '_$s4main6MyEnumOwca' + - ___swift_memcpy9_8 + - '_$s4main6MyEnumOwta' + - '_$s4main6MyEnumOwet' + - '_$s4main6MyEnumOwst' + - '_$s4main6MyEnumOwug' + - '_$s4main6MyEnumOwup' + - '_$s4main6MyEnumOwui' + - '_$s4main14MyGenericClassCMi' + - '_$s4main14MyGenericClassCMr' + - '_$s4main15MyGenericStructVMi' + - '_$s4main13MyGenericEnumOMi' + - ___swift_initWithCopy_strong + - ___swift_destroy_strong + - ___swift_assignWithCopy_strong + - ___swift_memcpy8_8 + - ___swift_assignWithTake_strong + - '_$s4main13MyGenericEnumOwet' + - '_$s4main13MyGenericEnumOwst' + - '_$s4main13MyGenericEnumOwug' + - '_$s4main13MyGenericEnumOwup' + - '_$s4main13MyGenericEnumOwui' + - ___swift_instantiateGenericMetadata + - ___chkstk_darwin + - ___chkstk_darwin_llvm_probe + - ___chkstk_darwin_probe + - ____chkstk_darwin + - '_$s4mainMXM' + - '_$s4main3AppVMn' + - '_$s4main7MyClassC1iSivpWvd' + - '_$s4main7MyClassC2msAA0B6StructVvpWvd' + - '_$s4main7MyClassC2meAA0B4EnumOvpWvd' + - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd' + - '_$s4main15MyGenericStructVMP' + - '_$s4main13MyGenericEnumOMP' + - ___swift_reflection_version + - _symbolic Si + - _symbolic _____ 4main12Conformance2V + - '_symbolic $s4main10MyProtocolP' + - _symbolic _____ 4main3AppV + - _symbolic x + - _symbolic B0 + - _symbolic _____ 4main11ConformanceV + - _symbolic _____ 4main7MyClassC + - _symbolic _____ 
4main8MyStructV + - _symbolic _____ 4main6MyEnumO + - _symbolic _____ 4main14MyGenericClassC + - _symbolic 5Inner_____Qz 4main10MyProtocolP + - _symbolic _____yxG 4main15MyGenericStructV + - _symbolic _____yxG 4main13MyGenericEnumO + - _symbolic _____ 4main15MyGenericStructV + - _symbolic _____ 4main13MyGenericEnumO + - _symbolic _____yxG 4main14MyGenericClassC + - '_$s4main12Conformance2VAA10MyProtocolAAMA' + - '_$s4main11ConformanceVAA10MyProtocolAAMA' + - '_$s4main12Conformance2VMF' + - '_$s4main3AppVMF' + - '_$s4main10MyProtocol_pMF' + - '_$s4main7MyClassCMF' + - '_$s4main11ConformanceVMF' + - '_$s4main8MyStructVMF' + - '_$s4main6MyEnumOMF' + - '_$s4main14MyGenericClassCMF' + - '_$s4main15MyGenericStructVMF' + - '_$s4main13MyGenericEnumOMF' + - '_$s4main6MyEnumOMB' + - '_$s4main12Conformance2VMf' + - '_$s4main3AppVMf' + - '_$s4main3AppVN' + - '_$s4main11ConformanceVMf' + - '_$s4main8MyStructVMf' + - '_$s4main6MyEnumOWV' + - '_$s4main6MyEnumOMf' + - ___unnamed_23 + - '_$s4main14MyGenericClassCMP' + - '_$s4main13MyGenericEnumOWV' + - __METACLASS_DATA__TtC4main7MyClass + - __IVARS__TtC4main7MyClass + - __DATA__TtC4main7MyClass + - __IVARS__TtC4main14MyGenericClass + - __dyld_private + - '_$s4main7MyClassCMf' + - '_$s4main14MyGenericClassCMI' + - '_$s4main15MyGenericStructVMI' + - '_$s4main13MyGenericEnumOMI' + - '/tmp/main-1.swiftmodule' + - '/Users/shubham/Development/test76973336/final2objfiletest/' + - test.swift + - '/tmp/test-1.o' + - '_$s4main12Conformance2V5innerSivpfi' + - '_$s4main12Conformance2V5innerSivg' + - '_$s4main12Conformance2V5innerSivs' + - '_$s4main12Conformance2V5innerSivM' + - '_$s4main12Conformance2V5innerSivM.resume.0' + - '_$s4main12Conformance2V5innerACSi_tcfcfA_' + - '_$s4main12Conformance2V5innerACSi_tcfC' + - '_$s4main12Conformance2VACycfC' + - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' + - '_$s4main3AppVAAyyFZ' + - '_$ss27_finalizeUninitializedArrayySayxGABnlF' + - '_$ss5print_9separator10terminatoryypd_S2StFfA0_' 
+ - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' + - '_$s4main3AppVACycfC' + - '_$s4main3AppV5$mainyyFZ' + - _main + - '_$s4main12Conformance2VMa' + - '_$s4main3AppVMa' + - '_$sSa12_endMutationyyF' + - '_$s4main12Conformance2VAA10MyProtocolAAMc' + - '_$s4main12Conformance2V5innerSivpMV' + - '_$s4mainMXM' + - '_$s4main12Conformance2VMn' + - '_$s4main3AppVMn' + - _symbolic Si + - _symbolic _____ 4main12Conformance2V + - '_symbolic $s4main10MyProtocolP' + - _symbolic _____ 4main3AppV + - '_$s4main12Conformance2VAA10MyProtocolAAMA' + - '_$s4main12Conformance2VMF' + - '_$s4main3AppVMF' + - '_$s4main12Conformance2VMf' + - '_$s4main12Conformance2VN' + - '_$s4main3AppVMf' + - '_$s4main3AppVN' + - '_$s4main12Conformance2VAA10MyProtocolAAWP' + - reflection_metadata.swift + - '/tmp/reflection_metadata-1.o' diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml new file mode 100644 index 0000000000000..b2179a23bf28d --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml @@ -0,0 +1,436 @@ +# How to generate this file: +# 1. First take a swift file and run xcrun swiftc -g -v file.swift +# secondfile.swift, make sure the two swift files are in a short path like /tmp/ + +# 2. Now you can see what the driver does, generate the object files in the +# tmp directory + +# 3. Run obj2yaml on object file to create a yaml file + +# 4. I ran delta to reduce this file. 
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x1 + ncmds: 8 + sizeofcmds: 2800 + flags: 0x2000 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 2552 + segname: '' + vmaddr: 0 + vmsize: 21352 + fileoff: 2832 + filesize: 20967 + maxprot: 7 + initprot: 7 + nsects: 31 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0 + size: 4571 + offset: 0xB10 + align: 4 + reloff: 0x5CF8 + nreloc: 74 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + relocations: + - address: 0x11A1 + symbolnum: 142 + pcrel: true + length: 2 + extern: true + type: 1 + scattered: false + value: 0 + - sectname: __swift5_typeref + segname: __TEXT + addr: 0x11DC + size: 117 + offset: 0x1CEC + align: 1 + reloff: 0x5F48 + nreloc: 22 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700 + relocations: + - address: 0x6D + symbolnum: 163 + pcrel: false + length: 2 + extern: true + type: 0 + scattered: false + value: 0 + - sectname: __swift5_capture + segname: __TEXT + addr: 0x1254 + size: 24 + offset: 0x1D64 + align: 2 + reloff: 0x5FF8 + nreloc: 6 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF + relocations: + - address: 0x14 + symbolnum: 29 + pcrel: false + length: 3 + extern: true + type: 0 + scattered: false + value: 0 + - sectname: __swift5_reflstr + segname: __TEXT + addr: 0x17D8 + size: 37 + offset: 0x22E8 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300 + - sectname: __swift5_assocty + segname: __TEXT + addr: 0x1800 + size: 24 + offset: 0x2310 + align: 2 + reloff: 0x6530 + 
nreloc: 8 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF + relocations: + - address: 0x14 + symbolnum: 31 + pcrel: false + length: 2 + extern: true + type: 5 + scattered: false + value: 0 + - sectname: __swift5_fieldmd + segname: __TEXT + addr: 0x1818 + size: 260 + offset: 0x2328 + align: 2 + reloff: 0x6570 + nreloc: 60 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF + relocations: + - address: 0x100 + symbolnum: 71 + pcrel: false + length: 2 + extern: true + type: 0 + scattered: false + value: 0 + - sectname: __swift5_builtin + segname: __TEXT + addr: 0x1AC8 + size: 20 + offset: 0x25D8 + align: 2 + reloff: 0x67F8 + nreloc: 2 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 00000000090000000800010010000000FE000000 + relocations: + - address: 0x0 + symbolnum: 52 + pcrel: false + length: 2 + extern: true + type: 5 + scattered: false + value: 0 + - sectname: __bss + segname: __DATA + addr: 0x3372 + size: 2084 + offset: 0x50B0 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x0 + reserved2: 0x0 + relocations: + - address: 0x56 + symbolnum: 1 + pcrel: false + length: 3 + extern: false + type: 0 + scattered: false + value: 0 + - cmd: LC_BUILD_VERSION + cmdsize: 24 + platform: 1 + minos: 786432 + sdk: 786688 + ntools: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 27888 + nsyms: 185 + stroff: 30848 + strsize: 5056 + - cmd: LC_DYSYMTAB + 
cmdsize: 80 + ilocalsym: 0 + nlocalsym: 79 + iextdefsym: 79 + nextdefsym: 87 + iundefsym: 166 + nundefsym: 19 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LINKER_OPTION + cmdsize: 40 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, + 0x0, 0x0, 0x0, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 24 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, + 0x6F, 0x72, 0x65, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 32 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, + 0x6E, 0x63, 0x79, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 24 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0 ] +LinkEditData: + NameList: + - n_strx: 5014 + n_type: 0xE + n_sect: 1 + n_desc: 0 + n_value: 5600 + StringTable: + - '' + - l_objectdestroy + - '_$s4main6MyEnumOWOy' + - '_$s4main6MyEnumOwxx' + - _symbolic x + - '_$s4main6MyEnumOwst' + - '_$s4main13MyGenericEnumOwst' + - '_$s4main6MyEnumOwet' + - '_$s4main13MyGenericEnumOwet' + - '_OBJC_CLASS_$__TtCs12_SwiftObject' + - '_OBJC_METACLASS_$__TtCs12_SwiftObject' + - _swift_deallocObject + - _swift_allocObject + - '_$s4main11ConformanceV5innerSivs' + - _swift_getAssociatedTypeWitness + - __IVARS__TtC4main7MyClass + - __DATA__TtC4main7MyClass + - __METACLASS_DATA__TtC4main7MyClass + - __IVARS__TtC4main14MyGenericClass + - l_protocols + - _objc_classes + - l_protocol_conformances + - l__swift5_reflection_descriptor + - l_coro.devirt.trigger + - '_$s4main14MyGenericClassCMr' + - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq' + - '_$s4main6MyEnumOwup' + - '_$s4main13MyGenericEnumOwup' + - '_$s4main6MyEnumOwcp' + - '_$s4main10MyProtocolMp' + - ___swift_reflection_version + - 
____chkstk_darwin + - _swift_retain + - '_$s4main8MyStructVMn' + - '_$s4main15MyGenericStructVMn' + - '_$s4main11ConformanceVMn' + - '_$s4main6MyEnumOMn' + - '_$s4main13MyGenericEnumOMn' + - '_$s4main7MyClassCMn' + - '_$s4main14MyGenericClassCMn' + - '_$s4main7MyClassCMm' + - '_$s5Inner4main10MyProtocolPTl' + - '_$s4main6MyEnumOwui' + - '_$s4main13MyGenericEnumOwui' + - '_$s4main11ConformanceV5innerSivpfi' + - _symbolic Si + - '_$s4main15MyGenericStructVMi' + - '_$s4main13MyGenericEnumOMi' + - '_$s4main14MyGenericClassCMi' + - l_llvm.swift_module_hash + - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh' + - '_$s4main6MyEnumOWOh' + - '_$s4main14MyGenericClassC1i5InnerQzvg' + - '_$s4main14MyGenericClassC1txvg' + - '_$s4main11ConformanceV5innerSivg' + - '_$s4main7MyClassC1iSivg' + - '_$s4main7MyClassC2msAA0B6StructVvg' + - '_$s4main7MyClassC2meAA0B4EnumOvg' + - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg' + - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg' + - '_$s4main6MyEnumOwug' + - '_$s4main13MyGenericEnumOwug' + - ___swift_initWithCopy_strong + - ___swift_assignWithCopy_strong + - ___swift_destroy_strong + - ___swift_assignWithTake_strong + - _objc_opt_self + - '_$s4main8MyStructVMf' + - '_$s4main11ConformanceVMf' + - '_$s4main6MyEnumOMf' + - '_$s4main7MyClassCMf' + - _swift_checkMetadataState + - _swift_release + - l_type_metadata_table + - __objc_empty_cache + - _swift_deallocClassInstance + - ___chkstk_darwin_llvm_probe + - '_$s4main6MyEnumOWOe' + - '_$s4main7MyClassC1iSivpWvd' + - '_$s4main7MyClassC2msAA0B6StructVvpWvd' + - '_$s4main7MyClassC2meAA0B4EnumOvpWvd' + - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd' + - '_$s4main7MyClassCfd' + - '_$s4main14MyGenericClassCfd' + - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc' + - '_$s4main11ConformanceVAA10MyProtocolAAMc' + - '_$s4main6MyEnumOwta' + - l_metadata + - _swift_allocateGenericClassMetadata 
+ - _swift_allocateGenericValueMetadata + - _swift_getGenericMetadata + - ___swift_instantiateGenericMetadata + - '_$s4main6MyEnumOwca' + - '_$s4main8MyStructVMa' + - '_$s4main15MyGenericStructVMa' + - '_$s4main11ConformanceVMa' + - '_$s4main6MyEnumOMa' + - '_$s4main13MyGenericEnumOMa' + - '_$s4main7MyClassCMa' + - '_$s4main14MyGenericClassCMa' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_' + - '_$s4main11ConformanceV5innerACSi_tcfcfA_' + - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW' + - _symbolic _____ 4main8MyStructV + - _symbolic _____ 4main15MyGenericStructV + - _symbolic _____yxG 4main15MyGenericStructV + - _symbolic _____ 4main11ConformanceV + - '_$sytWV' + - '_$sBoWV' + - '_$sBi64_WV' + - '_$s4main6MyEnumOWV' + - '_$s4main13MyGenericEnumOWV' + - '_$s4main11ConformanceV5innerSivpMV' + - '_symbolic $s4main10MyProtocolP' + - _symbolic 5Inner_____Qz 4main10MyProtocolP + - '_$s4main11ConformanceVAA10MyProtocolAAWP' + - '_$s4main15MyGenericStructVMP' + - '_$s4main13MyGenericEnumOMP' + - '_$s4main14MyGenericClassCMP' + - '_$s4main6MyEnumOwCP' + - _symbolic _____ 4main6MyEnumO + - _symbolic _____ 4main13MyGenericEnumO + - _symbolic _____yxG 4main13MyGenericEnumO + - '_$s4main8MyStructVN' + - '_$s4main11ConformanceVN' + - '_$s4main6MyEnumON' + - '_$s4main7MyClassCN' + - '_$s4main11ConformanceV5innerSivM' + - '_$s4mainMXM' + - '_$s4main10MyProtocolTL' + - '_$s4main15MyGenericStructVMI' + - '_$s4main13MyGenericEnumOMI' + - '_$s4main14MyGenericClassCMI' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF' + - '_$s4main10MyProtocol_pMF' + - '_$s4main8MyStructVMF' + - '_$s4main15MyGenericStructVMF' + - '_$s4main11ConformanceVMF' + - '_$s4main6MyEnumOMF' + - '_$s4main13MyGenericEnumOMF' + - '_$s4main7MyClassCMF' + - '_$s4main14MyGenericClassCMF' + - '_$s4main7MyClassCfD' + - '_$s4main14MyGenericClassCfD' + - _symbolic _____ 4main7MyClassC + - _symbolic _____ 4main14MyGenericClassC + - _symbolic _____yxG 4main14MyGenericClassC + - 
'_$s4main15MyGenericStructVACyxGycfC' + - '_$s4main8MyStructVACycfC' + - '_$s4main11ConformanceVACycfC' + - '_$s4main11ConformanceV5innerACSi_tcfC' + - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC' + - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC' + - '_$s4main6MyEnumOMB' + - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA' + - '_$s4main11ConformanceVAA10MyProtocolAAMA' + - l___unnamed_29 + - l___unnamed_19 + - ___swift_memcpy9_8 + - ___swift_memcpy8_8 + - l___unnamed_28 + - l___unnamed_18 + - l___unnamed_27 + - l___unnamed_17 + - l___unnamed_26 + - l___unnamed_16 + - l___unnamed_25 + - l___unnamed_15 + - l___unnamed_4 + - l___unnamed_24 + - l___unnamed_14 + - l___unnamed_3 + - ___unnamed_23 + - l___unnamed_13 + - _swift_initClassMetadata2 + - l___unnamed_2 + - l___unnamed_12 + - l___unnamed_1 + - l___unnamed_11 + - _symbolic B0 + - l___unnamed_30 + - l___unnamed_10 + - '_$s4main11ConformanceV5innerSivM.resume.0' diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml new file mode 100644 index 0000000000000..da3aa9a8aaf34 --- /dev/null +++ b/llvm/test/tools/dsymutil/Inputs/test.yaml @@ -0,0 +1,254 @@ +# How to generate this file: +# 1. First take a swift file and run xcrun swiftc -g -v file.swift +# secondfile.swift, make sure the two swift files are in a short path like /tmp/ + +# 2. Now you can see what the driver does, generate the object files in the +# tmp directory + +# 3. Run obj2yaml on object file to create a yaml file + +# 4. I ran delta to reduce this file. 
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x1 + ncmds: 8 + sizeofcmds: 2240 + flags: 0x2000 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 1992 + segname: '' + vmaddr: 0 + vmsize: 6592 + fileoff: 2272 + filesize: 6592 + maxprot: 7 + initprot: 7 + nsects: 24 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0 + size: 593 + offset: 0x8E0 + align: 4 + reloff: 0x22A0 + nreloc: 24 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + relocations: + - address: 0x233 + symbolnum: 2 + pcrel: true + length: 2 + extern: true + type: 4 + scattered: false + value: 0 + - sectname: __swift5_typeref + segname: __TEXT + addr: 0x2D6 + size: 38 + offset: 0xBB6 + align: 1 + reloff: 0x2418 + nreloc: 4 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00 + relocations: + - address: 0x21 + symbolnum: 46 + pcrel: false + length: 3 + extern: true + type: 0 + scattered: false + value: 0 + - sectname: __swift5_reflstr + segname: __TEXT + addr: 0x318 + size: 12 + offset: 0xBF8 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 496E6E657200696E6E657200 + - sectname: __swift5_assocty + segname: __TEXT + addr: 0x324 + size: 24 + offset: 0xC04 + align: 2 + reloff: 0x2450 + nreloc: 8 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF + relocations: + - address: 0x14 + symbolnum: 5 + pcrel: false + length: 2 + extern: true + type: 0 + scattered: false + value: 0 + - sectname: __swift5_fieldmd + segname: __TEXT + addr: 0x378 + size: 44 + offset: 0xC58 + align: 2 + reloff: 0x24C0 + nreloc: 8 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + content: 000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000 + relocations: + - address: 0x1C + symbolnum: 12 + pcrel: 
false + length: 3 + extern: false + type: 0 + scattered: false + value: 0 + - cmd: LC_BUILD_VERSION + cmdsize: 24 + platform: 1 + minos: 786432 + sdk: 786688 + ntools: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 9824 + nsyms: 57 + stroff: 10736 + strsize: 1544 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 16 + iextdefsym: 16 + nextdefsym: 31 + iundefsym: 47 + nundefsym: 10 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LINKER_OPTION + cmdsize: 40 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, + 0x0, 0x0, 0x0, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 24 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, + 0x6F, 0x72, 0x65, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 32 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, + 0x6E, 0x63, 0x79, 0x0 ] + - cmd: LC_LINKER_OPTION + cmdsize: 24 + count: 1 + PayloadBytes: [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0 ] +LinkEditData: + NameList: + - n_strx: 1494 + n_type: 0xE + n_sect: 9 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - l_entry_point + - '_$s4main12Conformance2V5innerSivs' + - l_protocol_conformances + - l_coro.devirt.trigger + - '_$s4main10MyProtocolMp' + - ___swift_reflection_version + - _main + - '_$s4main3AppVMn' + - '_$s4main12Conformance2VMn' + - '_$s4main12Conformance2V5innerSivpfi' + - _symbolic Si + - l_llvm.swift_module_hash + - '_$s4main12Conformance2V5innerSivg' + - '_$s4main3AppVMf' + - '_$s4main12Conformance2VMf' + - _swift_bridgeObjectRelease + - l_type_metadata_table + - '_$s4main12Conformance2VAA10MyProtocolAAMc' + - '_$sSaMa' + - '_$s4main3AppVMa' + - '_$s4main12Conformance2VMa' + - '_$s4main12Conformance2V5innerACSi_tcfcfA_' + - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' + - 
'_$ss5print_9separator10terminatoryypd_S2StFfA0_' + - '_$s4main3AppV5$mainyyFZ' + - '_$s4main3AppVAAyyFZ' + - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' + - _symbolic _____ 4main3AppV + - '_$sytWV' + - '_$sBi64_WV' + - '_$s4main12Conformance2V5innerSivpMV' + - _symbolic _____ 4main12Conformance2V + - '_symbolic $s4main10MyProtocolP' + - '_$s4main12Conformance2VAA10MyProtocolAAWP' + - '_$sypN' + - '_$s4main3AppVN' + - '_$s4main12Conformance2VN' + - '_$sSSN' + - '_$s4main12Conformance2V5innerSivM' + - '_$s4mainMXM' + - '_$sSa12_endMutationyyF' + - '_$ss5print_9separator10terminatoryypd_S2StF' + - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF' + - '_$ss27_finalizeUninitializedArrayySayxGABnlF' + - '_$s4main3AppVMF' + - '_$s4main12Conformance2VMF' + - '_$s4main3AppVACycfC' + - '_$s4main12Conformance2VACycfC' + - '_$s4main12Conformance2V5innerACSi_tcfC' + - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC' + - '_$s4main12Conformance2VAA10MyProtocolAAMA' + - l___unnamed_8 + - l___unnamed_7 + - l___unnamed_6 + - l___unnamed_5 + - l___unnamed_4 + - '_$s4main12Conformance2V5innerSivM.resume.0' + - '' + - '' + - '' + - '' + - '' + - '' + - '' diff --git a/llvm/test/tools/dsymutil/reflection-dump.test b/llvm/test/tools/dsymutil/reflection-dump.test new file mode 100644 index 0000000000000..3fea11ee57adc --- /dev/null +++ b/llvm/test/tools/dsymutil/reflection-dump.test @@ -0,0 +1,42 @@ +RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp +RUN: cp %p/Inputs/main.yaml %t.dir +RUN: cp %p/Inputs/test.yaml %t.dir +RUN: cp %p/Inputs/reflection_metadata.yaml %t.dir +RUN: yaml2obj %p/Inputs/main.yaml -o %t.dir/main +RUN: yaml2obj %p/Inputs/test.yaml -o %t.dir/tmp/test-1.o +RUN: yaml2obj %p/Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o + +RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM +RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s + +CHECK: Contents of section 
__DWARF,__swift5_typeref: +CHECK-NEXT: 10000e000 53690000 01ffffff ff002473 346d6169 Si........$s4mai +CHECK-NEXT: 10000e010 6e31304d 7950726f 746f636f 6c500000 n10MyProtocolP.. +CHECK-NEXT: 10000e020 01ffffff ff007800 42300000 53690000 ......x.B0..Si.. +CHECK-NEXT: 10000e030 01ffffff ff002473 346d6169 6e31304d ......$s4main10M +CHECK-NEXT: 10000e040 7950726f 746f636f 6c500000 01ffffff yProtocolP...... + +CHECK: Contents of section __DWARF,__swift5_reflstr: +CHECK-NEXT: 10000e09b 496e6e65 7200696e 6e657200 496e6e65 Inner.inner.Inne +CHECK-NEXT: 10000e0ab 72006900 6d73006d 6500696e 6e657200 r.i.ms.me.inner. +CHECK-NEXT: 10000e0bb 43004900 74006d67 73006d67 65004743 C.I.t.mgs.mge.GC +CHECK-NEXT: 10000e0cb 00 + +CHECK: Contents of section __DWARF,__swift5_assocty: +CHECK-NEXT: 10000e0cc 00000000 fcffffff 01000000 08000000 ................ +CHECK-NEXT: 10000e0dc f0ffffff ecffffff 00000000 fcffffff ................ +CHECK-NEXT: 10000e0ec 01000000 08000000 f0ffffff ecffffff ................ + +CHECK: Contents of section __DWARF,__swift5_fieldmd: +CHECK-NEXT: 10000e0fc 00000000 00000000 00000c00 01000000 ................ +CHECK-NEXT: 10000e10c 02000000 ecffffff e8ffffff 00000000 ................ +CHECK-NEXT: 10000e11c 00000000 00000c00 00000000 00000000 ................ +CHECK-NEXT: 10000e12c 00000000 04000c00 00000000 00000000 ................ + +CHECK: Contents of section __DWARF,__swift5_capture: +CHECK-NEXT: 10000e22c 01000000 01000000 02000000 f4ffffff ................ +CHECK-NEXT: 10000e23c f0ffffff ecffffff ........ + +CHECK: Contents of section __DWARF,__swift5_builtin: +CHECK-NEXT: 10000e244 00000000 09000000 08000100 10000000 ................ +CHECK-NEXT: 10000e254 fe000000 .... 
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index a8dfde0865377..b9682b83ac67a 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" @@ -174,7 +175,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple, [&](const Twine &Warning, StringRef Context, const DWARFDie *) { warn(Warning, Context); }); - return Streamer->init(TheTriple); + return Streamer->init(TheTriple, "__DWARF"); } ErrorOr @@ -295,6 +296,77 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj, return ErrorOrObj.getError(); } +static bool binaryHasSwiftReflectionSections(const DebugMap &Map, + const LinkOptions &Options, + BinaryHolder &BinHolder) { + // If the input binary has swift5 reflection sections, there is no need to + // copy them to the .dSYM. Only copy them for binaries where the linker + // omitted the reflection metadata. + if (!Map.getBinaryPath().empty() && + Options.FileType == OutputFileType::Object) { + + auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath()); + // If ObjectEntry or Object has an error, no binary exists, therefore no + // reflection sections exist. + if (!ObjectEntry) { + // Any errors will be diagnosed later in the main loop, ignore them here. + llvm::consumeError(ObjectEntry.takeError()); + return false; + } + + auto Object = + ObjectEntry->getObjectAs(Map.getTriple()); + if (!Object) { + // Any errors will be diagnosed later in the main loop, ignore them here. 
+ llvm::consumeError(Object.takeError()); + return false; + } + + for (auto &Section : Object->sections()) { + llvm::Expected NameOrErr = + Object->getSectionName(Section.getRawDataRefImpl()); + if (!NameOrErr) { + llvm::consumeError(NameOrErr.takeError()); + continue; + } + NameOrErr->consume_back("__TEXT"); + if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) != + llvm::swift::Swift5ReflectionSectionKind::Unknown) { + return true; + } + } + } + return false; +} + +static void +copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj, + DwarfStreamer *Streamer) { + auto OF = + llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename()); + if (!OF) { + llvm::consumeError(OF.takeError()); + return; + } else if (auto *MO = + dyn_cast(OF->getBinary())) { + for (auto &Section : OF->getBinary()->sections()) { + llvm::Expected NameOrErr = + MO->getSectionName(Section.getRawDataRefImpl()); + if (!NameOrErr) { + llvm::consumeError(NameOrErr.takeError()); + continue; + } + llvm::Expected SectionContents = Section.getContents(); + if (SectionContents) { + NameOrErr->consume_back("__TEXT"); + Streamer->emitSwiftReflectionSection( + MO->mapReflectionSectionNameToEnumValue(*NameOrErr), + *SectionContents, Section.getAlignment(), Section.getSize()); + } + } + } +} + bool DwarfLinkerForBinary::link(const DebugMap &Map) { if (!createStreamer(Map.getTriple(), OutFile)) return false; @@ -389,8 +461,19 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) { llvm_unreachable("Unhandled DebugMap object"); }); GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces); + bool ReflectionSectionsPresentInBinary = false; + // If there is no output specified, no point in checking the binary for swift5 + // reflection sections. 
+ if (!Options.NoOutput) { + ReflectionSectionsPresentInBinary = + binaryHasSwiftReflectionSections(Map, Options, BinHolder); + } for (const auto &Obj : Map.objects()) { + + if (!ReflectionSectionsPresentInBinary) + copySwiftReflectionMetadata(Obj.get(), Streamer.get()); + // N_AST objects (swiftmodule files) should get dumped directly into the // appropriate DWARF section. if (Obj->getType() == MachO::N_AST) { @@ -431,7 +514,6 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) { continue; } - if (auto ErrorOrObj = loadObject(*Obj, Map, RL)) GeneralLinker.addObjectFile(*ErrorOrObj); else { From 9f4cc5a6bb56b42bb90ea31f10ecf8fed8f07653 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 21 Jan 2022 11:55:17 -0800 Subject: [PATCH 220/946] [gn build] Set HAVE_MALLINFO2=1 I'm seeing deprecated warnings due to using mallinfo() instead of mallinfo2(). ../../llvm/lib/Support/Unix/Process.inc:98:10: warning: 'mallinfo' is deprecated [-Wdeprecated-declarations] mi = ::mallinfo(); mallinfo2() is part of glibc 2.33 which was released in Feb 2021, which is fairly recent but I think gn users should be using fairly up to date glibcs. If this breaks people we could make this a gn arg instead. 
Differential Revision: https://reviews.llvm.org/D117916 --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 3efbdde7d85b4..3c882a80fee19 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -89,7 +89,7 @@ write_cmake_config("config") { "HAVE_LIBPFM=", "HAVE_LIBPSAPI=", "HAVE_MALLCTL=", - "HAVE_MALLINFO2=", + "HAVE_MALLINFO2=1", "HAVE_SIGNAL_H=1", "HAVE_STD_IS_TRIVIALLY_COPYABLE=1", "HAVE_STRERROR=1", From 6103b2d45bfb43bb403d289d80c137d5155b0778 Mon Sep 17 00:00:00 2001 From: Shubham Sandeep Rastogi Date: Fri, 21 Jan 2022 13:43:51 -0800 Subject: [PATCH 221/946] Revert "Emit swift5 reflection section data in dsym bundle generated by dsymutil in the Dwarf section." This reverts commit d84d1135d80c1dead6564347943ba56eed5aac3b. 
to investigate buildbot failures --- llvm/include/llvm/BinaryFormat/Swift.def | 26 - llvm/include/llvm/BinaryFormat/Swift.h | 24 - llvm/include/llvm/DWARFLinker/DWARFStreamer.h | 8 +- llvm/include/llvm/MC/MCContext.h | 10 +- llvm/include/llvm/MC/MCObjectFileInfo.h | 13 - llvm/include/llvm/Object/MachO.h | 4 - llvm/include/llvm/Object/ObjectFile.h | 6 - llvm/lib/DWARFLinker/DWARFStreamer.cpp | 20 +- llvm/lib/MC/MCContext.cpp | 8 +- llvm/lib/MC/MCObjectFileInfo.cpp | 11 - llvm/lib/Object/MachOObjectFile.cpp | 12 - llvm/test/tools/dsymutil/Inputs/main.yaml | 886 ------------------ .../dsymutil/Inputs/reflection_metadata.yaml | 436 --------- llvm/test/tools/dsymutil/Inputs/test.yaml | 254 ----- llvm/test/tools/dsymutil/reflection-dump.test | 42 - llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 86 +- 16 files changed, 11 insertions(+), 1835 deletions(-) delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.def delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.h delete mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml delete mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml delete mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml delete mode 100644 llvm/test/tools/dsymutil/reflection-dump.test diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def deleted file mode 100644 index 39931bec70e57..0000000000000 --- a/llvm/include/llvm/BinaryFormat/Swift.def +++ /dev/null @@ -1,26 +0,0 @@ -//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Macros for running through Swift enumerators. 
-// -//===----------------------------------------------------------------------===// - -#if !(defined HANDLE_SWIFT_SECTION) -#error "Missing macro definition of HANDLE_SWIFT_SECTION" -#endif - -#ifndef HANDLE_SWIFT_SECTION -#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) -#endif - -HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd") -HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty") -HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn") -HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr") -HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf") -HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst") diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h deleted file mode 100644 index 63bbfe08c86d4..0000000000000 --- a/llvm/include/llvm/BinaryFormat/Swift.h +++ /dev/null @@ -1,24 +0,0 @@ -//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// - -#ifndef LLVM_BINARYFORMAT_SWIFT_H -#define LLVM_BINARYFORMAT_SWIFT_H - -namespace llvm { -namespace swift { - -enum Swift5ReflectionSectionKind { -#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND, -#include "llvm/BinaryFormat/Swift.def" -#undef HANDLE_SWIFT_SECTION - Unknown, - Last = Unknown -}; -} // end of namespace swift -} // end of namespace llvm - -#endif diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index 8e845ee91b9f7..9a5c6bcaf83f3 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -9,7 +9,6 @@ #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H #define LLVM_DWARFLINKER_DWARFSTREAMER_H -#include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/DWARFLinker/DWARFLinker.h" @@ -49,7 +48,7 @@ class DwarfStreamer : public DwarfEmitter { : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator), ErrorHandler(Error), WarningHandler(Warning) {} - bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName); + bool init(Triple TheTriple); /// Dump the file to the disk. void finish(); @@ -86,11 +85,6 @@ class DwarfStreamer : public DwarfEmitter { /// Emit the swift_ast section stored in \p Buffer. void emitSwiftAST(StringRef Buffer); - /// Emit the swift reflection section stored in \p Buffer. - void emitSwiftReflectionSection( - llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, - StringRef Buffer, uint32_t Alignment, uint32_t Size); - /// Emit debug_ranges for \p FuncRange by translating the /// original \p Entries. 
void emitRangesEntries( diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index d2307d6922780..88d86d5b675ac 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -80,10 +80,6 @@ namespace llvm { private: Environment Env; - /// The name of the Segment where Swift5 Reflection Section data will be - /// outputted - StringRef Swift5ReflectionSegmentName; - /// The triple for this object. Triple TT; @@ -403,17 +399,13 @@ namespace llvm { const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI, const SourceMgr *Mgr = nullptr, MCTargetOptions const *TargetOpts = nullptr, - bool DoAutoReset = true, - StringRef Swift5ReflSegmentName = {}); + bool DoAutoReset = true); MCContext(const MCContext &) = delete; MCContext &operator=(const MCContext &) = delete; ~MCContext(); Environment getObjectFileType() const { return Env; } - const StringRef &getSwift5ReflectionSegmentName() const { - return Swift5ReflectionSegmentName; - } const Triple &getTargetTriple() const { return TT; } const SourceMgr *getSourceManager() const { return SrcMgr; } diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 1b4804db783b4..5e0cccaba77fa 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Triple.h" -#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/VersionTuple.h" @@ -229,10 +228,6 @@ class MCObjectFileInfo { MCSection *ReadOnly8Section = nullptr; MCSection *ReadOnly16Section = nullptr; - // Swift5 Reflection Data Sections - std::array - Swift5ReflectionSections = {}; - public: void initMCObjectFileInfo(MCContext &MCCtx, bool PIC, bool LargeCodeModel = false); @@ -428,14 +423,6 @@ class MCObjectFileInfo { bool isPositionIndependent() const { return PositionIndependent; } - // Swift5 Reflection Data 
Sections - MCSection *getSwift5ReflectionSection( - llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) { - return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown - ? Swift5ReflectionSections[ReflSectionKind] - : nullptr; - } - private: bool PositionIndependent = false; MCContext *Ctx = nullptr; diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index 09b6454bb0c14..ede742c47f971 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -22,7 +22,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/MachO.h" -#include "llvm/BinaryFormat/Swift.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" @@ -584,9 +583,6 @@ class MachOObjectFile : public ObjectFile { StringRef mapDebugSectionName(StringRef Name) const override; - llvm::swift::Swift5ReflectionSectionKind - mapReflectionSectionNameToEnumValue(StringRef SectionName) const override; - bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 29919db772f0e..12704b1fc88e7 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -18,7 +18,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Magic.h" -#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/SymbolicFile.h" @@ -291,11 +290,6 @@ class ObjectFile : public SymbolicFile { virtual void getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const = 0; - virtual llvm::swift::Swift5ReflectionSectionKind - mapReflectionSectionNameToEnumValue(StringRef SectionName) const { - return llvm::swift::Swift5ReflectionSectionKind::Unknown; - }; - Expected getSymbolValue(DataRefImpl Symb) 
const; public: diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 7f9b9a9bc793c..1ab6ead3b5f66 100644 --- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -27,8 +27,7 @@ namespace llvm { -bool DwarfStreamer::init(Triple TheTriple, - StringRef Swift5ReflectionSegmentName) { +bool DwarfStreamer::init(Triple TheTriple) { std::string ErrorStr; std::string TripleName; StringRef Context = "dwarf streamer init"; @@ -55,9 +54,8 @@ bool DwarfStreamer::init(Triple TheTriple, if (!MSTI) return error("no subtarget info for target " + TripleName, Context), false; - MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr, - nullptr, true, Swift5ReflectionSegmentName)); - MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false)); + MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get())); + MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false)); MC->setObjectFileInfo(MOFI.get()); MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions); @@ -304,18 +302,6 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) { MS->emitBytes(Buffer); } -void DwarfStreamer::emitSwiftReflectionSection( - llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer, - uint32_t Alignment, uint32_t Size) { - MCSection *ReflectionSection = - MOFI->getSwift5ReflectionSection(ReflSectionKind); - if (ReflectionSection == nullptr) - return; - ReflectionSection->setAlignment(Align(Alignment)); - MS->SwitchSection(ReflectionSection); - MS->emitBytes(Buffer); -} - /// Emit the debug_range section contents for \p FuncRange by /// translating the original \p Entries. 
The debug_range section /// format is totally trivial, consisting just of pairs of address diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index eafcee1e0607b..7f639e9c408fe 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &, MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCSubtargetInfo *msti, const SourceMgr *mgr, MCTargetOptions const *TargetOpts, - bool DoAutoReset, StringRef Swift5ReflSegmentName) - : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple), - SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler), - MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator), + bool DoAutoReset) + : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr), + DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti), + Symbols(Allocator), UsedNames(Allocator), InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), AutoReset(DoAutoReset), TargetOptions(TargetOpts) { diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 77b0b0ee687cb..d7f85f793c55f 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -299,17 +299,6 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { RemarksSection = Ctx->getMachOSection( "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); - // The architecture of dsymutil makes it very difficult to copy the Swift - // reflection metadata sections into the __TEXT segment, so dsymutil creates - // these sections in the __DWARF segment instead. 
- if (!Ctx->getSwift5ReflectionSegmentName().empty()) { -#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ - Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] = \ - Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(), \ - MACHO, 0, SectionKind::getMetadata()); -#include "llvm/BinaryFormat/Swift.def" - } - TLSExtraDataSection = TLSTLVSection; } diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 83bc74ff31c40..42e257516f4e0 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/MachO.h" -#include "llvm/BinaryFormat/Swift.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -4766,14 +4765,3 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) { Path.str().c_str()); return ObjectPaths; } - -llvm::swift::Swift5ReflectionSectionKind -MachOObjectFile::mapReflectionSectionNameToEnumValue( - StringRef SectionName) const { -#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) \ - .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND) - return StringSwitch(SectionName) -#include "llvm/BinaryFormat/Swift.def" - .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown); -#undef HANDLE_SWIFT_SECTION -} diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml deleted file mode 100644 index d81931d7861c3..0000000000000 --- a/llvm/test/tools/dsymutil/Inputs/main.yaml +++ /dev/null @@ -1,886 +0,0 @@ -# How to generate this file: -# 1. First take a swift file and run xcrun swiftc -g -v test.swift -# reflection_metadata.swift, make sure the two swift files are in a short path -# like /tmp/ - -# 2. Now you can see what the driver does, generate the object files in the -# tmp directory and link them to create the input binary - -# 3. 
Run obj2yaml on the input binary to create a yaml file and strip out the -# swift5 reflection sections from the load commands in the text segment - -# 4. I ran delta to reduce this file. - ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x1000007 - cpusubtype: 0x3 - filetype: 0x2 - ncmds: 18 - sizeofcmds: 2848 - flags: 0x200085 - reserved: 0x0 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __PAGEZERO - vmaddr: 0 - vmsize: 4294967296 - fileoff: 0 - filesize: 0 - maxprot: 0 - initprot: 0 - nsects: 0 - flags: 0 - - cmd: LC_SEGMENT_64 - cmdsize: 952 - segname: __TEXT - vmaddr: 4294967296 - vmsize: 16384 - fileoff: 0 - filesize: 16384 - maxprot: 5 - initprot: 5 - nsects: 11 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x100003EB0 - size: 336 - offset: 0x3EB0 - align: 3 - reloff: 0x0 - nreloc: 0 - flags: 0x0 - reserved1: 0x0 - reserved2: 0x0 - - cmd: LC_SEGMENT_64 - cmdsize: 392 - segname: __DATA_CONST - vmaddr: 4294983680 - vmsize: 16384 - fileoff: 16384 - filesize: 16384 - maxprot: 3 - initprot: 3 - nsects: 4 - flags: 16 - Sections: - - sectname: __got - segname: __DATA_CONST - addr: 0x100004000 - size: 48 - offset: 0x4000 - align: 3 - reloff: 0x0 - nreloc: 0 - flags: 0x6 - reserved1: 0x11 - reserved2: 0x0 - - cmd: LC_SEGMENT_64 - cmdsize: 392 - segname: __DATA - vmaddr: 4295000064 - vmsize: 16384 - fileoff: 32768 - filesize: 16384 - maxprot: 3 - initprot: 3 - nsects: 4 - flags: 0 - Sections: - - sectname: __la_symbol_ptr - segname: __DATA - addr: 0x100008000 - size: 384 - offset: 0x8088 - align: 3 - reloff: 0x0 - nreloc: 0 - flags: 0x0 - reserved1: 0x0 - reserved2: 0x0 - - cmd: LC_SEGMENT_64 - cmdsize: 72 - segname: __LINKEDIT - vmaddr: 4295016448 - vmsize: 32768 - fileoff: 49152 - filesize: 23584 - maxprot: 1 - initprot: 1 - nsects: 0 - flags: 0 - - cmd: LC_DYLD_INFO_ONLY - cmdsize: 48 - rebase_off: 49152 - rebase_size: 64 - bind_off: 49216 - bind_size: 216 - weak_bind_off: 0 - weak_bind_size: 0 - lazy_bind_off: 49432 
- lazy_bind_size: 600 - export_off: 50032 - export_size: 1000 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 51136 - nsyms: 638 - stroff: 61504 - strsize: 11232 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 560 - iextdefsym: 560 - nextdefsym: 52 - iundefsym: 612 - nundefsym: 26 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 61344 - nindirectsyms: 40 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 - - cmd: LC_LOAD_DYLINKER - cmdsize: 32 - name: 12 - - cmd: LC_UUID - cmdsize: 24 - uuid: AA0A51FA-8B29-3A7B-85AA-FA6A457B2211 - - cmd: LC_BUILD_VERSION - cmdsize: 32 - platform: 1 - minos: 786432 - sdk: 786688 - ntools: 1 - - cmd: LC_SOURCE_VERSION - cmdsize: 16 - version: 0 - - cmd: LC_MAIN - cmdsize: 24 - entryoff: 9376 - stacksize: 0 - - cmd: LC_LOAD_DYLIB - cmdsize: 56 - dylib: - name: 24 - timestamp: 2 - current_version: 14942208 - compatibility_version: 65536 - - cmd: LC_LOAD_DYLIB - cmdsize: 56 - dylib: - name: 24 - timestamp: 2 - current_version: 85917696 - compatibility_version: 65536 - - cmd: LC_LOAD_DYLIB - cmdsize: 64 - dylib: - name: 24 - timestamp: 2 - current_version: 85196845 - compatibility_version: 65536 - - cmd: LC_FUNCTION_STARTS - cmdsize: 16 - dataoff: 51032 - datasize: 104 - - cmd: LC_DATA_IN_CODE - cmdsize: 16 - dataoff: 51136 - datasize: 0 -LinkEditData: - NameList: - - n_strx: 2355 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294976208 - - n_strx: 2398 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976224 - - n_strx: 2440 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976240 - - n_strx: 2479 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976256 - - n_strx: 2509 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294976272 - - n_strx: 2570 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976320 - - n_strx: 2590 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294976512 - - n_strx: 2635 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 
4294976576 - - n_strx: 2683 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294976608 - - n_strx: 2731 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976640 - - n_strx: 2751 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976656 - - n_strx: 2775 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976704 - - n_strx: 2791 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294976720 - - n_strx: 2814 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976752 - - n_strx: 2838 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976768 - - n_strx: 2873 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294976784 - - n_strx: 2906 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294976832 - - n_strx: 2926 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294977104 - - n_strx: 2946 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294977200 - - n_strx: 2966 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294977376 - - n_strx: 3008 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977392 - - n_strx: 3049 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977408 - - n_strx: 3087 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977424 - - n_strx: 3116 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294977440 - - n_strx: 3176 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977488 - - n_strx: 3201 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977504 - - n_strx: 3232 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977552 - - n_strx: 3270 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977648 - - n_strx: 3318 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294977664 - - n_strx: 3364 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294978352 - - n_strx: 3411 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294978464 - - n_strx: 3447 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294978688 - - n_strx: 3506 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294978832 - - n_strx: 3567 - n_type: 
0xE - n_sect: 1 - n_desc: 0 - n_value: 4294978944 - - n_strx: 3587 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979024 - - n_strx: 3607 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979056 - - n_strx: 3627 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979136 - - n_strx: 3647 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294979232 - - n_strx: 3666 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979264 - - n_strx: 3686 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979328 - - n_strx: 3706 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979536 - - n_strx: 3726 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979856 - - n_strx: 3746 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979872 - - n_strx: 3766 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979888 - - n_strx: 3786 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979920 - - n_strx: 3814 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294979936 - - n_strx: 3842 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294980240 - - n_strx: 3871 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294980288 - - n_strx: 3898 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294980320 - - n_strx: 3927 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294980368 - - n_strx: 3951 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294980384 - - n_strx: 3982 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294980448 - - n_strx: 4001 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294980464 - - n_strx: 4032 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294980512 - - n_strx: 4060 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294980800 - - n_strx: 4088 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294981120 - - n_strx: 4116 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294981136 - - n_strx: 4144 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 4294981152 - - n_strx: 4172 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294981184 - - 
n_strx: 4208 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294981248 - - n_strx: 4225 - n_type: 0x1E - n_sect: 1 - n_desc: 128 - n_value: 4294981280 - - n_strx: 4253 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294981328 - - n_strx: 4276 - n_type: 0x1E - n_sect: 1 - n_desc: 0 - n_value: 4294981376 - - n_strx: 4294 - n_type: 0x1E - n_sect: 5 - n_desc: 128 - n_value: 4294981764 - - n_strx: 4306 - n_type: 0x1E - n_sect: 5 - n_desc: 0 - n_value: 4294981824 - - n_strx: 4322 - n_type: 0x1E - n_sect: 5 - n_desc: 0 - n_value: 4294981952 - - n_strx: 4349 - n_type: 0x1E - n_sect: 5 - n_desc: 0 - n_value: 4294981960 - - n_strx: 4387 - n_type: 0x1E - n_sect: 5 - n_desc: 0 - n_value: 4294981968 - - n_strx: 4423 - n_type: 0x1E - n_sect: 5 - n_desc: 0 - n_value: 4294982160 - - n_strx: 4474 - n_type: 0xE - n_sect: 5 - n_desc: 0 - n_value: 4294982352 - - n_strx: 4503 - n_type: 0xE - n_sect: 5 - n_desc: 0 - n_value: 4294982448 - - n_strx: 4530 - n_type: 0x1E - n_sect: 5 - n_desc: 128 - n_value: 4294982464 - - n_strx: 4558 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982466 - - n_strx: 4571 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982470 - - n_strx: 4608 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982476 - - n_strx: 4639 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982498 - - n_strx: 4666 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982506 - - n_strx: 4691 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982510 - - n_strx: 4727 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982516 - - n_strx: 4758 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982522 - - n_strx: 4790 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982528 - - n_strx: 4820 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982534 - - n_strx: 4859 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982540 - - n_strx: 4902 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982554 - - n_strx: 4945 - 
n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 4294982564 - - n_strx: 4986 - n_type: 0x1E - n_sect: 6 - n_desc: 128 - n_value: 0 - - n_strx: 5987 - n_type: 0x66 - n_sect: 3 - n_desc: 1 - n_value: 1638431181 - - n_strx: 7104 - n_type: 0x66 - n_sect: 3 - n_desc: 1 - n_value: 1638431191 - StringTable: - - ' ' - - '_$s4main10MyProtocolMp' - - '_$s4main10MyProtocolTL' - - '_$s4main11ConformanceV5innerSivM' - - '_$s4main11ConformanceV5innerSivg' - - '_$s4main11ConformanceV5innerSivpMV' - - '_$s4main11ConformanceV5innerSivpfi' - - '_$s4main11ConformanceV5innerSivs' - - '_$s4main11ConformanceVAA10MyProtocolAAMc' - - '_$s4main11ConformanceVAA10MyProtocolAAWP' - - '_$s4main11ConformanceVMa' - - '_$s4main11ConformanceVMn' - - '_$s4main11ConformanceVN' - - '_$s4main12Conformance2V5innerSivM' - - '_$s4main12Conformance2V5innerSivg' - - '_$s4main12Conformance2V5innerSivpMV' - - '_$s4main12Conformance2V5innerSivpfi' - - '_$s4main12Conformance2V5innerSivs' - - '_$s4main12Conformance2VAA10MyProtocolAAMc' - - '_$s4main12Conformance2VAA10MyProtocolAAWP' - - '_$s4main12Conformance2VMa' - - '_$s4main12Conformance2VMn' - - '_$s4main12Conformance2VN' - - '_$s4main13MyGenericEnumOMa' - - '_$s4main13MyGenericEnumOMn' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc' - - '_$s4main14MyGenericClassCMa' - - '_$s4main14MyGenericClassCMn' - - '_$s4main14MyGenericClassCfD' - - '_$s4main14MyGenericClassCfd' - - '_$s4main15MyGenericStructVMa' - - '_$s4main15MyGenericStructVMn' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF' - - '_$s4main6MyEnumOMa' - - '_$s4main6MyEnumOMn' - - '_$s4main6MyEnumON' - - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC' - - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq' - - 
'_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc' - - '_$s4main7MyClassCMa' - - '_$s4main7MyClassCMm' - - '_$s4main7MyClassCMn' - - '_$s4main7MyClassCN' - - '_$s4main7MyClassCfD' - - '_$s4main7MyClassCfd' - - '_$s4main8MyStructVMa' - - '_$s4main8MyStructVMn' - - '_$s4main8MyStructVN' - - '_$s5Inner4main10MyProtocolPTl' - - __mh_execute_header - - _main - - '_$sBi64_WV' - - '_$sBoWV' - - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC' - - '_$sSSN' - - '_$sSaMa' - - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF' - - '_$ss5print_9separator10terminatoryypd_S2StF' - - '_$sypN' - - '_$sytWV' - - '_OBJC_CLASS_$__TtCs12_SwiftObject' - - '_OBJC_METACLASS_$__TtCs12_SwiftObject' - - __objc_empty_cache - - _objc_opt_self - - _swift_allocObject - - _swift_allocateGenericClassMetadata - - _swift_allocateGenericValueMetadata - - _swift_bridgeObjectRelease - - _swift_checkMetadataState - - _swift_deallocClassInstance - - _swift_deallocObject - - _swift_getAssociatedTypeWitness - - _swift_getGenericMetadata - - _swift_initClassMetadata2 - - _swift_release - - _swift_retain - - dyld_stub_binder - - '_$s4main12Conformance2V5innerSivM.resume.0' - - '_$s4main12Conformance2V5innerACSi_tcfcfA_' - - '_$s4main12Conformance2V5innerACSi_tcfC' - - '_$s4main12Conformance2VACycfC' - - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' - - '_$s4main3AppVAAyyFZ' - - '_$ss27_finalizeUninitializedArrayySayxGABnlF' - - '_$ss5print_9separator10terminatoryypd_S2StFfA0_' - - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' - - '_$s4main3AppVACycfC' - - '_$s4main3AppV5$mainyyFZ' - - '_$s4main3AppVMa' - - '_$sSa12_endMutationyyF' - - '_$s4main7MyClassC1iSivg' - - '_$s4main7MyClassC2msAA0B6StructVvg' - - '_$s4main7MyClassC2meAA0B4EnumOvg' - - '_$s4main6MyEnumOWOy' - - '_$s4main6MyEnumOWOe' - - '_$s4main6MyEnumOWOh' - - '_$s4main11ConformanceV5innerSivM.resume.0' - - '_$s4main11ConformanceV5innerACSi_tcfcfA_' - - '_$s4main11ConformanceV5innerACSi_tcfC' - - 
'_$s4main11ConformanceVACycfC' - - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW' - - '_$s4main8MyStructVACycfC' - - '_$s4main14MyGenericClassC1txvg' - - '_$s4main14MyGenericClassC1i5InnerQzvg' - - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg' - - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg' - - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh' - - '_$s4main15MyGenericStructVACyxGycfC' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA' - - '_$s4main6MyEnumOwCP' - - '_$s4main6MyEnumOwxx' - - '_$s4main6MyEnumOwcp' - - '_$s4main6MyEnumOwca' - - ___swift_memcpy9_8 - - '_$s4main6MyEnumOwta' - - '_$s4main6MyEnumOwet' - - '_$s4main6MyEnumOwst' - - '_$s4main6MyEnumOwug' - - '_$s4main6MyEnumOwup' - - '_$s4main6MyEnumOwui' - - '_$s4main14MyGenericClassCMi' - - '_$s4main14MyGenericClassCMr' - - '_$s4main15MyGenericStructVMi' - - '_$s4main13MyGenericEnumOMi' - - ___swift_initWithCopy_strong - - ___swift_destroy_strong - - ___swift_assignWithCopy_strong - - ___swift_memcpy8_8 - - ___swift_assignWithTake_strong - - '_$s4main13MyGenericEnumOwet' - - '_$s4main13MyGenericEnumOwst' - - '_$s4main13MyGenericEnumOwug' - - '_$s4main13MyGenericEnumOwup' - - '_$s4main13MyGenericEnumOwui' - - ___swift_instantiateGenericMetadata - - ___chkstk_darwin - - ___chkstk_darwin_llvm_probe - - ___chkstk_darwin_probe - - ____chkstk_darwin - - '_$s4mainMXM' - - '_$s4main3AppVMn' - - '_$s4main7MyClassC1iSivpWvd' - - '_$s4main7MyClassC2msAA0B6StructVvpWvd' - - '_$s4main7MyClassC2meAA0B4EnumOvpWvd' - - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd' - - '_$s4main15MyGenericStructVMP' - - '_$s4main13MyGenericEnumOMP' - - ___swift_reflection_version - - _symbolic Si - - _symbolic _____ 4main12Conformance2V - - '_symbolic $s4main10MyProtocolP' - - _symbolic _____ 4main3AppV - - _symbolic x - - _symbolic B0 - - _symbolic _____ 4main11ConformanceV - - _symbolic _____ 4main7MyClassC - - _symbolic _____ 
4main8MyStructV - - _symbolic _____ 4main6MyEnumO - - _symbolic _____ 4main14MyGenericClassC - - _symbolic 5Inner_____Qz 4main10MyProtocolP - - _symbolic _____yxG 4main15MyGenericStructV - - _symbolic _____yxG 4main13MyGenericEnumO - - _symbolic _____ 4main15MyGenericStructV - - _symbolic _____ 4main13MyGenericEnumO - - _symbolic _____yxG 4main14MyGenericClassC - - '_$s4main12Conformance2VAA10MyProtocolAAMA' - - '_$s4main11ConformanceVAA10MyProtocolAAMA' - - '_$s4main12Conformance2VMF' - - '_$s4main3AppVMF' - - '_$s4main10MyProtocol_pMF' - - '_$s4main7MyClassCMF' - - '_$s4main11ConformanceVMF' - - '_$s4main8MyStructVMF' - - '_$s4main6MyEnumOMF' - - '_$s4main14MyGenericClassCMF' - - '_$s4main15MyGenericStructVMF' - - '_$s4main13MyGenericEnumOMF' - - '_$s4main6MyEnumOMB' - - '_$s4main12Conformance2VMf' - - '_$s4main3AppVMf' - - '_$s4main3AppVN' - - '_$s4main11ConformanceVMf' - - '_$s4main8MyStructVMf' - - '_$s4main6MyEnumOWV' - - '_$s4main6MyEnumOMf' - - ___unnamed_23 - - '_$s4main14MyGenericClassCMP' - - '_$s4main13MyGenericEnumOWV' - - __METACLASS_DATA__TtC4main7MyClass - - __IVARS__TtC4main7MyClass - - __DATA__TtC4main7MyClass - - __IVARS__TtC4main14MyGenericClass - - __dyld_private - - '_$s4main7MyClassCMf' - - '_$s4main14MyGenericClassCMI' - - '_$s4main15MyGenericStructVMI' - - '_$s4main13MyGenericEnumOMI' - - '/tmp/main-1.swiftmodule' - - '/Users/shubham/Development/test76973336/final2objfiletest/' - - test.swift - - '/tmp/test-1.o' - - '_$s4main12Conformance2V5innerSivpfi' - - '_$s4main12Conformance2V5innerSivg' - - '_$s4main12Conformance2V5innerSivs' - - '_$s4main12Conformance2V5innerSivM' - - '_$s4main12Conformance2V5innerSivM.resume.0' - - '_$s4main12Conformance2V5innerACSi_tcfcfA_' - - '_$s4main12Conformance2V5innerACSi_tcfC' - - '_$s4main12Conformance2VACycfC' - - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' - - '_$s4main3AppVAAyyFZ' - - '_$ss27_finalizeUninitializedArrayySayxGABnlF' - - '_$ss5print_9separator10terminatoryypd_S2StFfA0_' 
- - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' - - '_$s4main3AppVACycfC' - - '_$s4main3AppV5$mainyyFZ' - - _main - - '_$s4main12Conformance2VMa' - - '_$s4main3AppVMa' - - '_$sSa12_endMutationyyF' - - '_$s4main12Conformance2VAA10MyProtocolAAMc' - - '_$s4main12Conformance2V5innerSivpMV' - - '_$s4mainMXM' - - '_$s4main12Conformance2VMn' - - '_$s4main3AppVMn' - - _symbolic Si - - _symbolic _____ 4main12Conformance2V - - '_symbolic $s4main10MyProtocolP' - - _symbolic _____ 4main3AppV - - '_$s4main12Conformance2VAA10MyProtocolAAMA' - - '_$s4main12Conformance2VMF' - - '_$s4main3AppVMF' - - '_$s4main12Conformance2VMf' - - '_$s4main12Conformance2VN' - - '_$s4main3AppVMf' - - '_$s4main3AppVN' - - '_$s4main12Conformance2VAA10MyProtocolAAWP' - - reflection_metadata.swift - - '/tmp/reflection_metadata-1.o' diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml deleted file mode 100644 index b2179a23bf28d..0000000000000 --- a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml +++ /dev/null @@ -1,436 +0,0 @@ -# How to generate this file: -# 1. First take a swift file and run xcrun swiftc -g -v file.swift -# secondfile.swift, make sure the two swift files are in a short path like /tmp/ - -# 2. Now you can see what the driver does, generate the object files in the -# tmp directory - -# 3. Run obj2yaml on object file to create a yaml file - -# 4. I ran delta to reduce this file. 
- ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x1000007 - cpusubtype: 0x3 - filetype: 0x1 - ncmds: 8 - sizeofcmds: 2800 - flags: 0x2000 - reserved: 0x0 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 2552 - segname: '' - vmaddr: 0 - vmsize: 21352 - fileoff: 2832 - filesize: 20967 - maxprot: 7 - initprot: 7 - nsects: 31 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0 - size: 4571 - offset: 0xB10 - align: 4 - reloff: 0x5CF8 - nreloc: 74 - flags: 0x80000400 - reserved1: 0x0 - reserved2: 0x0 - relocations: - - address: 0x11A1 - symbolnum: 142 - pcrel: true - length: 2 - extern: true - type: 1 - scattered: false - value: 0 - - sectname: __swift5_typeref - segname: __TEXT - addr: 0x11DC - size: 117 - offset: 0x1CEC - align: 1 - reloff: 0x5F48 - nreloc: 22 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700 - relocations: - - address: 0x6D - symbolnum: 163 - pcrel: false - length: 2 - extern: true - type: 0 - scattered: false - value: 0 - - sectname: __swift5_capture - segname: __TEXT - addr: 0x1254 - size: 24 - offset: 0x1D64 - align: 2 - reloff: 0x5FF8 - nreloc: 6 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF - relocations: - - address: 0x14 - symbolnum: 29 - pcrel: false - length: 3 - extern: true - type: 0 - scattered: false - value: 0 - - sectname: __swift5_reflstr - segname: __TEXT - addr: 0x17D8 - size: 37 - offset: 0x22E8 - align: 0 - reloff: 0x0 - nreloc: 0 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300 - - sectname: __swift5_assocty - segname: __TEXT - addr: 0x1800 - size: 24 - offset: 0x2310 - align: 2 - reloff: 0x6530 - 
nreloc: 8 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF - relocations: - - address: 0x14 - symbolnum: 31 - pcrel: false - length: 2 - extern: true - type: 5 - scattered: false - value: 0 - - sectname: __swift5_fieldmd - segname: __TEXT - addr: 0x1818 - size: 260 - offset: 0x2328 - align: 2 - reloff: 0x6570 - nreloc: 60 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF - relocations: - - address: 0x100 - symbolnum: 71 - pcrel: false - length: 2 - extern: true - type: 0 - scattered: false - value: 0 - - sectname: __swift5_builtin - segname: __TEXT - addr: 0x1AC8 - size: 20 - offset: 0x25D8 - align: 2 - reloff: 0x67F8 - nreloc: 2 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 00000000090000000800010010000000FE000000 - relocations: - - address: 0x0 - symbolnum: 52 - pcrel: false - length: 2 - extern: true - type: 5 - scattered: false - value: 0 - - sectname: __bss - segname: __DATA - addr: 0x3372 - size: 2084 - offset: 0x50B0 - align: 3 - reloff: 0x0 - nreloc: 0 - flags: 0x6800000B - reserved1: 0x0 - reserved2: 0x0 - relocations: - - address: 0x56 - symbolnum: 1 - pcrel: false - length: 3 - extern: false - type: 0 - scattered: false - value: 0 - - cmd: LC_BUILD_VERSION - cmdsize: 24 - platform: 1 - minos: 786432 - sdk: 786688 - ntools: 0 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 27888 - nsyms: 185 - stroff: 30848 - strsize: 5056 - - cmd: LC_DYSYMTAB - 
cmdsize: 80 - ilocalsym: 0 - nlocalsym: 79 - iextdefsym: 79 - nextdefsym: 87 - iundefsym: 166 - nundefsym: 19 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 - - cmd: LC_LINKER_OPTION - cmdsize: 40 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, - 0x0, 0x0, 0x0, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 24 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, - 0x6F, 0x72, 0x65, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 32 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, - 0x6E, 0x63, 0x79, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 24 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0 ] -LinkEditData: - NameList: - - n_strx: 5014 - n_type: 0xE - n_sect: 1 - n_desc: 0 - n_value: 5600 - StringTable: - - '' - - l_objectdestroy - - '_$s4main6MyEnumOWOy' - - '_$s4main6MyEnumOwxx' - - _symbolic x - - '_$s4main6MyEnumOwst' - - '_$s4main13MyGenericEnumOwst' - - '_$s4main6MyEnumOwet' - - '_$s4main13MyGenericEnumOwet' - - '_OBJC_CLASS_$__TtCs12_SwiftObject' - - '_OBJC_METACLASS_$__TtCs12_SwiftObject' - - _swift_deallocObject - - _swift_allocObject - - '_$s4main11ConformanceV5innerSivs' - - _swift_getAssociatedTypeWitness - - __IVARS__TtC4main7MyClass - - __DATA__TtC4main7MyClass - - __METACLASS_DATA__TtC4main7MyClass - - __IVARS__TtC4main14MyGenericClass - - l_protocols - - _objc_classes - - l_protocol_conformances - - l__swift5_reflection_descriptor - - l_coro.devirt.trigger - - '_$s4main14MyGenericClassCMr' - - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq' - - '_$s4main6MyEnumOwup' - - '_$s4main13MyGenericEnumOwup' - - '_$s4main6MyEnumOwcp' - - '_$s4main10MyProtocolMp' - - ___swift_reflection_version - - 
____chkstk_darwin - - _swift_retain - - '_$s4main8MyStructVMn' - - '_$s4main15MyGenericStructVMn' - - '_$s4main11ConformanceVMn' - - '_$s4main6MyEnumOMn' - - '_$s4main13MyGenericEnumOMn' - - '_$s4main7MyClassCMn' - - '_$s4main14MyGenericClassCMn' - - '_$s4main7MyClassCMm' - - '_$s5Inner4main10MyProtocolPTl' - - '_$s4main6MyEnumOwui' - - '_$s4main13MyGenericEnumOwui' - - '_$s4main11ConformanceV5innerSivpfi' - - _symbolic Si - - '_$s4main15MyGenericStructVMi' - - '_$s4main13MyGenericEnumOMi' - - '_$s4main14MyGenericClassCMi' - - l_llvm.swift_module_hash - - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh' - - '_$s4main6MyEnumOWOh' - - '_$s4main14MyGenericClassC1i5InnerQzvg' - - '_$s4main14MyGenericClassC1txvg' - - '_$s4main11ConformanceV5innerSivg' - - '_$s4main7MyClassC1iSivg' - - '_$s4main7MyClassC2msAA0B6StructVvg' - - '_$s4main7MyClassC2meAA0B4EnumOvg' - - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg' - - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg' - - '_$s4main6MyEnumOwug' - - '_$s4main13MyGenericEnumOwug' - - ___swift_initWithCopy_strong - - ___swift_assignWithCopy_strong - - ___swift_destroy_strong - - ___swift_assignWithTake_strong - - _objc_opt_self - - '_$s4main8MyStructVMf' - - '_$s4main11ConformanceVMf' - - '_$s4main6MyEnumOMf' - - '_$s4main7MyClassCMf' - - _swift_checkMetadataState - - _swift_release - - l_type_metadata_table - - __objc_empty_cache - - _swift_deallocClassInstance - - ___chkstk_darwin_llvm_probe - - '_$s4main6MyEnumOWOe' - - '_$s4main7MyClassC1iSivpWvd' - - '_$s4main7MyClassC2msAA0B6StructVvpWvd' - - '_$s4main7MyClassC2meAA0B4EnumOvpWvd' - - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd' - - '_$s4main7MyClassCfd' - - '_$s4main14MyGenericClassCfd' - - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc' - - '_$s4main11ConformanceVAA10MyProtocolAAMc' - - '_$s4main6MyEnumOwta' - - l_metadata - - _swift_allocateGenericClassMetadata 
- - _swift_allocateGenericValueMetadata - - _swift_getGenericMetadata - - ___swift_instantiateGenericMetadata - - '_$s4main6MyEnumOwca' - - '_$s4main8MyStructVMa' - - '_$s4main15MyGenericStructVMa' - - '_$s4main11ConformanceVMa' - - '_$s4main6MyEnumOMa' - - '_$s4main13MyGenericEnumOMa' - - '_$s4main7MyClassCMa' - - '_$s4main14MyGenericClassCMa' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_' - - '_$s4main11ConformanceV5innerACSi_tcfcfA_' - - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW' - - _symbolic _____ 4main8MyStructV - - _symbolic _____ 4main15MyGenericStructV - - _symbolic _____yxG 4main15MyGenericStructV - - _symbolic _____ 4main11ConformanceV - - '_$sytWV' - - '_$sBoWV' - - '_$sBi64_WV' - - '_$s4main6MyEnumOWV' - - '_$s4main13MyGenericEnumOWV' - - '_$s4main11ConformanceV5innerSivpMV' - - '_symbolic $s4main10MyProtocolP' - - _symbolic 5Inner_____Qz 4main10MyProtocolP - - '_$s4main11ConformanceVAA10MyProtocolAAWP' - - '_$s4main15MyGenericStructVMP' - - '_$s4main13MyGenericEnumOMP' - - '_$s4main14MyGenericClassCMP' - - '_$s4main6MyEnumOwCP' - - _symbolic _____ 4main6MyEnumO - - _symbolic _____ 4main13MyGenericEnumO - - _symbolic _____yxG 4main13MyGenericEnumO - - '_$s4main8MyStructVN' - - '_$s4main11ConformanceVN' - - '_$s4main6MyEnumON' - - '_$s4main7MyClassCN' - - '_$s4main11ConformanceV5innerSivM' - - '_$s4mainMXM' - - '_$s4main10MyProtocolTL' - - '_$s4main15MyGenericStructVMI' - - '_$s4main13MyGenericEnumOMI' - - '_$s4main14MyGenericClassCMI' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF' - - '_$s4main10MyProtocol_pMF' - - '_$s4main8MyStructVMF' - - '_$s4main15MyGenericStructVMF' - - '_$s4main11ConformanceVMF' - - '_$s4main6MyEnumOMF' - - '_$s4main13MyGenericEnumOMF' - - '_$s4main7MyClassCMF' - - '_$s4main14MyGenericClassCMF' - - '_$s4main7MyClassCfD' - - '_$s4main14MyGenericClassCfD' - - _symbolic _____ 4main7MyClassC - - _symbolic _____ 4main14MyGenericClassC - - _symbolic _____yxG 4main14MyGenericClassC - - 
'_$s4main15MyGenericStructVACyxGycfC' - - '_$s4main8MyStructVACycfC' - - '_$s4main11ConformanceVACycfC' - - '_$s4main11ConformanceV5innerACSi_tcfC' - - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC' - - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC' - - '_$s4main6MyEnumOMB' - - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA' - - '_$s4main11ConformanceVAA10MyProtocolAAMA' - - l___unnamed_29 - - l___unnamed_19 - - ___swift_memcpy9_8 - - ___swift_memcpy8_8 - - l___unnamed_28 - - l___unnamed_18 - - l___unnamed_27 - - l___unnamed_17 - - l___unnamed_26 - - l___unnamed_16 - - l___unnamed_25 - - l___unnamed_15 - - l___unnamed_4 - - l___unnamed_24 - - l___unnamed_14 - - l___unnamed_3 - - ___unnamed_23 - - l___unnamed_13 - - _swift_initClassMetadata2 - - l___unnamed_2 - - l___unnamed_12 - - l___unnamed_1 - - l___unnamed_11 - - _symbolic B0 - - l___unnamed_30 - - l___unnamed_10 - - '_$s4main11ConformanceV5innerSivM.resume.0' diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml deleted file mode 100644 index da3aa9a8aaf34..0000000000000 --- a/llvm/test/tools/dsymutil/Inputs/test.yaml +++ /dev/null @@ -1,254 +0,0 @@ -# How to generate this file: -# 1. First take a swift file and run xcrun swiftc -g -v file.swift -# secondfile.swift, make sure the two swift files are in a short path like /tmp/ - -# 2. Now you can see what the driver does, generate the object files in the -# tmp directory - -# 3. Run obj2yaml on object file to create a yaml file - -# 4. I ran delta to reduce this file. 
- ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x1000007 - cpusubtype: 0x3 - filetype: 0x1 - ncmds: 8 - sizeofcmds: 2240 - flags: 0x2000 - reserved: 0x0 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 1992 - segname: '' - vmaddr: 0 - vmsize: 6592 - fileoff: 2272 - filesize: 6592 - maxprot: 7 - initprot: 7 - nsects: 24 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0 - size: 593 - offset: 0x8E0 - align: 4 - reloff: 0x22A0 - nreloc: 24 - flags: 0x80000400 - reserved1: 0x0 - reserved2: 0x0 - relocations: - - address: 0x233 - symbolnum: 2 - pcrel: true - length: 2 - extern: true - type: 4 - scattered: false - value: 0 - - sectname: __swift5_typeref - segname: __TEXT - addr: 0x2D6 - size: 38 - offset: 0xBB6 - align: 1 - reloff: 0x2418 - nreloc: 4 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00 - relocations: - - address: 0x21 - symbolnum: 46 - pcrel: false - length: 3 - extern: true - type: 0 - scattered: false - value: 0 - - sectname: __swift5_reflstr - segname: __TEXT - addr: 0x318 - size: 12 - offset: 0xBF8 - align: 0 - reloff: 0x0 - nreloc: 0 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 496E6E657200696E6E657200 - - sectname: __swift5_assocty - segname: __TEXT - addr: 0x324 - size: 24 - offset: 0xC04 - align: 2 - reloff: 0x2450 - nreloc: 8 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF - relocations: - - address: 0x14 - symbolnum: 5 - pcrel: false - length: 2 - extern: true - type: 0 - scattered: false - value: 0 - - sectname: __swift5_fieldmd - segname: __TEXT - addr: 0x378 - size: 44 - offset: 0xC58 - align: 2 - reloff: 0x24C0 - nreloc: 8 - flags: 0x10000000 - reserved1: 0x0 - reserved2: 0x0 - content: 000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000 - relocations: - - address: 0x1C - symbolnum: 12 - pcrel: 
false - length: 3 - extern: false - type: 0 - scattered: false - value: 0 - - cmd: LC_BUILD_VERSION - cmdsize: 24 - platform: 1 - minos: 786432 - sdk: 786688 - ntools: 0 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 9824 - nsyms: 57 - stroff: 10736 - strsize: 1544 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 16 - iextdefsym: 16 - nextdefsym: 31 - iundefsym: 47 - nundefsym: 10 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 - - cmd: LC_LINKER_OPTION - cmdsize: 40 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, - 0x0, 0x0, 0x0, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 24 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, - 0x6F, 0x72, 0x65, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 32 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, - 0x6E, 0x63, 0x79, 0x0 ] - - cmd: LC_LINKER_OPTION - cmdsize: 24 - count: 1 - PayloadBytes: [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0 ] -LinkEditData: - NameList: - - n_strx: 1494 - n_type: 0xE - n_sect: 9 - n_desc: 0 - n_value: 0 - StringTable: - - '' - - l_entry_point - - '_$s4main12Conformance2V5innerSivs' - - l_protocol_conformances - - l_coro.devirt.trigger - - '_$s4main10MyProtocolMp' - - ___swift_reflection_version - - _main - - '_$s4main3AppVMn' - - '_$s4main12Conformance2VMn' - - '_$s4main12Conformance2V5innerSivpfi' - - _symbolic Si - - l_llvm.swift_module_hash - - '_$s4main12Conformance2V5innerSivg' - - '_$s4main3AppVMf' - - '_$s4main12Conformance2VMf' - - _swift_bridgeObjectRelease - - l_type_metadata_table - - '_$s4main12Conformance2VAA10MyProtocolAAMc' - - '_$sSaMa' - - '_$s4main3AppVMa' - - '_$s4main12Conformance2VMa' - - '_$s4main12Conformance2V5innerACSi_tcfcfA_' - - '_$ss5print_9separator10terminatoryypd_S2StFfA1_' - - 
'_$ss5print_9separator10terminatoryypd_S2StFfA0_' - - '_$s4main3AppV5$mainyyFZ' - - '_$s4main3AppVAAyyFZ' - - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW' - - _symbolic _____ 4main3AppV - - '_$sytWV' - - '_$sBi64_WV' - - '_$s4main12Conformance2V5innerSivpMV' - - _symbolic _____ 4main12Conformance2V - - '_symbolic $s4main10MyProtocolP' - - '_$s4main12Conformance2VAA10MyProtocolAAWP' - - '_$sypN' - - '_$s4main3AppVN' - - '_$s4main12Conformance2VN' - - '_$sSSN' - - '_$s4main12Conformance2V5innerSivM' - - '_$s4mainMXM' - - '_$sSa12_endMutationyyF' - - '_$ss5print_9separator10terminatoryypd_S2StF' - - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF' - - '_$ss27_finalizeUninitializedArrayySayxGABnlF' - - '_$s4main3AppVMF' - - '_$s4main12Conformance2VMF' - - '_$s4main3AppVACycfC' - - '_$s4main12Conformance2VACycfC' - - '_$s4main12Conformance2V5innerACSi_tcfC' - - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC' - - '_$s4main12Conformance2VAA10MyProtocolAAMA' - - l___unnamed_8 - - l___unnamed_7 - - l___unnamed_6 - - l___unnamed_5 - - l___unnamed_4 - - '_$s4main12Conformance2V5innerSivM.resume.0' - - '' - - '' - - '' - - '' - - '' - - '' - - '' diff --git a/llvm/test/tools/dsymutil/reflection-dump.test b/llvm/test/tools/dsymutil/reflection-dump.test deleted file mode 100644 index 3fea11ee57adc..0000000000000 --- a/llvm/test/tools/dsymutil/reflection-dump.test +++ /dev/null @@ -1,42 +0,0 @@ -RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp -RUN: cp %p/Inputs/main.yaml %t.dir -RUN: cp %p/Inputs/test.yaml %t.dir -RUN: cp %p/Inputs/reflection_metadata.yaml %t.dir -RUN: yaml2obj %p/Inputs/main.yaml -o %t.dir/main -RUN: yaml2obj %p/Inputs/test.yaml -o %t.dir/tmp/test-1.o -RUN: yaml2obj %p/Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o - -RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM -RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s - -CHECK: Contents of section 
__DWARF,__swift5_typeref: -CHECK-NEXT: 10000e000 53690000 01ffffff ff002473 346d6169 Si........$s4mai -CHECK-NEXT: 10000e010 6e31304d 7950726f 746f636f 6c500000 n10MyProtocolP.. -CHECK-NEXT: 10000e020 01ffffff ff007800 42300000 53690000 ......x.B0..Si.. -CHECK-NEXT: 10000e030 01ffffff ff002473 346d6169 6e31304d ......$s4main10M -CHECK-NEXT: 10000e040 7950726f 746f636f 6c500000 01ffffff yProtocolP...... - -CHECK: Contents of section __DWARF,__swift5_reflstr: -CHECK-NEXT: 10000e09b 496e6e65 7200696e 6e657200 496e6e65 Inner.inner.Inne -CHECK-NEXT: 10000e0ab 72006900 6d73006d 6500696e 6e657200 r.i.ms.me.inner. -CHECK-NEXT: 10000e0bb 43004900 74006d67 73006d67 65004743 C.I.t.mgs.mge.GC -CHECK-NEXT: 10000e0cb 00 - -CHECK: Contents of section __DWARF,__swift5_assocty: -CHECK-NEXT: 10000e0cc 00000000 fcffffff 01000000 08000000 ................ -CHECK-NEXT: 10000e0dc f0ffffff ecffffff 00000000 fcffffff ................ -CHECK-NEXT: 10000e0ec 01000000 08000000 f0ffffff ecffffff ................ - -CHECK: Contents of section __DWARF,__swift5_fieldmd: -CHECK-NEXT: 10000e0fc 00000000 00000000 00000c00 01000000 ................ -CHECK-NEXT: 10000e10c 02000000 ecffffff e8ffffff 00000000 ................ -CHECK-NEXT: 10000e11c 00000000 00000c00 00000000 00000000 ................ -CHECK-NEXT: 10000e12c 00000000 04000c00 00000000 00000000 ................ - -CHECK: Contents of section __DWARF,__swift5_capture: -CHECK-NEXT: 10000e22c 01000000 01000000 02000000 f4ffffff ................ -CHECK-NEXT: 10000e23c f0ffffff ecffffff ........ - -CHECK: Contents of section __DWARF,__swift5_builtin: -CHECK-NEXT: 10000e244 00000000 09000000 08000100 10000000 ................ -CHECK-NEXT: 10000e254 fe000000 .... 
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index b9682b83ac67a..a8dfde0865377 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/MachO.h" -#include "llvm/BinaryFormat/Swift.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" @@ -175,7 +174,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple, [&](const Twine &Warning, StringRef Context, const DWARFDie *) { warn(Warning, Context); }); - return Streamer->init(TheTriple, "__DWARF"); + return Streamer->init(TheTriple); } ErrorOr @@ -296,77 +295,6 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj, return ErrorOrObj.getError(); } -static bool binaryHasSwiftReflectionSections(const DebugMap &Map, - const LinkOptions &Options, - BinaryHolder &BinHolder) { - // If the input binary has swift5 reflection sections, there is no need to - // copy them to the .dSYM. Only copy them for binaries where the linker - // omitted the reflection metadata. - if (!Map.getBinaryPath().empty() && - Options.FileType == OutputFileType::Object) { - - auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath()); - // If ObjectEntry or Object has an error, no binary exists, therefore no - // reflection sections exist. - if (!ObjectEntry) { - // Any errors will be diagnosed later in the main loop, ignore them here. - llvm::consumeError(ObjectEntry.takeError()); - return false; - } - - auto Object = - ObjectEntry->getObjectAs(Map.getTriple()); - if (!Object) { - // Any errors will be diagnosed later in the main loop, ignore them here. 
- llvm::consumeError(Object.takeError()); - return false; - } - - for (auto &Section : Object->sections()) { - llvm::Expected NameOrErr = - Object->getSectionName(Section.getRawDataRefImpl()); - if (!NameOrErr) { - llvm::consumeError(NameOrErr.takeError()); - continue; - } - NameOrErr->consume_back("__TEXT"); - if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) != - llvm::swift::Swift5ReflectionSectionKind::Unknown) { - return true; - } - } - } - return false; -} - -static void -copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj, - DwarfStreamer *Streamer) { - auto OF = - llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename()); - if (!OF) { - llvm::consumeError(OF.takeError()); - return; - } else if (auto *MO = - dyn_cast(OF->getBinary())) { - for (auto &Section : OF->getBinary()->sections()) { - llvm::Expected NameOrErr = - MO->getSectionName(Section.getRawDataRefImpl()); - if (!NameOrErr) { - llvm::consumeError(NameOrErr.takeError()); - continue; - } - llvm::Expected SectionContents = Section.getContents(); - if (SectionContents) { - NameOrErr->consume_back("__TEXT"); - Streamer->emitSwiftReflectionSection( - MO->mapReflectionSectionNameToEnumValue(*NameOrErr), - *SectionContents, Section.getAlignment(), Section.getSize()); - } - } - } -} - bool DwarfLinkerForBinary::link(const DebugMap &Map) { if (!createStreamer(Map.getTriple(), OutFile)) return false; @@ -461,19 +389,8 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) { llvm_unreachable("Unhandled DebugMap object"); }); GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces); - bool ReflectionSectionsPresentInBinary = false; - // If there is no output specified, no point in checking the binary for swift5 - // reflection sections. 
- if (!Options.NoOutput) { - ReflectionSectionsPresentInBinary = - binaryHasSwiftReflectionSections(Map, Options, BinHolder); - } for (const auto &Obj : Map.objects()) { - - if (!ReflectionSectionsPresentInBinary) - copySwiftReflectionMetadata(Obj.get(), Streamer.get()); - // N_AST objects (swiftmodule files) should get dumped directly into the // appropriate DWARF section. if (Obj->getType() == MachO::N_AST) { @@ -514,6 +431,7 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) { continue; } + if (auto ErrorOrObj = loadObject(*Obj, Map, RL)) GeneralLinker.addObjectFile(*ErrorOrObj); else { From e39c262979e6870fcd74ae8ac7428b940f3b6c07 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 21 Jan 2022 13:53:03 -0800 Subject: [PATCH 222/946] Revert "[gn build] Set HAVE_MALLINFO2=1" This reverts commit 9f4cc5a6bb56b42bb90ea31f10ecf8fed8f07653. Breaks http://45.33.8.238/macm1/26108/step_4.txt. --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 3c882a80fee19..3efbdde7d85b4 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -89,7 +89,7 @@ write_cmake_config("config") { "HAVE_LIBPFM=", "HAVE_LIBPSAPI=", "HAVE_MALLCTL=", - "HAVE_MALLINFO2=1", + "HAVE_MALLINFO2=", "HAVE_SIGNAL_H=1", "HAVE_STD_IS_TRIVIALLY_COPYABLE=1", "HAVE_STRERROR=1", From 705d8c49f9be82415a9cdd793cda405333fba71e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Fri, 21 Jan 2022 22:59:14 +0100 Subject: [PATCH 223/946] [x86] regenerate smul-with-overflow.ll; add test which failed with llvm 13 and lower (NFC) --- llvm/test/CodeGen/X86/smul-with-overflow.ll | 734 +++++++++++++++++++- 1 file changed, 716 insertions(+), 18 deletions(-) diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll 
b/llvm/test/CodeGen/X86/smul-with-overflow.ll index 7154a896a359d..1feb6f4026299 100644 --- a/llvm/test/CodeGen/X86/smul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll @@ -1,9 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-- | FileCheck %s @ok = internal constant [4 x i8] c"%d\0A\00" @no = internal constant [4 x i8] c"no\0A\00" define i1 @test1(i32 %v1, i32 %v2) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jno .LBB0_1 +; CHECK-NEXT: # %bb.2: # %overflow +; CHECK-NEXT: pushl $no +; CHECK-NEXT: calll printf@PLT +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB0_1: # %normal +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $ok +; CHECK-NEXT: calll printf@PLT +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retl entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %sum = extractvalue {i32, i1} %t, 0 @@ -17,12 +36,27 @@ normal: overflow: %t2 = tail call i32 (i8*, ...) 
@printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind ret i1 false -; CHECK-LABEL: test1: -; CHECK: imull -; CHECK-NEXT: jno } define i1 @test2(i32 %v1, i32 %v2) nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jno .LBB1_2 +; CHECK-NEXT: # %bb.1: # %overflow +; CHECK-NEXT: pushl $no +; CHECK-NEXT: calll printf@PLT +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_2: # %normal +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: pushl $ok +; CHECK-NEXT: calll printf@PLT +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retl entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) %sum = extractvalue {i32, i1} %t, 0 @@ -36,48 +70,712 @@ overflow: normal: %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind ret i1 true -; CHECK-LABEL: test2: -; CHECK: imull -; CHECK-NEXT: jno } declare i32 @printf(i8*, ...) 
nounwind declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) define i32 @test3(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl %eax, %eax +; CHECK-NEXT: retl entry: %tmp0 = add i32 %b, %a %tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2) %tmp2 = extractvalue { i32, i1 } %tmp1, 0 ret i32 %tmp2 -; CHECK-LABEL: test3: -; CHECK: addl -; CHECK-NEXT: addl -; CHECK-NEXT: ret } define i32 @test4(i32 %a, i32 %b) nounwind readnone { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull $4, %eax, %eax +; CHECK-NEXT: retl entry: %tmp0 = add i32 %b, %a %tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4) %tmp2 = extractvalue { i32, i1 } %tmp1, 0 ret i32 %tmp2 -; CHECK-LABEL: test4: -; CHECK: addl -; CHECK: mull -; CHECK-NEXT: ret } declare { i63, i1 } @llvm.smul.with.overflow.i63(i63, i63) nounwind readnone +; Was returning false, should return true (not constant folded yet though). +; PR13991 define i1 @test5() nounwind { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retl entry: %res = call { i63, i1 } @llvm.smul.with.overflow.i63(i63 4, i63 4611686018427387903) %sum = extractvalue { i63, i1 } %res, 0 %overflow = extractvalue { i63, i1 } %res, 1 ret i1 %overflow -; Was returning false, should return true (not constant folded yet though). 
-; PR13991 -; CHECK-LABEL: test5: -; CHECK-NOT: xorb +} + + + +declare { i129, i1 } @llvm.smul.with.overflow.i129(i129, i129) + +define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { +; CHECK-LABEL: smul_ovf: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $164, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: andl $1, %ebp +; CHECK-NEXT: negl %ebp +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, %ebx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %edx, %ecx +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %edx, %ebx +; CHECK-NEXT: setb %cl +; CHECK-NEXT: addl %eax, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %ebx, %esi +; CHECK-NEXT: adcl %eax, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: addl %edx, %ebp 
+; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: setb %bl +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movzbl %bl, %eax +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl %ecx, %ebx +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: setb %al +; CHECK-NEXT: addl %edi, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %ebp, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: adcl %ecx, %eax +; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: mull %edi +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: addl %ebx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %esi, %ecx +; CHECK-NEXT: setb %bl +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: mull %edi +; CHECK-NEXT: addl %ecx, %eax 
+; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %bl, %eax +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: addl %ecx, %ebp +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %esi, %edi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ebx +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addl %edi, %ebx +; CHECK-NEXT: adcl $0, %ebp +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: mull %edi +; CHECK-NEXT: 
movl %edx, %ecx +; CHECK-NEXT: addl %ebx, %eax +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: adcl %ebp, %ecx +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %edi +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: addl %ecx, %edi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-NEXT: adcl %eax, %ebp +; CHECK-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: adcl $0, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %esi +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: addl %ebx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %esi, %ecx +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: 
addl %edi, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: adcl %ebp, %esi +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-NEXT: adcl %eax, %ebx +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl $0, (%esp) # 4-byte Folded Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: addl %esi, %ebp +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: addl %ebx, %ebp +; CHECK-NEXT: adcl %esi, %edi +; CHECK-NEXT: setb %bl +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movzbl %bl, %eax +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ecx +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: addl %ebx, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %eax, %edx +; CHECK-NEXT: setb %bl +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %bl, %eax +; CHECK-NEXT: adcl %esi, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: addl %edx, %esi +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: adcl %eax, %ebx +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: addl %ecx, %esi +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: setb %cl +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: adcl %ebp, %eax +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: adcl %edi, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: addl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl (%esp), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl %ecx, (%esp) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: adcl $0, %ebp +; CHECK-NEXT: addl %eax, %ebx +; CHECK-NEXT: movl %ebx, %esi +; CHECK-NEXT: adcl %edx, %ebp +; CHECK-NEXT: setb %ch +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %ch, %ecx +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %ebp, %ebx +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
CHECK-NEXT: adcl %ecx, %esi +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: movl %ecx, %ebp +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: addl %edi, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %edx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %ebp, %edx +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: addl %eax, %ebx +; CHECK-NEXT: adcl %ecx, %edx +; CHECK-NEXT: setb %al +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: adcl %ebp, %edx +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: adcl %edi, %eax +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl (%esp), %eax # 4-byte Reload +; CHECK-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-NEXT: adcl %eax, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ebp # 4-byte Reload +; CHECK-NEXT: mull %ebp +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %ebp +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %esi, %ebx +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: adcl $0, %ebp +; CHECK-NEXT: addl %ecx, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %esi, %ebp +; CHECK-NEXT: setb %cl +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %edi +; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: mull %edi +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: addl %ebx, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl %edx, %esi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: addl %eax, %esi +; CHECK-NEXT: movzbl %cl, %edx +; CHECK-NEXT: adcl %edi, %edx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: adcl %edx, %eax +; CHECK-NEXT: movl %ebp, %edi +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: movl (%esp), %ebx # 4-byte Reload +; CHECK-NEXT: movl %ebx, %ebp +; CHECK-NEXT: adcl $0, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: adcl $0, %edx +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: adcl %ebp, %edx +; CHECK-NEXT: setb %al +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: addl %edi, %esi +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %ebx, (%esp) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: addl %ebp, %eax +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: adcl %ecx, %eax +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: imull %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: addl %eax, %eax 
+; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl %eax, %ebp +; CHECK-NEXT: adcl %ecx, %edi +; CHECK-NEXT: addl %esi, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl (%esp), %edi # 4-byte Folded Reload +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %edx, %esi +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl %ecx, %ebp +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: addl %eax, %esi +; CHECK-NEXT: movl %esi, (%esp) # 4-byte Spill +; CHECK-NEXT: adcl %edx, %edi +; CHECK-NEXT: setb %cl +; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: movl %ebx, %eax +; CHECK-NEXT: addl %edi, %eax +; CHECK-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: adcl %ecx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %edi +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ebp, %esi +; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %eax, %ebp +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: addl %edx, %ebp +; CHECK-NEXT: adcl $0, %ecx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: mull {{[0-9]+}}(%esp) +; CHECK-NEXT: addl %eax, %ebp +; CHECK-NEXT: adcl %edx, %ecx +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; CHECK-NEXT: adcl %edx, %esi +; CHECK-NEXT: movl %ebx, %edx +; CHECK-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl %ecx, %ebx +; CHECK-NEXT: adcl $0, %ebx +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: adcl $0, %eax +; CHECK-NEXT: addl %edi, %ebx +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; CHECK-NEXT: addl %edx, %ebx +; CHECK-NEXT: adcl %ebp, %eax +; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload +; CHECK-NEXT: adcl %ecx, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl $0, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: imull %ecx, %edi +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: imull %ecx, %edi 
+; CHECK-NEXT: addl %ebp, %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: imull %ecx, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl %ebp, %ecx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl %edi, %ecx +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: addl %ebx, %ebp +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: adcl %eax, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl %esi, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-NEXT: movl (%esp), %eax # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; CHECK-NEXT: adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: movl %ebp, %ebx +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; CHECK-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: sarl $31, %eax +; CHECK-NEXT: xorl %eax, %ebx +; CHECK-NEXT: xorl %eax, %edx +; CHECK-NEXT: orl %ebx, %edx +; CHECK-NEXT: xorl %eax, %ebp +; CHECK-NEXT: orl %edx, %ebp +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; CHECK-NEXT: xorl %eax, %ebx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: orl %ebx, %ecx +; CHECK-NEXT: xorl %eax, %esi +; CHECK-NEXT: orl %ecx, %esi +; CHECK-NEXT: xorl %eax, %edi +; CHECK-NEXT: xorl (%esp), %eax # 4-byte Folded Reload +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: orl %ebp, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: negl %edx +; CHECK-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; CHECK-NEXT: xorl %edx, %ebx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; CHECK-NEXT: xorl %edx, %esi +; CHECK-NEXT: orl %ebx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; CHECK-NEXT: xorl %edx, %ebx +; CHECK-NEXT: orl %esi, %ebx +; CHECK-NEXT: xorl %edi, %edx +; CHECK-NEXT: orl %ebx, %edx +; CHECK-NEXT: orl %eax, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: movl %edx, 4(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: movl %edx, (%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; CHECK-NEXT: movl %edx, 12(%eax) +; CHECK-NEXT: movb %cl, 16(%eax) +; CHECK-NEXT: setne 20(%eax) +; CHECK-NEXT: addl $164, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl $4 + %r = tail call { i129, i1 } @llvm.smul.with.overflow.i129(i129 %x, i129 %y) + ret { i129, i1 } %r } From 6df05697ca1d3e691f674014f7728ff71147bbe7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 21 Jan 2022 11:55:17 -0800 Subject: [PATCH 224/946] [gn build] Set HAVE_MALLINFO2=1 I'm seeing deprecated warnings due to using mallinfo() instead of mallinfo2(). ../../llvm/lib/Support/Unix/Process.inc:98:10: warning: 'mallinfo' is deprecated [-Wdeprecated-declarations] mi = ::mallinfo(); mallinfo2() is part of glibc 2.33 which was released in Feb 2021, which is fairly recent but I think gn users should be using fairly up to date glibcs. If this breaks people we could make this a gn arg instead. 
Differential Revision: https://reviews.llvm.org/D117916 --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 3efbdde7d85b4..07b6453ea9b8a 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -89,7 +89,6 @@ write_cmake_config("config") { "HAVE_LIBPFM=", "HAVE_LIBPSAPI=", "HAVE_MALLCTL=", - "HAVE_MALLINFO2=", "HAVE_SIGNAL_H=1", "HAVE_STD_IS_TRIVIALLY_COPYABLE=1", "HAVE_STRERROR=1", @@ -143,6 +142,7 @@ write_cmake_config("config") { "HAVE_LINK_H=1", "HAVE_LSEEK64=1", "HAVE_MALLINFO=1", + "HAVE_MALLINFO2=1", "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1", ] } else { @@ -151,6 +151,7 @@ write_cmake_config("config") { "HAVE_LINK_H=", "HAVE_LSEEK64=", "HAVE_MALLINFO=", + "HAVE_MALLINFO2=", "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=", ] } From 653b007dc186845699d330c66dc9dfb3aaf396df Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 21 Jan 2022 23:18:23 +0100 Subject: [PATCH 225/946] [CodeComplete] fix nullptr crash in 612f5ed8823120 --- clang/lib/Sema/SemaCodeComplete.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index cc08dee266136..b86bfe869c69d 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5519,9 +5519,10 @@ QualType getApproximateType(const Expr *E) { : getApproximateType(CDSME->getBase()); if (CDSME->isArrow() && !Base.isNull()) Base = Base->getPointeeType(); // could handle unique_ptr etc here? - auto *RD = Base.isNull() - ? nullptr - : llvm::dyn_cast(getAsRecordDecl(Base)); + auto *RD = + Base.isNull() + ? 
nullptr + : llvm::dyn_cast_or_null(getAsRecordDecl(Base)); if (RD && RD->isCompleteDefinition()) { // Look up member heuristically, including in bases. for (const auto *Member : RD->lookupDependentName( From b796709a62da2a09ab753236deb3c9f2fc14cf47 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Fri, 21 Jan 2022 14:40:36 -0800 Subject: [PATCH 226/946] Only run MLIR PyTACO tests when python bindings are enabled. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D117930 --- .../test/Integration/Dialect/SparseTensor/taco/lit.local.cfg | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg new file mode 100644 index 0000000000000..cf04454dea6ef --- /dev/null +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg @@ -0,0 +1,5 @@ +# Disable ASAN's leak detection for python OpsDSL tests. +config.environment['ASAN_OPTIONS'] = 'detect_leaks=0' +# Only run when python bindings are enabled. +if not config.enable_bindings_python: + config.unsupported = True From ba093fe58b152e12049663e87536ea15a6de638d Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Fri, 21 Jan 2022 15:15:06 -0800 Subject: [PATCH 227/946] Fix a commit. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D117932 --- mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg index cf04454dea6ef..7137d0fba95f8 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg @@ -1,4 +1,4 @@ -# Disable ASAN's leak detection for python OpsDSL tests. 
+# Disable ASAN's leak detection for python taco tests. config.environment['ASAN_OPTIONS'] = 'detect_leaks=0' # Only run when python bindings are enabled. if not config.enable_bindings_python: From 6ba1fb04214bad08b8b19afba91798818ea276e5 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Fri, 21 Jan 2022 15:09:42 -0800 Subject: [PATCH 228/946] [llvm-pdbutil] Fix gaps ouput. --- llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp index f284b87128640..e6b5d21f36e5f 100644 --- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp +++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp @@ -586,8 +586,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, AutoIndent Indent(P, 7); P.formatLine("offset = {0}, range = {1}", Def.Hdr.Offset, formatRange(Def.Range)); - P.formatLine("gaps = {2}", Def.Hdr.Offset, - formatGaps(P.getIndentLevel() + 9, Def.Gaps)); + P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } @@ -599,7 +598,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, formatRegisterId(Def.Hdr.Register, CompilationCPU), int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(), Def.hasSpilledUDTMember()); - P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range), + P.formatLine("range = {0}, gaps = [{1}]", formatRange(Def.Range), formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } @@ -626,7 +625,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}", formatRegisterId(Def.Hdr.Register, CompilationCPU), NoName, uint32_t(Def.Hdr.OffsetInParent)); - P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range), + P.formatLine("range = {0}, gaps = [{1}]", formatRange(Def.Range), formatGaps(P.getIndentLevel() + 9, Def.Gaps)); 
return Error::success(); } @@ -636,7 +635,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, AutoIndent Indent(P, 7); P.formatLine("program = {0}, offset in parent = {1}, range = {2}", Def.Program, Def.OffsetInParent, formatRange(Def.Range)); - P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps)); + P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } @@ -644,7 +643,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) { AutoIndent Indent(P, 7); P.formatLine("program = {0}, range = {1}", Def.Program, formatRange(Def.Range)); - P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps)); + P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps)); return Error::success(); } From 58ee14e29e98384bba6d2e1c1789b7f8e3060d24 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 20 Jan 2022 14:18:20 -0800 Subject: [PATCH 229/946] [lldb] Fix timer logging inverted quiet condition The logic of `g_quiet` was inverted in D26243. This corrects the issue. Without this, running `log timers enable` produces a high volume of incremental timer output. Differential Revision: https://reviews.llvm.org/D117837 --- lldb/source/Utility/Timer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp index 2f3afe4c87037..b190f35007d50 100644 --- a/lldb/source/Utility/Timer.cpp +++ b/lldb/source/Utility/Timer.cpp @@ -63,7 +63,7 @@ Timer::Timer(Timer::Category &category, const char *format, ...) 
TimerStack &stack = GetTimerStackForCurrentThread(); stack.push_back(this); - if (g_quiet && stack.size() <= g_display_depth) { + if (!g_quiet && stack.size() <= g_display_depth) { std::lock_guard lock(GetFileMutex()); // Indent @@ -89,7 +89,7 @@ Timer::~Timer() { Signposts->endInterval(this, m_category.GetName()); TimerStack &stack = GetTimerStackForCurrentThread(); - if (g_quiet && stack.size() <= g_display_depth) { + if (!g_quiet && stack.size() <= g_display_depth) { std::lock_guard lock(GetFileMutex()); ::fprintf(stdout, "%*s%.9f sec (%.9f sec)\n", int(stack.size() - 1) * TIMER_INDENT_AMOUNT, "", From efa15f417847439eb8884f1ef47e163fa450ac7c Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Thu, 20 Jan 2022 17:27:23 -0800 Subject: [PATCH 230/946] [mlir][sparse] add ability for sparse tensor output Rationale: Although file I/O is a bit alien to MLIR itself, we provide two convenient ways for sparse tensor I/O. The input part was already there (behind the swiss army knife sparse_tensor.new). Now we have a sparse_tensor.out to write out data. As before, the ops are kept vague and may change in the future. For now this allows us to compare TACO vs MLIR very easily. 
Reviewed By: bixia Differential Revision: https://reviews.llvm.org/D117850 --- .../SparseTensor/IR/SparseTensorOps.td | 20 ++++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 6 ++ .../Transforms/SparseTensorConversion.cpp | 70 +++++++++++++++---- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 48 +++++++++++++ .../test/Dialect/SparseTensor/conversion.mlir | 24 +++++++ mlir/test/Dialect/SparseTensor/invalid.mlir | 8 +++ mlir/test/Dialect/SparseTensor/roundtrip.mlir | 14 ++++ 7 files changed, 177 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index b7fce5b3137c1..1209a70a72c22 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -351,4 +351,24 @@ def SparseTensor_ReleaseOp : SparseTensor_Op<"release", []>, let assemblyFormat = "$tensor attr-dict `:` type($tensor)"; } +def SparseTensor_OutOp : SparseTensor_Op<"out", []>, + Arguments<(ins AnyType:$tensor, AnyType:$dest)> { + string summary = "Outputs a sparse tensor to the given destination"; + string description = [{ + Outputs the contents of a sparse tensor to the destination defined by an + opaque pointer provided by `dest`. For targets that have access to a file + system, for example, this pointer may specify a filename (or file) for output. + The form of the operation is kept deliberately very general to allow for + alternative implementations in the future, such as sending the contents to + a buffer defined by a pointer. 
+ + Example: + + ```mlir + sparse_tensor.out %t, %dest : tensor<1024x1024xf64, #CSR>, !Dest + ``` + }]; + let assemblyFormat = "$tensor `,` $dest attr-dict `:` type($tensor) `,` type($dest)"; +} + #endif // SPARSETENSOR_OPS diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 8f44b5b1b9e22..8a7942c8d666a 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -329,6 +329,12 @@ static LogicalResult verify(ReleaseOp op) { return success(); } +static LogicalResult verify(OutOp op) { + if (!getSparseTensorEncoding(op.tensor().getType())) + return op.emitError("expected a sparse tensor for output"); + return success(); +} + //===----------------------------------------------------------------------===// // TensorDialect Methods. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index a28f9ac70b318..94e87b3b79b7f 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "CodegenUtils.h" + #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" @@ -189,8 +190,8 @@ static Value genBuffer(ConversionPatternRewriter &rewriter, Location loc, /// computation. 
static void newParams(ConversionPatternRewriter &rewriter, SmallVector &params, Operation *op, - SparseTensorEncodingAttr &enc, Action action, - ValueRange szs, Value ptr = Value()) { + ShapedType stp, SparseTensorEncodingAttr &enc, + Action action, ValueRange szs, Value ptr = Value()) { Location loc = op->getLoc(); ArrayRef dlt = enc.getDimLevelType(); unsigned sz = dlt.size(); @@ -218,7 +219,7 @@ static void newParams(ConversionPatternRewriter &rewriter, } params.push_back(genBuffer(rewriter, loc, rev)); // Secondary and primary types encoding. - Type elemTp = op->getResult(0).getType().cast().getElementType(); + Type elemTp = stp.getElementType(); params.push_back(constantPointerTypeEncoding(rewriter, loc, enc)); params.push_back(constantIndexTypeEncoding(rewriter, loc, enc)); params.push_back(constantPrimaryTypeEncoding(rewriter, loc, elemTp)); @@ -420,9 +421,10 @@ class SparseTensorNewConverter : public OpConversionPattern { // inferred from the result type of the new operator. SmallVector sizes; SmallVector params; - sizesFromType(rewriter, sizes, op.getLoc(), resType.cast()); + ShapedType stp = resType.cast(); + sizesFromType(rewriter, sizes, op.getLoc(), stp); Value ptr = adaptor.getOperands()[0]; - newParams(rewriter, params, op, enc, Action::kFromFile, sizes, ptr); + newParams(rewriter, params, op, stp, enc, Action::kFromFile, sizes, ptr); rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); } @@ -441,7 +443,9 @@ class SparseTensorInitConverter : public OpConversionPattern { // Generate the call to construct empty tensor. The sizes are // explicitly defined by the arguments to the init operator. 
SmallVector params; - newParams(rewriter, params, op, enc, Action::kEmpty, adaptor.getOperands()); + ShapedType stp = resType.cast(); + newParams(rewriter, params, op, stp, enc, Action::kEmpty, + adaptor.getOperands()); rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); } @@ -472,15 +476,15 @@ class SparseTensorConvertConverter : public OpConversionPattern { } SmallVector sizes; SmallVector params; - sizesFromPtr(rewriter, sizes, op, encSrc, srcType.cast(), - src); + ShapedType stp = srcType.cast(); + sizesFromPtr(rewriter, sizes, op, encSrc, stp, src); // Set up encoding with right mix of src and dst so that the two // method calls can share most parameters, while still providing // the correct sparsity information to either of them. auto enc = SparseTensorEncodingAttr::get( op->getContext(), encDst.getDimLevelType(), encDst.getDimOrdering(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); - newParams(rewriter, params, op, enc, Action::kToCOO, sizes, src); + newParams(rewriter, params, op, stp, enc, Action::kToCOO, sizes, src); Value coo = genNewCall(rewriter, op, params); params[3] = constantPointerTypeEncoding(rewriter, loc, encDst); params[4] = constantIndexTypeEncoding(rewriter, loc, encDst); @@ -512,7 +516,8 @@ class SparseTensorConvertConverter : public OpConversionPattern { SmallVector sizes; SmallVector params; sizesFromPtr(rewriter, sizes, op, encSrc, srcTensorTp, src); - newParams(rewriter, params, op, encDst, Action::kToIterator, sizes, src); + newParams(rewriter, params, op, dstTensorTp, encDst, Action::kToIterator, + sizes, src); Value iter = genNewCall(rewriter, op, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); @@ -567,7 +572,7 @@ class SparseTensorConvertConverter : public OpConversionPattern { SmallVector sizes; SmallVector params; sizesFromSrc(rewriter, sizes, loc, src); - newParams(rewriter, params, op, encDst, 
Action::kEmptyCOO, sizes); + newParams(rewriter, params, op, stp, encDst, Action::kEmptyCOO, sizes); Value ptr = genNewCall(rewriter, op, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value perm = params[2]; @@ -771,6 +776,45 @@ class SparseTensorCompressConverter : public OpConversionPattern { } }; +class SparseTensorOutConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(OutOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + ShapedType srcType = op.tensor().getType().cast(); + // Convert to default permuted COO. + Value src = adaptor.getOperands()[0]; + auto encSrc = getSparseTensorEncoding(srcType); + SmallVector sizes; + SmallVector params; + sizesFromPtr(rewriter, sizes, op, encSrc, srcType, src); + auto enc = SparseTensorEncodingAttr::get( + op->getContext(), encSrc.getDimLevelType(), AffineMap(), + encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); + newParams(rewriter, params, op, srcType, enc, Action::kToCOO, sizes, src); + Value coo = genNewCall(rewriter, op, params); + // Then output the tensor to external file with indices in the externally + // visible lexicographic index order. A sort is required if the source was + // not in that order yet (note that the sort can be dropped altogether if + // external format does not care about the order at all, but here we assume + // it does). 
+ bool sort = + encSrc.getDimOrdering() && !encSrc.getDimOrdering().isIdentity(); + params.clear(); + params.push_back(coo); + params.push_back(adaptor.getOperands()[1]); + params.push_back(constantI1(rewriter, loc, sort)); + Type eltType = srcType.getElementType(); + SmallString<18> name{"outSparseTensor", primaryTypeFunctionSuffix(eltType)}; + TypeRange noTp; + replaceOpWithFuncCall(rewriter, op, name, noTp, params, + EmitCInterface::Off); + return success(); + } +}; + } // namespace //===----------------------------------------------------------------------===// @@ -787,6 +831,6 @@ void mlir::populateSparseTensorConversionPatterns(TypeConverter &typeConverter, SparseTensorReleaseConverter, SparseTensorToPointersConverter, SparseTensorToIndicesConverter, SparseTensorToValuesConverter, SparseTensorLoadConverter, SparseTensorLexInsertConverter, - SparseTensorExpandConverter, SparseTensorCompressConverter>( - typeConverter, patterns.getContext()); + SparseTensorExpandConverter, SparseTensorCompressConverter, + SparseTensorOutConverter>(typeConverter, patterns.getContext()); } diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 20cd1b53d31b4..605e17764773f 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -713,6 +715,31 @@ static SparseTensorCOO *openSparseTensorCOO(char *filename, uint64_t rank, return tensor; } +/// Writes the sparse tensor to extended FROSTT format. 
+template +void outSparseTensor(const SparseTensorCOO &tensor, char *filename) { + auto &sizes = tensor.getSizes(); + auto &elements = tensor.getElements(); + uint64_t rank = tensor.getRank(); + uint64_t nnz = elements.size(); + std::fstream file; + file.open(filename, std::ios_base::out | std::ios_base::trunc); + assert(file.is_open()); + file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl; + for (uint64_t r = 0; r < rank - 1; r++) + file << sizes[r] << " "; + file << sizes[rank - 1] << std::endl; + for (uint64_t i = 0; i < nnz; i++) { + auto &idx = elements[i].indices; + for (uint64_t r = 0; r < rank; r++) + file << (idx[r] + 1) << " "; + file << elements[i].value << std::endl; + } + file.flush(); + file.close(); + assert(file.good()); +} + } // namespace extern "C" { @@ -845,6 +872,17 @@ extern "C" { cursor, values, filled, added, count); \ } +#define IMPL_OUT(NAME, V) \ + void NAME(void *tensor, void *dest, bool sort) { \ + assert(tensor &&dest); \ + auto coo = static_cast *>(tensor); \ + if (sort) \ + coo->sort(); \ + char *filename = static_cast(dest); \ + outSparseTensor(*coo, filename); \ + delete coo; \ + } + // Assume index_t is in fact uint64_t, so that _mlir_ciface_newSparseTensor // can safely rewrite kIndex to kU64. We make this assertion to guarantee // that this file cannot get out of sync with its header. @@ -1026,6 +1064,14 @@ IMPL_EXPINSERT(expInsertI32, int32_t) IMPL_EXPINSERT(expInsertI16, int16_t) IMPL_EXPINSERT(expInsertI8, int8_t) +/// Helper to output a sparse tensor, one per value type. 
+IMPL_OUT(outSparseTensorF64, double) +IMPL_OUT(outSparseTensorF32, float) +IMPL_OUT(outSparseTensorI64, int64_t) +IMPL_OUT(outSparseTensorI32, int32_t) +IMPL_OUT(outSparseTensorI16, int16_t) +IMPL_OUT(outSparseTensorI8, int8_t) + #undef CASE #undef IMPL_SPARSEVALUES #undef IMPL_GETOVERHEAD @@ -1033,6 +1079,7 @@ IMPL_EXPINSERT(expInsertI8, int8_t) #undef IMPL_GETNEXT #undef IMPL_LEXINSERT #undef IMPL_EXPINSERT +#undef IMPL_OUT //===----------------------------------------------------------------------===// // @@ -1162,6 +1209,7 @@ void convertFromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse, *pValues = values; *pIndices = indices; } + } // extern "C" #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir index 89ee0d5b7c816..04c8a51817813 100644 --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -468,3 +468,27 @@ func @sparse_compression(%arg0: tensor<8x8xf64, #SparseMatrix>, : tensor<8x8xf64, #SparseMatrix>, memref, memref, memref, memref, index return } + +// CHECK-LABEL: func @sparse_out1( +// CHECK-SAME: %[[A:.*]]: !llvm.ptr, +// CHECK-SAME: %[[B:.*]]: !llvm.ptr) +// CHECK-DAG: %[[C:.*]] = arith.constant false +// CHECK: %[[T:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[A]]) +// CHECK: call @outSparseTensorF64(%[[T]], %[[B]], %[[C]]) : (!llvm.ptr, !llvm.ptr, i1) -> () +// CHECK: return +func @sparse_out1(%arg0: tensor, %arg1: !llvm.ptr) { + sparse_tensor.out %arg0, %arg1 : tensor, !llvm.ptr + return +} + +// CHECK-LABEL: func @sparse_out2( +// CHECK-SAME: %[[A:.*]]: !llvm.ptr, +// CHECK-SAME: %[[B:.*]]: !llvm.ptr) +// CHECK-DAG: %[[C:.*]] = arith.constant true +// CHECK: %[[T:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[A]]) +// CHECK: call @outSparseTensorF32(%[[T]], %[[B]], %[[C]]) : 
(!llvm.ptr, !llvm.ptr, i1) -> () +// CHECK: return +func @sparse_out2(%arg0: tensor, %arg1: !llvm.ptr) { + sparse_tensor.out %arg0, %arg1 : tensor, !llvm.ptr + return +} diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 06d662127174c..84990221e4df4 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -204,3 +204,11 @@ func @sparse_convert_dim_mismatch(%arg0: tensor<10x?xf32>) -> tensor<10x10xf32, %0 = sparse_tensor.convert %arg0 : tensor<10x?xf32> to tensor<10x10xf32, #CSR> return %0 : tensor<10x10xf32, #CSR> } + +// ----- + +func @invalid_out_dense(%arg0: tensor<10xf64>, %arg1: !llvm.ptr) { + // expected-error@+1 {{expected a sparse tensor for output}} + sparse_tensor.out %arg0, %arg1 : tensor<10xf64>, !llvm.ptr + return +} diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 853befc1cdef4..5457e55f57e6a 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -179,3 +179,17 @@ func @sparse_compression(%arg0: tensor<8x8xf64, #SparseMatrix>, : tensor<8x8xf64, #SparseMatrix>, memref, memref, memref, memref, index return } + +// ----- + +#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +// CHECK-LABEL: func @sparse_out( +// CHECK-SAME: %[[A:.*]]: tensor>, +// CHECK-SAME: %[[B:.*]]: !llvm.ptr) +// CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor>, !llvm.ptr +// CHECK: return +func @sparse_out(%arg0: tensor, %arg1: !llvm.ptr) { + sparse_tensor.out %arg0, %arg1 : tensor, !llvm.ptr + return +} From 10d0d8c0c1db57b7ff465df7ced78a42a20d592d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 16 Jan 2022 06:14:24 +0000 Subject: [PATCH 231/946] [clang][cmake] Use `GNUInstallDirs` to support custom installation dirs I am breaking apart D99484 so the cause of build failures is easier to understand. 
Differential Revision: https://reviews.llvm.org/D117419 --- clang/CMakeLists.txt | 14 +++++++++----- clang/cmake/modules/AddClang.cmake | 5 +++-- clang/cmake/modules/CMakeLists.txt | 4 +++- clang/tools/c-index-test/CMakeLists.txt | 2 +- clang/tools/clang-format/CMakeLists.txt | 12 ++++++------ clang/tools/clang-nvlink-wrapper/CMakeLists.txt | 2 +- clang/tools/clang-rename/CMakeLists.txt | 4 ++-- clang/tools/libclang/CMakeLists.txt | 2 +- clang/tools/scan-build-py/CMakeLists.txt | 6 +++--- clang/tools/scan-build/CMakeLists.txt | 6 +++--- clang/tools/scan-view/CMakeLists.txt | 4 ++-- clang/utils/hmaptool/CMakeLists.txt | 2 +- 12 files changed, 35 insertions(+), 28 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 7ea37850ad609..49150fa7c5612 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -4,7 +4,13 @@ cmake_minimum_required(VERSION 3.13.4) # standalone project, using LLVM as an external library: if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) project(Clang) + set(CLANG_BUILT_STANDALONE TRUE) +endif() + +# Must go below project(..) 
+include(GNUInstallDirs) +if(CLANG_BUILT_STANDALONE) set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to") set(CMAKE_CXX_STANDARD_REQUIRED YES) set(CMAKE_CXX_EXTENSIONS NO) @@ -185,8 +191,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) endif() endif() - set(CLANG_BUILT_STANDALONE TRUE) - set(BACKEND_PACKAGE_STRING "LLVM ${LLVM_PACKAGE_VERSION}") else() set(BACKEND_PACKAGE_STRING "${PACKAGE_STRING}") @@ -424,7 +428,7 @@ include_directories(BEFORE if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/clang include/clang-c - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT clang-headers FILES_MATCHING PATTERN "*.def" @@ -433,7 +437,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) ) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/clang - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" COMPONENT clang-headers FILES_MATCHING PATTERN "CMakeFiles" EXCLUDE @@ -453,7 +457,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) add_custom_target(bash-autocomplete DEPENDS utils/bash-autocomplete.sh) install(PROGRAMS utils/bash-autocomplete.sh - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT bash-autocomplete) if(NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-bash-autocomplete diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake index 5752f4277444e..9bbbfc032b7df 100644 --- a/clang/cmake/modules/AddClang.cmake +++ b/clang/cmake/modules/AddClang.cmake @@ -1,3 +1,4 @@ +include(GNUInstallDirs) include(LLVMDistributionSupport) function(clang_tablegen) @@ -120,7 +121,7 @@ macro(add_clang_library name) ${export_to_clangtargets} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} - RUNTIME DESTINATION bin) + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${lib} @@ -159,7 +160,7 @@ macro(add_clang_tool name) get_target_export_arg(${name} Clang 
export_to_clangtargets) install(TARGETS ${name} ${export_to_clangtargets} - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT ${name}) if(NOT LLVM_ENABLE_IDE) diff --git a/clang/cmake/modules/CMakeLists.txt b/clang/cmake/modules/CMakeLists.txt index e9cc1240dafb7..c6f6ce9fe5d69 100644 --- a/clang/cmake/modules/CMakeLists.txt +++ b/clang/cmake/modules/CMakeLists.txt @@ -1,3 +1,4 @@ +include(ExtendPath) include(LLVMDistributionSupport) include(FindPrefixFromConfig) @@ -42,8 +43,9 @@ find_prefix_from_config(CLANG_CONFIG_CODE CLANG_INSTALL_PREFIX "${CLANG_INSTALL_ set(CLANG_CONFIG_CMAKE_DIR "\${CLANG_INSTALL_PREFIX}/${CLANG_INSTALL_PACKAGE_DIR}") set(CLANG_CONFIG_LLVM_CMAKE_DIR "\${CLANG_INSTALL_PREFIX}/${LLVM_INSTALL_PACKAGE_DIR}") get_config_exports_includes(Clang CLANG_CONFIG_INCLUDE_EXPORTS) +extend_path(base_includedir "\${CLANG_INSTALL_PREFIX}" "${CMAKE_INSTALL_INCLUDEDIR}") set(CLANG_CONFIG_INCLUDE_DIRS - "\${CLANG_INSTALL_PREFIX}/include" + "${base_includedir}" ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/ClangConfig.cmake.in diff --git a/clang/tools/c-index-test/CMakeLists.txt b/clang/tools/c-index-test/CMakeLists.txt index 99c6081db2d63..0ae1b4e55244e 100644 --- a/clang/tools/c-index-test/CMakeLists.txt +++ b/clang/tools/c-index-test/CMakeLists.txt @@ -49,7 +49,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) set_property(TARGET c-index-test APPEND PROPERTY INSTALL_RPATH "@executable_path/../../lib") else() - set(INSTALL_DESTINATION bin) + set(INSTALL_DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() install(TARGETS c-index-test diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt index 35ecdb11253ce..bbdef93b576b8 100644 --- a/clang/tools/clang-format/CMakeLists.txt +++ b/clang/tools/clang-format/CMakeLists.txt @@ -21,20 +21,20 @@ if( LLVM_LIB_FUZZING_ENGINE OR LLVM_USE_SANITIZE_COVERAGE ) endif() install(PROGRAMS clang-format-bbedit.applescript - DESTINATION share/clang + DESTINATION 
"${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-format) install(PROGRAMS clang-format-diff.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-format) install(PROGRAMS clang-format-sublime.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-format) install(PROGRAMS clang-format.el - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-format) install(PROGRAMS clang-format.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-format) install(PROGRAMS git-clang-format - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clang-format) diff --git a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt index 033392f1c2bdc..2c979e5097958 100644 --- a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt +++ b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt @@ -22,4 +22,4 @@ target_link_libraries(clang-nvlink-wrapper ${CLANG_NVLINK_WRAPPER_LIB_DEPS} ) -install(TARGETS clang-nvlink-wrapper RUNTIME DESTINATION bin) +install(TARGETS clang-nvlink-wrapper RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") diff --git a/clang/tools/clang-rename/CMakeLists.txt b/clang/tools/clang-rename/CMakeLists.txt index cda8e29ec5b18..58da000272f6a 100644 --- a/clang/tools/clang-rename/CMakeLists.txt +++ b/clang/tools/clang-rename/CMakeLists.txt @@ -19,8 +19,8 @@ clang_target_link_libraries(clang-rename ) install(PROGRAMS clang-rename.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-rename) install(PROGRAMS clang-rename.el - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-rename) diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt index bf88dca0a34b1..4e0647971ab46 100644 --- a/clang/tools/libclang/CMakeLists.txt +++ b/clang/tools/libclang/CMakeLists.txt @@ -186,7 +186,7 @@ 
endif() if(INTERNAL_INSTALL_PREFIX) set(LIBCLANG_HEADERS_INSTALL_DESTINATION "${INTERNAL_INSTALL_PREFIX}/include") else() - set(LIBCLANG_HEADERS_INSTALL_DESTINATION include) + set(LIBCLANG_HEADERS_INSTALL_DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") endif() install(DIRECTORY ../../include/clang-c diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt index c9f1cb7d6b2a7..061dc7ef4dd9e 100644 --- a/clang/tools/scan-build-py/CMakeLists.txt +++ b/clang/tools/scan-build-py/CMakeLists.txt @@ -43,7 +43,7 @@ foreach(BinFile ${BinFiles}) ${CMAKE_BINARY_DIR}/bin/scan-build-py DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build) install (PROGRAMS "bin/scan-build" - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" RENAME scan-build-py COMPONENT scan-build-py) list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/scan-build-py) @@ -56,7 +56,7 @@ foreach(BinFile ${BinFiles}) ${CMAKE_BINARY_DIR}/bin/ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) install(PROGRAMS bin/${BinFile} - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT scan-build-py) list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) endif() @@ -72,7 +72,7 @@ foreach(lib ${LibExecs}) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${lib}) list(APPEND Depends ${CMAKE_BINARY_DIR}/libexec/${lib}) install(PROGRAMS libexec/${lib} - DESTINATION libexec + DESTINATION "${CMAKE_INSTALL_LIBEXECDIR}" COMPONENT scan-build-py) endforeach() diff --git a/clang/tools/scan-build/CMakeLists.txt b/clang/tools/scan-build/CMakeLists.txt index 74334e53c9b18..4a578b4c6f3ed 100644 --- a/clang/tools/scan-build/CMakeLists.txt +++ b/clang/tools/scan-build/CMakeLists.txt @@ -47,7 +47,7 @@ if(CLANG_INSTALL_SCANBUILD) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) install(PROGRAMS bin/${BinFile} - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT scan-build) endforeach() @@ -61,7 +61,7 @@ if(CLANG_INSTALL_SCANBUILD) 
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${LibexecFile}) list(APPEND Depends ${CMAKE_BINARY_DIR}/libexec/${LibexecFile}) install(PROGRAMS libexec/${LibexecFile} - DESTINATION libexec + DESTINATION "${CMAKE_INSTALL_LIBEXECDIR}" COMPONENT scan-build) endforeach() @@ -89,7 +89,7 @@ if(CLANG_INSTALL_SCANBUILD) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/share/scan-build/${ShareFile}) list(APPEND Depends ${CMAKE_BINARY_DIR}/share/scan-build/${ShareFile}) install(FILES share/scan-build/${ShareFile} - DESTINATION share/scan-build + DESTINATION "${CMAKE_INSTALL_DATADIR}/scan-build" COMPONENT scan-build) endforeach() diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt index eccc6b83195b6..07aec76ee66f5 100644 --- a/clang/tools/scan-view/CMakeLists.txt +++ b/clang/tools/scan-view/CMakeLists.txt @@ -20,7 +20,7 @@ if(CLANG_INSTALL_SCANVIEW) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) install(PROGRAMS bin/${BinFile} - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT scan-view) endforeach() @@ -34,7 +34,7 @@ if(CLANG_INSTALL_SCANVIEW) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/share/${ShareFile}) list(APPEND Depends ${CMAKE_BINARY_DIR}/share/scan-view/${ShareFile}) install(FILES share/${ShareFile} - DESTINATION share/scan-view + DESTINATION "${CMAKE_INSTALL_DATADIR}/scan-view" COMPONENT scan-view) endforeach() diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt index 62f2de0cb15ce..f0d9866782b88 100644 --- a/clang/utils/hmaptool/CMakeLists.txt +++ b/clang/utils/hmaptool/CMakeLists.txt @@ -10,7 +10,7 @@ add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HM list(APPEND Depends ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HMAPTOOL}) install(PROGRAMS ${CLANG_HMAPTOOL} - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT hmaptool) add_custom_target(hmaptool ALL DEPENDS ${Depends}) From 
1613f8b8d7d5fc155e1a0ce2f88c4be6b115936e Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 21 Jan 2022 16:22:29 -0800 Subject: [PATCH 232/946] NFC (build fix): Add header for llvm::errs(). Looks like e9211e039377 unfortunately broke the sanitizer build bots, because those bots compile the symbolizer with DLLVM_ENABLE_THREADS=Off. Likely, before the patch, this header was transitively included. --- llvm/lib/Support/ThreadPool.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index bf2584950c4ac..6eec368e626ff 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -14,6 +14,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; From 08574ce4d6254edf857cd374b54eca71dd2710f6 Mon Sep 17 00:00:00 2001 From: not-jenni Date: Fri, 21 Jan 2022 16:16:29 -0800 Subject: [PATCH 233/946] [mlir][tosa] Add clamp + clamp as single clamp canonicalization When 2 clamp ops are in a row, they can be canonicalized into a single clamp that uses the most constrained range Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D117934 --- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 31 ++++++++++++++++++++++++ mlir/test/Dialect/Tosa/canonicalize.mlir | 10 ++++++++ 2 files changed, 41 insertions(+) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index f93e5b2052b9d..af8fa306d7651 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -526,9 +526,40 @@ struct ClampIsNoOp : public OpRewritePattern { } }; +struct ClampClampOptimization : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::ClampOp op, + PatternRewriter &rewriter) const override { + Value input = op.input(); + + Operation *definingOp = 
input.getDefiningOp(); + if (!definingOp) + return failure(); + + if (tosa::ClampOp clampOp = dyn_cast(definingOp)) { + auto min_fp = std::max(op.min_fp(), clampOp.min_fp()).convertToFloat(); + auto max_fp = std::min(op.max_fp(), clampOp.max_fp()).convertToFloat(); + + auto min_int = std::max(op.min_int(), clampOp.min_int()); + auto max_int = std::min(op.max_int(), clampOp.max_int()); + + rewriter.replaceOpWithNewOp( + op, op.getType(), clampOp.input(), + rewriter.getI64IntegerAttr(min_int), + rewriter.getI64IntegerAttr(max_int), rewriter.getF32FloatAttr(min_fp), + rewriter.getF32FloatAttr(max_fp)); + return success(); + } + + return failure(); + } +}; + void ClampOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); + results.insert(context); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index c1e828cc7fca4..41303eebc0693 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -98,6 +98,16 @@ func @clamp_uint8_is_noop(%arg0: tensor<4xui8>) -> tensor<4xui8> { // ----- +// CHECK-LABEL: @clamp_twice_is_single_clamp +func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> { + // CHECK: "tosa.clamp"(%arg0) {max_fp = 3.000000e+00 : f32, max_int = 2 : i64, min_fp = -3.000000e+00 : f32, min_int = -2 : i64} + %0 = "tosa.clamp"(%arg0) {max_fp = 3.0 : f32, max_int = 4 : i64, min_fp = -5.0 : f32, min_int = -2 : i64} : (tensor<4xi8>) -> tensor<4xi8> + %1 = "tosa.clamp"(%0) {max_fp = 5.0 : f32, max_int = 2 : i64, min_fp = -3.0 : f32, min_int = -4 : i64} : (tensor<4xi8>) -> tensor<4xi8> + return %1 : tensor<4xi8> +} + +// ----- + // CHECK-LABEL: @concat_fold func @concat_fold(%arg0: tensor) -> tensor { // CHECK: return %arg0 From 13fa17db3a720d149bcd0783856347a4f09cf634 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: 
Fri, 21 Jan 2022 10:47:15 -0600 Subject: [PATCH 234/946] [split-file] Respect input file's line endings This change adds support for split-file to respect the line ending style of the input file. This enables split-file to work as expected on Windows with input files containing CRLF line endings. The test files added along with this change mirror the existing basic tests, but are forced to contain CRLF line endings via git attributes. This will result in the tests always containing CRLF line endings when checked out regardless of the user's OS. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D117897 --- llvm/.gitattributes | 2 ++ llvm/test/tools/split-file/Inputs/basic-aa.crlf | 2 ++ llvm/test/tools/split-file/Inputs/basic-bb.crlf | 4 ++++ llvm/test/tools/split-file/basic.crlf.test | 10 ++++++++++ llvm/tools/split-file/split-file.cpp | 3 ++- 5 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/split-file/Inputs/basic-aa.crlf create mode 100644 llvm/test/tools/split-file/Inputs/basic-bb.crlf create mode 100644 llvm/test/tools/split-file/basic.crlf.test diff --git a/llvm/.gitattributes b/llvm/.gitattributes index 710defda24b62..b07b652eee02e 100644 --- a/llvm/.gitattributes +++ b/llvm/.gitattributes @@ -25,3 +25,5 @@ test/tools/llvm-mca/X86/directives-handle-crlf.s text eol=crlf test/tools/llvm-strings/radix.test text eol=lf test/tools/split-file/basic.test text eol=lf test/tools/split-file/Inputs/basic-*.txt eol=lf +test/tools/split-file/basic.crlf.test text eol=crlf +test/tools/split-file/Inputs/basic-*.crlf eol=crlf diff --git a/llvm/test/tools/split-file/Inputs/basic-aa.crlf b/llvm/test/tools/split-file/Inputs/basic-aa.crlf new file mode 100644 index 0000000000000..0b9ddeb2fc12a --- /dev/null +++ b/llvm/test/tools/split-file/Inputs/basic-aa.crlf @@ -0,0 +1,2 @@ + +aa diff --git a/llvm/test/tools/split-file/Inputs/basic-bb.crlf b/llvm/test/tools/split-file/Inputs/basic-bb.crlf new file mode 100644 index 
0000000000000..b6c3c808ec62f --- /dev/null +++ b/llvm/test/tools/split-file/Inputs/basic-bb.crlf @@ -0,0 +1,4 @@ + + + +bb diff --git a/llvm/test/tools/split-file/basic.crlf.test b/llvm/test/tools/split-file/basic.crlf.test new file mode 100644 index 0000000000000..f01074a879630 --- /dev/null +++ b/llvm/test/tools/split-file/basic.crlf.test @@ -0,0 +1,10 @@ +#--- aa +aa +;--- bb +bb +;--- end + +# RUN: rm -rf %t +# RUN: split-file --leading-lines %s %t +# RUN: diff %S/Inputs/basic-aa.crlf %t/aa +# RUN: diff %S/Inputs/basic-bb.crlf %t/bb diff --git a/llvm/tools/split-file/split-file.cpp b/llvm/tools/split-file/split-file.cpp index bde7d21a51e9a..4a92c1be78a2b 100644 --- a/llvm/tools/split-file/split-file.cpp +++ b/llvm/tools/split-file/split-file.cpp @@ -71,6 +71,7 @@ struct Part { static int handle(MemoryBuffer &inputBuf, StringRef input) { DenseMap partToBegin; StringRef lastPart, separator; + StringRef EOL = inputBuf.getBuffer().detectEOL(); for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { const int64_t lineNo = i.line_number(); const StringRef line = *i++; @@ -128,7 +129,7 @@ static int handle(MemoryBuffer &inputBuf, StringRef input) { Part &part = keyValue.second; for (int64_t i = 0; i != part.leadingLines; ++i) - (*f).os().write('\n'); + (*f).os() << EOL; if (part.begin) (*f).os().write(part.begin, part.end - part.begin); outputFiles.push_back(std::move(f)); From 4f547ee8b8a7d3e298c6cd95e58dd8916883e2d5 Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Fri, 21 Jan 2022 15:01:34 -0500 Subject: [PATCH 235/946] [libc++][test] Add const and reference tests for enable_view. NFC. As discussed in https://reviews.llvm.org/D117714, there is missing test coverage for the behavior of `enable_view` when given a const or reference qualified type. Add such tests showing the current behavior. 
Differential Revision: https://reviews.llvm.org/D117918 --- .../range.view/enable_view.compile.pass.cpp | 65 +++++++++++++++++-- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp index 19ea867f3773a..7a12ccfc51e7f 100644 --- a/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp @@ -22,27 +22,84 @@ // Doesn't derive from view_base struct Empty { }; static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); // Derives from view_base, but privately struct PrivateViewBase : private std::ranges::view_base { }; static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); // Derives from view_base, but specializes enable_view to false struct EnableViewFalse : std::ranges::view_base { }; -namespace std::ranges { template <> constexpr bool enable_view = false; } +template <> constexpr bool std::ranges::enable_view = false; static_assert(!std::ranges::enable_view); - +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); // Derives from view_base struct PublicViewBase : std::ranges::view_base { }; static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(std::ranges::enable_view); 
+static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); // Does not derive from view_base, but specializes enable_view to true struct EnableViewTrue { }; -namespace std::ranges { template <> constexpr bool enable_view = true; } +template <> constexpr bool std::ranges::enable_view = true; static_assert(std::ranges::enable_view); - +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); // Make sure that enable_view is a bool, not some other contextually-convertible-to-bool type. ASSERT_SAME_TYPE(decltype(std::ranges::enable_view), const bool); ASSERT_SAME_TYPE(decltype(std::ranges::enable_view), const bool); + +struct V1 : std::ranges::view_interface {}; +static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); + +struct V2 : std::ranges::view_interface, std::ranges::view_interface {}; +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); + +struct V3 : std::ranges::view_interface {}; +static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(std::ranges::enable_view); +static_assert(!std::ranges::enable_view); +static_assert(!std::ranges::enable_view); + +struct PrivateInherit : private std::ranges::view_interface {}; +static_assert(!std::ranges::enable_view); + +// ADL-proof +struct Incomplete; +template struct Holder { T t; }; +static_assert(!std::ranges::enable_view*>); + 
+static_assert(!std::ranges::enable_view); From 9cddfe3085c4c500e64350b56c37ae2ed1cbe3f6 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Mon, 10 Jan 2022 14:51:37 -0800 Subject: [PATCH 236/946] [CMake] Passthrough OSX CMake options to builtins and runtimes When using the default target, there's no other way to pass these into the builtins and runtimes subbuilds. Differential Revision: https://reviews.llvm.org/D116976 --- llvm/runtimes/CMakeLists.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index ba78f466ab3a2..05567b5234eea 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -90,7 +90,10 @@ function(builtin_default_target compiler_rt_path) -DCMAKE_ASM_COMPILER_WORKS=ON ${COMMON_CMAKE_ARGS} ${BUILTINS_CMAKE_ARGS} - PASSTHROUGH_PREFIXES COMPILER_RT + PASSTHROUGH_PREFIXES CMAKE_OSX + COMPILER_RT + DARWIN + SANITIZER USE_TOOLCHAIN TARGET_TRIPLE ${TARGET_TRIPLE} ${EXTRA_ARGS}) @@ -181,10 +184,10 @@ foreach(entry ${runtimes}) if (${canon_name} STREQUAL "OPENMP") list(APPEND prefixes "LIBOMP" "LIBOMPTARGET") endif() - # Many compiler-rt options start with SANITIZER_ rather than COMPILER_RT_, - # so when compiler-rt is enabled, consider both. + # Many compiler-rt options start with SANITIZER_ and DARWIN_ rather than + # COMPILER_RT_, so when compiler-rt is enabled, consider both. 
if(canon_name STREQUAL "COMPILER_RT") - list(APPEND prefixes SANITIZER) + list(APPEND prefixes SANITIZER DARWIN) endif() string(FIND ${projName} "lib" LIB_IDX) @@ -241,7 +244,8 @@ function(runtime_default_target) -DCMAKE_ASM_COMPILER_WORKS=ON ${COMMON_CMAKE_ARGS} ${RUNTIMES_CMAKE_ARGS} - PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES + PASSTHROUGH_PREFIXES CMAKE_OSX + LLVM_ENABLE_RUNTIMES LLVM_USE_LINKER ${ARG_PREFIXES} EXTRA_TARGETS ${extra_targets} From e6cdef187ed37468c14c53723c58cbde3e1341db Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 21 Jan 2022 17:00:03 -0800 Subject: [PATCH 237/946] [XRay][test] Clean up llc RUN lines --- .../CodeGen/AArch64/xray-attribute-instrumentation.ll | 2 +- llvm/test/CodeGen/AArch64/xray-omit-function-index.ll | 4 ++-- .../AArch64/xray-partial-instrumentation-skip-entry.ll | 2 +- .../AArch64/xray-partial-instrumentation-skip-exit.ll | 2 +- llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll | 2 +- .../CodeGen/ARM/xray-armv6-attribute-instrumentation.ll | 4 ++-- .../CodeGen/ARM/xray-armv7-attribute-instrumentation.ll | 4 ++-- llvm/test/CodeGen/ARM/xray-tail-call-sled.ll | 4 ++-- llvm/test/CodeGen/Hexagon/xray-pred-ret.ll | 2 +- llvm/test/CodeGen/Hexagon/xray.ll | 4 ++-- .../CodeGen/Mips/xray-mips-attribute-instrumentation.ll | 8 ++++---- llvm/test/CodeGen/Mips/xray-section-group.ll | 8 ++++---- .../CodeGen/PowerPC/xray-attribute-instrumentation.ll | 5 ++--- llvm/test/CodeGen/PowerPC/xray-conditional-return.ll | 2 +- llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll | 2 +- llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll | 2 +- llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll | 2 +- llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll | 7 +++---- llvm/test/CodeGen/X86/xray-custom-log.ll | 4 ++-- llvm/test/CodeGen/X86/xray-empty-firstmbb.mir | 2 +- llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll | 4 ++-- llvm/test/CodeGen/X86/xray-log-args.ll | 4 ++-- llvm/test/CodeGen/X86/xray-loop-detection.ll | 4 ++-- 
llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir | 2 +- .../X86/xray-partial-instrumentation-skip-entry.ll | 7 +++---- .../CodeGen/X86/xray-partial-instrumentation-skip-exit.ll | 6 +++--- llvm/test/CodeGen/X86/xray-section-group.ll | 6 +++--- llvm/test/CodeGen/X86/xray-selective-instrumentation.ll | 2 +- llvm/test/CodeGen/X86/xray-tail-call-sled.ll | 4 ++-- 29 files changed, 54 insertions(+), 57 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll index b14463ed32a89..5ca170ac0a2b9 100644 --- a/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: foo: diff --git a/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll index 385298387b6b4..4b2e6b72c02f2 100644 --- a/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll +++ b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -no-xray-index -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -no-xray-index -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: Lxray_sled_0: @@ -30,4 +30,4 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" ; CHECK: .xword .Lxray_sled_1 ; CHECK-LABEL: Lxray_sleds_end0 -; CHECK-NOT: xray_fn_idx \ No newline at end of file +; CHECK-NOT: xray_fn_idx diff --git a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll index 
43e1dfd51740c..a28d780bf4975 100644 --- a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll +++ b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-entry" { ; CHECK-NOT: Lxray_sled_0: diff --git a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll index 4a74e9d19b4c3..ecfa6ac29f62b 100644 --- a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll +++ b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-exit" { ; CHECK-LABEL: Lxray_sled_0: diff --git a/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll b/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll index b6f7a4edbed5b..b4a541bca3284 100644 --- a/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll +++ b/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 2 diff --git a/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll b/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll index 3cec7cd699ad9..53bc8d62fd833 100644 --- a/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll @@ -1,5 +1,5 @@ -; RUN: llc 
-filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=armv6-apple-ios6.0.0 < %s | FileCheck %s +; RUN: llc -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=armv6-apple-ios6.0.0 < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: Lxray_sled_0: diff --git a/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll b/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll index e10746d33001f..98dbabcb6aba6 100644 --- a/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=armv7-apple-ios6.0.0 < %s | FileCheck %s +; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios6.0.0 < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: Lxray_sled_0: diff --git a/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll b/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll index 2d3af5595f13f..93b9e2f3387a1 100644 --- a/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll +++ b/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=armv7-apple-ios6.0.0 < %s | FileCheck %s +; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios6.0.0 < %s | FileCheck %s define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 2 diff --git a/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll b/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll index c7d5333059253..306a00fc298e0 100644 --- a/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll +++ 
b/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s +; RUN: llc -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s define void @Foo(i32 signext %a, i32 signext %b) #0 { ; CHECK-LABEL: @Foo diff --git a/llvm/test/CodeGen/Hexagon/xray.ll b/llvm/test/CodeGen/Hexagon/xray.ll index ba5913a12de39..b9b25b80ef00f 100644 --- a/llvm/test/CodeGen/Hexagon/xray.ll +++ b/llvm/test/CodeGen/Hexagon/xray.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-elf < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s +; RUN: llc -mtriple=hexagon-unknown-elf < %s | FileCheck %s +; RUN: llc -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: .Lxray_sled_0: diff --git a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll index 2b28fae57dbf5..ae542146a7997 100644 --- a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll @@ -1,7 +1,7 @@ -; RUN: llc -filetype=asm -o - -mtriple=mips-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s -; RUN: llc -filetype=asm -o - -mtriple=mipsel-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s -; RUN: llc -filetype=asm -o - -mtriple=mips64-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s -; RUN: llc -filetype=asm -o - -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s +; RUN: llc -mtriple=mips-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s +; RUN: llc -mtriple=mipsel-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK 
--check-prefix=CHECK-MIPS32 %s +; RUN: llc -mtriple=mips64-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s +; RUN: llc -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 2 diff --git a/llvm/test/CodeGen/Mips/xray-section-group.ll b/llvm/test/CodeGen/Mips/xray-section-group.ll index 218516dd189ca..5a208217092dd 100644 --- a/llvm/test/CodeGen/Mips/xray-section-group.ll +++ b/llvm/test/CodeGen/Mips/xray-section-group.ll @@ -1,11 +1,11 @@ -; RUN: llc -filetype=asm -o - -mtriple=mips-unknown-linux-gnu -function-sections < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=mipsel-unknown-linux-gnu -function-sections < %s | FileCheck %s +; RUN: llc -mtriple=mips-unknown-linux-gnu -function-sections < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-unknown-linux-gnu -function-sections < %s | FileCheck %s ; RUN: llc -filetype=obj -o %t -mtriple=mips-unknown-linux-gnu -function-sections < %s ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llc -filetype=obj -o %t -mtriple=mipsel-unknown-linux-gnu -function-sections < %s ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ -; RUN: llc -filetype=asm -o - -mtriple=mips64-unknown-linux-gnu -function-sections < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=mips64el-unknown-linux-gnu -function-sections < %s | FileCheck %s +; RUN: llc -mtriple=mips64-unknown-linux-gnu -function-sections < %s | FileCheck %s +; RUN: llc -mtriple=mips64el-unknown-linux-gnu -function-sections < %s | FileCheck %s ; RUN: llc -filetype=obj -o %t -mtriple=mips64-unknown-linux-gnu -function-sections < %s ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ ; RUN: llc -filetype=obj -o %t -mtriple=mips64el-unknown-linux-gnu -function-sections < %s diff --git 
a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll index f73679001158d..fcebe37753127 100644 --- a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll @@ -1,6 +1,5 @@ -; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: foo: diff --git a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll index c0e8c1c3c20ca..5851e92f78938 100644 --- a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll +++ b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define void @Foo(i32 signext %a, i32 signext %b) #0 { ; CHECK-LABEL: @Foo diff --git a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll index 1f176f6f36676..3f15d46290a1e 100644 --- a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll +++ b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define void @ILLBeBack() #0 { ; CHECK-LABEL: @ILLBeBack diff --git a/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll b/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll index d427dbb4238e1..949b2837fd5c2 
100644 --- a/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll +++ b/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s declare hidden i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" diff --git a/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll b/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll index e071e8ae40133..186ec53f88a95 100644 --- a/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll +++ b/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=asm -relocation-model=pic -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -relocation-model=pic -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK-LABEL: .Ltmp0: diff --git a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll index 8d1d2bc77c646..585c28ea12549 100644 --- a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll @@ -1,7 +1,6 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - \ -; RUN: -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 1, 0x90 diff --git 
a/llvm/test/CodeGen/X86/xray-custom-log.ll b/llvm/test/CodeGen/X86/xray-custom-log.ll index f0d882ddbac67..1579e4b909de7 100644 --- a/llvm/test/CodeGen/X86/xray-custom-log.ll +++ b/llvm/test/CodeGen/X86/xray-custom-log.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s --check-prefix=PIC +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s --check-prefix=PIC ; RUN: llc -mtriple=x86_64 -filetype=obj %s -o %t ; RUN: llvm-dwarfdump %t | FileCheck %s --check-prefix=DBG diff --git a/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir b/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir index e87c86591ce8d..df5dc7b28ec1a 100644 --- a/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir +++ b/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir @@ -1,4 +1,4 @@ -# RUN: llc -run-pass xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s +# RUN: llc -run-pass=xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s # # Make sure we can handle empty first basic blocks. 
diff --git a/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll b/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll index 2450d991e3aa2..29c9bea7509c8 100644 --- a/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll +++ b/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo(i32 %i) nounwind noinline uwtable "xray-instruction-threshold"="10" "xray-ignore-loops" { entry: diff --git a/llvm/test/CodeGen/X86/xray-log-args.ll b/llvm/test/CodeGen/X86/xray-log-args.ll index 812e04a483fb7..1aac51f42c75a 100644 --- a/llvm/test/CodeGen/X86/xray-log-args.ll +++ b/llvm/test/CodeGen/X86/xray-log-args.ll @@ -1,7 +1,7 @@ ; When logging arguments is specified, emit the entry sled accordingly. 
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @callee(i32 %arg) nounwind noinline uwtable "function-instrument"="xray-always" "xray-log-args"="1" { ret i32 %arg diff --git a/llvm/test/CodeGen/X86/xray-loop-detection.ll b/llvm/test/CodeGen/X86/xray-loop-detection.ll index 4acb22983be3d..81450da7408ef 100644 --- a/llvm/test/CodeGen/X86/xray-loop-detection.ll +++ b/llvm/test/CodeGen/X86/xray-loop-detection.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo(i32 %i) nounwind noinline uwtable "xray-instruction-threshold"="10" { entry: diff --git a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir index 69e8c6bfda4be..60a33b95f1412 100644 --- a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir +++ b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir @@ -1,4 +1,4 @@ -# RUN: llc -verify-machineinstrs -run-pass xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s +# RUN: llc -run-pass=xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s # # Make sure we can handle multiple ret instructions in a single basic block for # XRay. 
diff --git a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll index e0beb2e9082d1..83c254a0d8877 100644 --- a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll +++ b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll @@ -1,7 +1,6 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - \ -; RUN: -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-entry" { ; CHECK-NOT: Lxray_sled_0: diff --git a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll index 9c370e541219d..a7afad5980196 100644 --- a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll +++ b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll @@ -1,7 +1,7 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - \ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc \ ; RUN: -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-exit" { ; CHECK: .p2align 1, 0x90 
diff --git a/llvm/test/CodeGen/X86/xray-section-group.ll b/llvm/test/CodeGen/X86/xray-section-group.ll index 9bfe82d400c29..c05520adf8997 100644 --- a/llvm/test/CodeGen/X86/xray-section-group.ll +++ b/llvm/test/CodeGen/X86/xray-section-group.ll @@ -1,6 +1,6 @@ -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu -function-sections < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=obj -o %t -mtriple=x86_64-unknown-linux-gnu -function-sections < %s -; RUN: llvm-objdump --triple=x86_64-unknown-linux-gnu --disassemble-all %t | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -function-sections < %s | FileCheck %s +; RUN: llc -filetype=obj -o %t -mtriple=x86_64-unknown-linux-gnu -function-sections < %s +; RUN: llvm-objdump --disassemble-all %t | FileCheck %s --check-prefix=CHECK-OBJ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .section .text.foo,"ax",@progbits diff --git a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll index 8e3e0be412bcb..7bf47ea2894c0 100644 --- a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll +++ b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mcpu=nehalem | FileCheck %s +; RUN: llc < %s -mcpu=nehalem | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin8" diff --git a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll index d109cf1c3dea0..b89f833abb3b8 100644 --- a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll +++ b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs 
-filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s define dso_local i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 1, 0x90 From 04eb93b1d559b40ffd6e8f3146cfb2ade6bb49d0 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 19 Jan 2022 09:16:07 -0800 Subject: [PATCH 238/946] [flang] Fix repeated "DT" editing User-defined derived type editing in formatted I/O wasn't working with repeat counts; e.g., "2DT(10)". The solution required some code to be moved from GetNextDataEdit() to CueUpNextDataEdit() so that a stack entry for a nonparenthesized repeated data edit descriptor would work correctly -- all other data edit descriptors are capable of dealing with repetition in their callees, so the bug hadn't been exposed before. Debugging this problem led to some improvements in error messages for bad format strings, and those changes have been retained; also, a dead member function was discovered and expunged. 
Differential Revision: https://reviews.llvm.org/D117904 --- flang/runtime/descriptor-io.cpp | 2 +- flang/runtime/format-implementation.h | 135 ++++++++------------------ flang/runtime/format.h | 14 ++- flang/runtime/io-stmt.h | 5 +- 4 files changed, 55 insertions(+), 101 deletions(-) diff --git a/flang/runtime/descriptor-io.cpp b/flang/runtime/descriptor-io.cpp index 20828a6d9a84e..d34ac68c8a533 100644 --- a/flang/runtime/descriptor-io.cpp +++ b/flang/runtime/descriptor-io.cpp @@ -19,7 +19,7 @@ std::optional DefinedFormattedIo(IoStatementState &io, peek->descriptor == DataEdit::ListDirected)) { // User-defined derived type formatting IoErrorHandler &handler{io.GetIoErrorHandler()}; - DataEdit edit{*io.GetNextDataEdit()}; // consume it this time + DataEdit edit{*io.GetNextDataEdit(1)}; // now consume it; no repeats RUNTIME_CHECK(handler, edit.descriptor == peek->descriptor); char ioType[2 + edit.maxIoTypeChars]; auto ioTypeLen{std::size_t{2} /*"DT"*/ + edit.ioTypeChars}; diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h index a32bb8e928fd7..b9c1b8427afe3 100644 --- a/flang/runtime/format-implementation.h +++ b/flang/runtime/format-implementation.h @@ -33,59 +33,6 @@ FormatControl::FormatControl(const Terminator &terminator, stack_[0].remaining = Iteration::unlimited; // 13.4(8) } -template -int FormatControl::GetMaxParenthesisNesting( - IoErrorHandler &handler, const CharType *format, std::size_t formatLength) { - int maxNesting{0}; - int nesting{0}; - const CharType *end{format + formatLength}; - std::optional quote; - int repeat{0}; - for (const CharType *p{format}; p < end; ++p) { - if (quote) { - if (*p == *quote) { - quote.reset(); - } - } else if (*p >= '0' && *p <= '9') { - repeat = 10 * repeat + *p - '0'; - } else if (*p != ' ') { - switch (*p) { - case '\'': - case '"': - quote = *p; - break; - case 'h': - case 'H': // 9HHOLLERITH - p += repeat; - if (p >= end) { - handler.SignalError(IostatErrorInFormat, - 
"Hollerith (%dH) too long in FORMAT", repeat); - return maxNesting; - } - break; - case ' ': - break; - case '(': - ++nesting; - maxNesting = std::max(nesting, maxNesting); - break; - case ')': - nesting = std::max(nesting - 1, 0); - break; - } - repeat = 0; - } - } - if (quote) { - handler.SignalError( - IostatErrorInFormat, "Unbalanced quotation marks in FORMAT string"); - } else if (nesting) { - handler.SignalError( - IostatErrorInFormat, "Unbalanced parentheses in FORMAT string"); - } - return maxNesting; -} - template int FormatControl::GetIntField( IoErrorHandler &handler, CharType firstCh) { @@ -98,7 +45,11 @@ int FormatControl::GetIntField( int result{0}; bool negate{ch == '-'}; if (negate || ch == '+') { - firstCh = '\0'; + if (firstCh) { + firstCh = '\0'; + } else { + ++offset_; + } ch = PeekNext(); } while (ch >= '0' && ch <= '9') { @@ -222,6 +173,15 @@ static void HandleControl(CONTEXT &context, char ch, char next, int n) { template int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { int unlimitedLoopCheck{-1}; + // Do repetitions remain on an unparenthesized data edit? 
+ while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') { + offset_ = stack_[height_ - 1].start; + int repeat{stack_[height_ - 1].remaining}; + --height_; + if (repeat > 0) { + return repeat; + } + } while (true) { std::optional repeat; bool unlimited{false}; @@ -242,16 +202,18 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { unlimited = true; ch = GetNextChar(context); if (ch != '(') { - context.SignalError(IostatErrorInFormat, - "Invalid FORMAT: '*' may appear only before '('"); + ReportBadFormat(context, + "Invalid FORMAT: '*' may appear only before '('", + maybeReversionPoint); return 0; } } ch = Capitalize(ch); if (ch == '(') { if (height_ >= maxHeight_) { - context.SignalError(IostatErrorInFormat, - "FORMAT stack overflow: too many nested parentheses"); + ReportBadFormat(context, + "FORMAT stack overflow: too many nested parentheses", + maybeReversionPoint); return 0; } stack_[height_].start = offset_ - 1; // the '(' @@ -271,11 +233,11 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { // Subtle point (F'2018 13.4 para 9): tha last parenthesized group // at height 1 becomes the restart point after control reaches the // end of the format, including its repeat count. 
- stack_[0].start = maybeReversionPoint - 1; + stack_[0].start = maybeReversionPoint; } ++height_; } else if (height_ == 0) { - context.SignalError(IostatErrorInFormat, "FORMAT lacks initial '('"); + ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint); return 0; } else if (ch == ')') { if (height_ == 1) { @@ -284,12 +246,16 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { } context.AdvanceRecord(); // implied / before rightmost ) } - auto restart{stack_[height_ - 1].start + 1}; + auto restart{stack_[height_ - 1].start}; + if (format_[restart] == '(') { + ++restart; + } if (stack_[height_ - 1].remaining == Iteration::unlimited) { offset_ = restart; if (offset_ == unlimitedLoopCheck) { - context.SignalError(IostatErrorInFormat, - "Unlimited repetition in FORMAT lacks data edit descriptors"); + ReportBadFormat(context, + "Unlimited repetition in FORMAT lacks data edit descriptors", + restart); } } else if (stack_[height_ - 1].remaining-- > 0) { offset_ = restart; @@ -304,8 +270,9 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { ++offset_; } if (offset_ >= formatLength_) { - context.SignalError(IostatErrorInFormat, - "FORMAT missing closing quote on character literal"); + ReportBadFormat(context, + "FORMAT missing closing quote on character literal", + maybeReversionPoint); return 0; } ++offset_; @@ -322,8 +289,8 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { } else if (ch == 'H') { // 9HHOLLERITH if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) { - context.SignalError( - IostatErrorInFormat, "Invalid width on Hollerith in FORMAT"); + ReportBadFormat(context, "Invalid width on Hollerith in FORMAT", + maybeReversionPoint); return 0; } context.Emit(format_ + offset_, static_cast(*repeat)); @@ -364,8 +331,8 @@ int FormatControl::CueUpNextDataEdit(Context &context, bool stop) { // TODO: any other raw characters? 
context.Emit(format_ + offset_ - 1, 1); } else { - context.SignalError(IostatErrorInFormat, - "Invalid character '%c' in FORMAT", static_cast(ch)); + ReportBadFormat( + context, "Invalid character in FORMAT", maybeReversionPoint); return 0; } } @@ -410,11 +377,9 @@ DataEdit FormatControl::GetNextDataEdit( } } if (!ok) { - context.SignalError( - IostatErrorInFormat, "Unclosed DT'iotype' in FORMAT"); + ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start); } else if (tooLong) { - context.SignalError( - IostatErrorInFormat, "Excessive DT'iotype' in FORMAT"); + ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start); } } if (PeekNext() == '(') { @@ -434,11 +399,9 @@ DataEdit FormatControl::GetNextDataEdit( } } if (!ok) { - context.SignalError( - IostatErrorInFormat, "Unclosed DT(v_list) in FORMAT"); + ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start); } else if (tooLong) { - context.SignalError( - IostatErrorInFormat, "Excessive DT(v_list) in FORMAT"); + ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start); } } } @@ -460,27 +423,13 @@ DataEdit FormatControl::GetNextDataEdit( } } edit.modes = context.mutableModes(); - // Handle repeated nonparenthesized edit descriptors + edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0 if (repeat > maxRepeat) { stack_[height_].start = start; // after repeat count - stack_[height_].remaining = repeat; // full count + stack_[height_].remaining = repeat - edit.repeat; ++height_; } - edit.repeat = std::min(1, maxRepeat); // 0 if maxRepeat==0 - if (height_ > 1) { // Subtle: stack_[0].start doesn't necessarily point to '(' - int start{stack_[height_ - 1].start}; - if (format_[start] != '(') { - if (stack_[height_ - 1].remaining > maxRepeat) { - edit.repeat = maxRepeat; - stack_[height_ - 1].remaining -= maxRepeat; - offset_ = start; // repeat same edit descriptor next time - } else { - edit.repeat = stack_[height_ - 1].remaining; - --height_; - } - } - } return edit; } diff 
--git a/flang/runtime/format.h b/flang/runtime/format.h index 8dccaab969a6a..98c1136b0f519 100644 --- a/flang/runtime/format.h +++ b/flang/runtime/format.h @@ -86,11 +86,6 @@ template class FormatControl { FormatControl(const Terminator &, const CharType *format, std::size_t formatLength, int maxHeight = maxMaxHeight); - // Determines the max parenthesis nesting level by scanning and validating - // the FORMAT string. - static int GetMaxParenthesisNesting( - IoErrorHandler &, const CharType *format, std::size_t formatLength); - // For attempting to allocate in a user-supplied stack area static std::size_t GetNeededSize(int maxHeight) { return sizeof(FormatControl) - @@ -146,6 +141,15 @@ template class FormatControl { return ch >= 'a' && ch <= 'z' ? ch + 'A' - 'a' : ch; } + void ReportBadFormat(Context &context, const char *msg, int offset) const { + if constexpr (std::is_same_v) { + context.SignalError(IostatErrorInFormat, + "%s; at offset %d in format '%s'", msg, offset, format_); + } else { + context.SignalError(IostatErrorInFormat, "%s; at offset %d", msg, offset); + } + } + // Data members are arranged and typed so as to reduce size. // This structure may be allocated in stack space loaned by the // user program for internal I/O. 
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 8327326c7f9ef..baf038dbdd245 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -87,7 +87,7 @@ class IoStatementState { void BackspaceRecord(); void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); // for r* in list I/O - std::optional GetNextDataEdit(int = 1); + std::optional GetNextDataEdit(int maxRepeat = 1); ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit bool BeginReadingRecord(); void FinishReadingRecord(); @@ -287,7 +287,8 @@ struct IoStatementBase : public IoErrorHandler { void BackspaceRecord(); void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); - std::optional GetNextDataEdit(IoStatementState &, int = 1); + std::optional GetNextDataEdit( + IoStatementState &, int maxRepeat = 1); ExternalFileUnit *GetExternalFileUnit() const; bool BeginReadingRecord(); void FinishReadingRecord(); From db07e082abafb1494ba674047645e6113316673c Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 21 Jan 2022 16:58:06 -0800 Subject: [PATCH 239/946] [TSan] Omit vfork interceptor iOS simulator runtime `_vfork` moved from libsystem_kernel.dylib to libsystem_c.dylib as part of the below changes. The iOS simulator does not actually have libsystem_kernel.dylib of its own, it only has the host Mac's. The umbrella-nature of Libsystem makes this movement transparent to everyone; except the simulator! So when we "back deploy", i.e., use the current version of TSan with an older simulator runtime then this symbol is now missing, when we run on the latest OS (but an older simulator runtime). Note we use `SANITIZER_IOS` because usage of vfork is forbidden on iOS and the API is completely unavailable on watchOS and tvOS, even if this problem is specific to the iOS simulator. 
Caused by: rdar://74818691 (Shim vfork() to fork syscall on iOS) rdar://76762076 (Shim vfork() to fork syscall on macOS) Radar-Id: rdar://8634734 --- compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index c4f43d8171abb..056bd15e0907a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2204,6 +2204,7 @@ void atfork_child() { FdOnFork(thr, pc); } +#if !SANITIZER_IOS TSAN_INTERCEPTOR(int, vfork, int fake) { // Some programs (e.g. openjdk) call close for all file descriptors // in the child process. Under tsan it leads to false positives, because @@ -2220,6 +2221,7 @@ TSAN_INTERCEPTOR(int, vfork, int fake) { // Instead we simply turn vfork into fork. return WRAP(fork)(fake); } +#endif #if SANITIZER_LINUX TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags, From 3726626a26ec7bfccfd526e02f89c1ac5fe3520a Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 18 Jan 2022 15:17:15 -0800 Subject: [PATCH 240/946] [flang] Fix crash from USE-associated defined I/O subprograms User-defined derived type I/O implementation subroutines and generic interfaces may be USE-associated, but the code that builds the type description table wasn't allowing for that possibility. Add a call to GetUltimate() to cope. 
Differential Revision: https://reviews.llvm.org/D117902 --- flang/lib/Semantics/runtime-type-info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp index 4c53df09ee637..250223bfafab1 100644 --- a/flang/lib/Semantics/runtime-type-info.cpp +++ b/flang/lib/Semantics/runtime-type-info.cpp @@ -1065,7 +1065,7 @@ void RuntimeTableBuilder::IncorporateDefinedIoGenericInterfaces( GenericKind::DefinedIo definedIo, const Scope *scope) { for (; !scope->IsGlobal(); scope = &scope->parent()) { if (auto asst{scope->find(name)}; asst != scope->end()) { - const Symbol &generic{*asst->second}; + const Symbol &generic{asst->second->GetUltimate()}; const auto &genericDetails{generic.get()}; CHECK(std::holds_alternative( genericDetails.kind().u)); From b95150418fb6e2d22a0bd84abcdc1f3cc7e5a0bf Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 13 Jan 2022 16:02:45 -0800 Subject: [PATCH 241/946] [lldb] Allow aliases to aliases of raw input commands Allow users to create aliases for aliases to raw input commands. That probably sounds convoluted, so here's an example: ``` command alias some-setup env SOMEVAR=SOMEVALUE ``` This is an alias based on `env`, which itself is an alias for `_regex-env`. `_regex-env` is a `command regex` command, which takes raw input. The above `some-setup` alias fails with: ``` error: Unable to create requested alias. ``` This change allows such aliases to be created. lldb already supports aliases to aliases for parsed commands.
Differential Revision: https://reviews.llvm.org/D117259 --- lldb/source/Commands/CommandObjectCommands.cpp | 5 +++-- .../API/commands/command/nested_alias/TestNestedAlias.py | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 1ec54cf7ededa..defa21af7c170 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -485,8 +485,9 @@ rather than using a positional placeholder:" OptionArgVectorSP option_arg_vector_sp = OptionArgVectorSP(new OptionArgVector); - if (CommandObjectSP cmd_obj_sp = - m_interpreter.GetCommandSPExact(cmd_obj.GetCommandName())) { + const bool include_aliases = true; + if (CommandObjectSP cmd_obj_sp = m_interpreter.GetCommandSPExact( + cmd_obj.GetCommandName(), include_aliases)) { if (m_interpreter.AliasExists(alias_command) || m_interpreter.UserCommandExists(alias_command)) { result.AppendWarningWithFormat( diff --git a/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py b/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py index d4fc99492a698..bbe9c14f69f6d 100644 --- a/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py +++ b/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py @@ -46,6 +46,8 @@ def cleanup(): self.runCmd('command unalias rd', check=False) self.runCmd('command unalias fo', check=False) self.runCmd('command unalias foself', check=False) + self.runCmd('command unalias add_two', check=False) + self.runCmd('command unalias two', check=False) # Execute the cleanup function during test case tear down. self.addTearDownHook(cleanup) @@ -96,3 +98,8 @@ def cleanup(): 'Show variables for the current', 'stack frame.'], matching=True) + + # Check that aliases can be created for raw input commands. 
+ self.expect('command alias two expr -- 2') + self.expect('command alias add_two two +') + self.expect('add_two 3', patterns=[' = 5$']) From e796eaf2af65d5b7f09d7024a545ab0c61b832ac Mon Sep 17 00:00:00 2001 From: Alex Fan Date: Fri, 21 Jan 2022 19:51:09 +0800 Subject: [PATCH 242/946] [RISCV][RFC] add MC support for zbkc subextension Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117874 --- llvm/lib/Support/RISCVISAInfo.cpp | 1 + llvm/lib/Target/RISCV/RISCV.td | 15 ++++++++++++++ llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 9 ++++++--- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 2 +- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 2 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 ++ llvm/test/CodeGen/RISCV/attributes.ll | 4 ++++ llvm/test/MC/RISCV/attribute-arch.s | 3 +++ llvm/test/MC/RISCV/rv32zbkc-invalid.s | 9 +++++++++ llvm/test/MC/RISCV/rv32zbkc-valid.s | 23 ++++++++++++++++++++++ 10 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 llvm/test/MC/RISCV/rv32zbkc-invalid.s create mode 100644 llvm/test/MC/RISCV/rv32zbkc-valid.s diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 80fae5510326d..c42f3604d67ff 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -57,6 +57,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zbs", RISCVExtensionVersion{1, 0}}, {"zbkb", RISCVExtensionVersion{1, 0}}, + {"zbkc", RISCVExtensionVersion{1, 0}}, }; static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 378720bc6b26d..2b6ea4067a8ea 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -163,6 +163,21 @@ def HasStdExtZbbOrZbpOrZbkb "'Zbp' (Permutation 'B' Instructions) or " "'Zbkb' (Bitmanip instructions for Cryptography)">; +// The Carry-less multiply subextension for cryptography is a subset of basic carry-less 
multiply subextension. The former should be enabled if the latter is enabled. +def FeatureStdExtZbkc + : SubtargetFeature<"zbkc", "HasStdExtZbkc", "true", + "'Zbkc' (Carry-less multiply instructions for Cryptography)">; +def HasStdExtZbkc + : Predicate<"Subtarget->hasStdExtZbkc()">, + AssemblerPredicate<(all_of FeatureStdExtZbkc), + "'Zbkc' (Carry-less multiply instructions for Cryptography)">; + +def HasStdExtZbcOrZbkc + : Predicate<"Subtarget->hasStdExtZbc() || Subtarget->hasStdExtZbkc()">, + AssemblerPredicate<(any_of FeatureStdExtZbc, FeatureStdExtZbkc), + "'Zbc' (Carry-Less 'B' Instructions) or " + "'Zbkc' (Carry-less multiply instructions for Cryptography)">; + def FeatureNoRVCHints : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false", "Disable RVC Hint Instructions.">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index a8fffb2d7d792..560ebb4eb08c6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -440,13 +440,16 @@ def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">, Sched<[]>; let Predicates = [HasStdExtZbc] in { -def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, - Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">, Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; +} // Predicates = [HasStdExtZbc] + +let Predicates = [HasStdExtZbcOrZbkc] in { +def CLMUL : ALU_rr<0b0000101, 0b001, "clmul">, + Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">, Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>; -} // Predicates = [HasStdExtZbc] +} // Predicates = [HasStdExtZbcOrZbkc] let Predicates = [HasStdExtZbb] in { def MIN : ALU_rr<0b0000101, 0b100, "min">, diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 6cc24fa17c84a..783e65c1aa185 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ 
b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = false; - let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions, HasVInstructionsI64]; + let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions, HasVInstructionsI64]; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 2da8c14088890..d164514ce70f0 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = 0; - let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions]; + let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions]; } // The SiFive7 microarchitecture has two pipelines: A and B. 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 141e7114b5883..bacb8fae37941 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -84,6 +84,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtZfhmin = false; bool HasStdExtZfh = false; bool HasStdExtZbkb = false; + bool HasStdExtZbkc = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -158,6 +159,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; } bool hasStdExtZbkb() const { return HasStdExtZbkb; } + bool hasStdExtZbkc() const { return HasStdExtZbkc; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index fa02d72797350..19ba02d531567 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -20,6 +20,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s +; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s @@ -40,6 +41,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck 
--check-prefix=RV64COMBINED %s ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s +; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s ; RV32M: .attribute 5, "rv32i2p0_m2p0" @@ -62,6 +64,7 @@ ; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0" +; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -83,6 +86,7 @@ ; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0" +; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index d95e99348e434..65d4008a5869e 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -130,3 +130,6 @@ .attribute arch, "rv32i_zbkb1p0" # CHECK: attribute 5, "rv32i2p0_zbkb1p0" + +.attribute arch, "rv32i_zbkc1p0" +# CHECK: attribute 5, "rv32i2p0_zbkc1p0" diff --git a/llvm/test/MC/RISCV/rv32zbkc-invalid.s b/llvm/test/MC/RISCV/rv32zbkc-invalid.s new file mode 100644 index 0000000000000..dba625fe20c2a --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zbkc-invalid.s @@ -0,0 +1,9 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+zbkc < %s 2>&1 | FileCheck %s + +# Too few operands +clmul t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction +# Too few operands +clmulh t0, t1 # CHECK: 
:[[@LINE]]:1: error: too few operands for instruction + +# Undefined zbc instruction in zbkc +clmulr t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbc' (Carry-Less 'B' Instructions) diff --git a/llvm/test/MC/RISCV/rv32zbkc-valid.s b/llvm/test/MC/RISCV/rv32zbkc-valid.s new file mode 100644 index 0000000000000..10a62d9d2ce2d --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zbkc-valid.s @@ -0,0 +1,23 @@ +# With Bitmanip carry-less multiply extension: +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkc -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkc -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkc < %s \ +# RUN: | llvm-objdump --mattr=+zbkc -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkc < %s \ +# RUN: | llvm-objdump --mattr=+zbkc -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkc,+zbc -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkc,+zbc < %s \ +# RUN: | llvm-objdump --mattr=+zbkc,+zbc -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: clmul t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x12,0x73,0x0a] +clmul t0, t1, t2 +# CHECK-ASM-AND-OBJ: clmulh t0, t1, t2 +# CHECK-ASM: encoding: [0xb3,0x32,0x73,0x0a] +clmulh t0, t1, t2 From b1856009fbc1fd594e283460a781f2b34fbd7d55 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Tue, 18 Jan 2022 10:40:10 -0800 Subject: [PATCH 243/946] [flang] Allow INQUIRE() on a child unit in user-defined I/O procedure A procedure that implements a user-defined derived type I/O operation is allowed to perform an INQUIRE statement on its unit. 
Differential Revision: https://reviews.llvm.org/D117905 --- flang/runtime/io-api.cpp | 49 ++++++++++++++++++++++------------------ flang/runtime/unit.h | 2 +- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 03c878d1a8506..64c798d7ff8ae 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -156,6 +156,13 @@ Cookie BeginExternalListIO(const char *what, int unitNumber, } ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, false /*!unformatted*/, terminator)}; + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = false; + } + if (*unit.isUnformatted) { + terminator.Crash("%s attempted on unformatted file", what); + return nullptr; + } if (ChildIo * child{unit.GetChildIo()}) { return child->CheckFormattingAndDirection(terminator, what, false, DIR) ? &child->BeginIoStatement>( @@ -166,13 +173,6 @@ Cookie BeginExternalListIO(const char *what, int unitNumber, terminator.Crash("%s attempted on direct access file", what); return nullptr; } - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = false; - } - if (*unit.isUnformatted) { - terminator.Crash("%s attempted on unformatted file", what); - return nullptr; - } IoErrorHandler handler{terminator}; unit.SetDirection(DIR, handler); IoStatementState &io{unit.BeginIoStatement>( @@ -202,6 +202,13 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, } ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, false /*!unformatted*/, terminator)}; + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = false; + } + if (*unit.isUnformatted) { + terminator.Crash("Formatted I/O attempted on unformatted file"); + return nullptr; + } if (ChildIo * child{unit.GetChildIo()}) { return child->CheckFormattingAndDirection(terminator, DIR == Direction::Output ?
"formatted output" @@ -211,13 +218,6 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, *child, sourceFile, sourceLine) : nullptr; } else { - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = false; - } - if (*unit.isUnformatted) { - terminator.Crash("Formatted I/O attempted on unformatted file"); - return nullptr; - } IoErrorHandler handler{terminator}; unit.SetDirection(DIR, handler); IoStatementState &io{ @@ -247,6 +247,12 @@ Cookie BeginUnformattedIO( Terminator terminator{sourceFile, sourceLine}; ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( unitNumber, DIR, true /*unformatted*/, terminator)}; + if (!unit.isUnformatted.has_value()) { + unit.isUnformatted = true; + } + if (!*unit.isUnformatted) { + terminator.Crash("Unformatted I/O attempted on formatted file"); + } if (ChildIo * child{unit.GetChildIo()}) { return child->CheckFormattingAndDirection(terminator, DIR == Direction::Output ? "unformatted output" @@ -256,12 +262,6 @@ Cookie BeginUnformattedIO( *child, sourceFile, sourceLine) : nullptr; } else { - if (!unit.isUnformatted.has_value()) { - unit.isUnformatted = true; - } - if (!*unit.isUnformatted) { - terminator.Crash("Unformatted I/O attempted on formatted file"); - } IoStatementState &io{ unit.BeginIoStatement>( unit, sourceFile, sourceLine)}; @@ -367,8 +367,13 @@ Cookie IONAME(BeginRewind)( Cookie IONAME(BeginInquireUnit)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { - return &unit->BeginIoStatement( - *unit, sourceFile, sourceLine); + if (ChildIo * child{unit->GetChildIo()}) { + return &child->BeginIoStatement( + *unit, sourceFile, sourceLine); + } else { + return &unit->BeginIoStatement( + *unit, sourceFile, sourceLine); + } } else { // INQUIRE(UNIT=unrecognized unit) Terminator oom{sourceFile, sourceLine}; diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index a63921c9db75d..eaaccd578d8b8 
100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -183,7 +183,7 @@ class ChildIo { ChildListIoStatementState, ChildListIoStatementState, ChildUnformattedIoStatementState, - ChildUnformattedIoStatementState> + ChildUnformattedIoStatementState, InquireUnitState> u_; std::optional io_; }; From 55d887b833646baeea0e3371fd2cbbd7550a8d4d Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 18 Jan 2022 14:08:48 -0800 Subject: [PATCH 244/946] [time-trace] Add optimizer and codegen regions to NPM Optimizer and codegen regions were only added to legacy PM. Add them to NPM as well. Differential Revision: https://reviews.llvm.org/D117605 --- clang/lib/CodeGen/BackendUtil.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 6b8e052305b49..9ae5c870afc81 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1492,8 +1492,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline( } // Now that we have all of the passes ready, run them. - PrettyStackTraceString CrashInfo("Optimizer"); - MPM.run(*TheModule, MAM); + { + PrettyStackTraceString CrashInfo("Optimizer"); + llvm::TimeTraceScope TimeScope("Optimizer"); + MPM.run(*TheModule, MAM); + } } void EmitAssemblyHelper::RunCodegenPipeline( @@ -1525,8 +1528,11 @@ void EmitAssemblyHelper::RunCodegenPipeline( return; } - PrettyStackTraceString CrashInfo("Code generation"); - CodeGenPasses.run(*TheModule); + { + PrettyStackTraceString CrashInfo("Code generation"); + llvm::TimeTraceScope TimeScope("CodeGenPasses"); + CodeGenPasses.run(*TheModule); + } } /// A clean version of `EmitAssembly` that uses the new pass manager. From b6098c07cb2076e53b4251df9edfc0a01d75ee4c Mon Sep 17 00:00:00 2001 From: Prashant Kumar Date: Fri, 21 Jan 2022 23:53:22 +0530 Subject: [PATCH 245/946] [MLIR] Fix negative gcd in `normalizeDivisionByGCD` function. 
When the coefficients of dividend are negative, the gcd may be negative which will change the sign of dividend and overflow denominator. Reviewed By: Groverkss Differential Revision: https://reviews.llvm.org/D117911 --- mlir/lib/Analysis/Presburger/Utils.cpp | 7 ++++-- .../Presburger/IntegerPolyhedronTest.cpp | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp index 765d4fb6a8dfd..7b8fe49b23c83 100644 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -25,7 +25,10 @@ static void normalizeDivisionByGCD(SmallVectorImpl ÷nd, unsigned &divisor) { if (divisor == 0 || dividend.empty()) return; - int64_t gcd = llvm::greatestCommonDivisor(dividend.front(), int64_t(divisor)); + // We take the absolute value of dividend's coefficients to make sure that + // `gcd` is positive. + int64_t gcd = + llvm::greatestCommonDivisor(std::abs(dividend.front()), int64_t(divisor)); // The reason for ignoring the constant term is as follows. // For a division: @@ -35,7 +38,7 @@ static void normalizeDivisionByGCD(SmallVectorImpl ÷nd, // Since `{a/m}/d` in the dividend satisfies 0 <= {a/m}/d < 1/d, it will not // influence the result of the floor division and thus, can be ignored. 
for (size_t i = 1, m = dividend.size() - 1; i < m; i++) { - gcd = llvm::greatestCommonDivisor(dividend[i], gcd); + gcd = llvm::greatestCommonDivisor(std::abs(dividend[i]), gcd); if (gcd == 1) return; } diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp index 2e4c135770431..5d1cfb4c6e781 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp @@ -969,4 +969,28 @@ TEST(IntegerPolyhedronTest, mergeDivisionsConstants) { } } +TEST(IntegerPolyhedronTest, negativeDividends) { + // (x) : (exists y = [-x + 1 / 2], z = [-x - 2 / 3]: y + z >= x). + IntegerPolyhedron poly1(1); + poly1.addLocalFloorDiv({-1, 1}, 2); // y = [x + 1 / 2]. + // Normalization test with negative dividends + poly1.addLocalFloorDiv({-3, 0, -6}, 9); // z = [3x + 6 / 9] -> [x + 2 / 3]. + poly1.addInequality({-1, 1, 1, 0}); // y + z >= x. + + // (x) : (exists y = [x + 1 / 3], z = [x + 2 / 3]: y + z <= x). + IntegerPolyhedron poly2(1); + // Normalization test. + poly2.addLocalFloorDiv({-2, 2}, 4); // y = [-2x + 2 / 4] -> [-x + 1 / 2]. + poly2.addLocalFloorDiv({-1, 0, -2}, 3); // z = [-x - 2 / 3]. + poly2.addInequality({1, -1, -1, 0}); // y + z <= x. + + poly1.mergeLocalIds(poly2); + + // Merging triggers normalization. + std::vector> divisions = {{-1, 0, 0, 1}, + {-1, 0, 0, -2}}; + SmallVector denoms = {2, 3}; + checkDivisionRepresentation(poly1, divisions, denoms); +} + } // namespace mlir From de872382951572b70dfaefe8d77eb98d15586115 Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Sat, 22 Jan 2022 16:07:17 +0800 Subject: [PATCH 246/946] [JITLink] Add anonymous symbols in LinkGraph for unnamed temporary symbols In RISCV, temporary symbols will be used to generate dwarf, eh_frame sections..., and will be placed in object code's symbol table. However, LLVM does not use names on these temporary symbols. 
This patch add anonymous symbols in LinkGraph for these temporary symbols. Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D116475 --- .../llvm/ExecutionEngine/JITLink/riscv.h | 8 +++++++- .../JITLink/ELFLinkGraphBuilder.h | 12 +++++++---- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 8 ++++++++ llvm/lib/ExecutionEngine/JITLink/riscv.cpp | 2 ++ .../JITLink/RISCV/anonymous_symbol.s | 20 +++++++++++++++++++ 5 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index 3c5cdfcfdba40..4d045a23c012b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -79,6 +79,12 @@ enum EdgeKind_riscv : Edge::Kind { /// Fixup <- (Target - Fixup + Addend) R_RISCV_CALL, + /// 32 bits PC relative relocation + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) + R_RISCV_32_PCREL, + /// PC relative GOT offset /// /// Fixup expression: @@ -137,7 +143,7 @@ enum EdgeKind_riscv : Edge::Kind { /// /// Fixup expression /// Fixup <- (Target - *{1}Fixup - Addend) - R_RISCV_SUB8 + R_RISCV_SUB8, }; /// Returns a string name for the given riscv edge. For debugging purposes diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 931a60224ee2f..2ab7ed61f71b4 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -441,11 +441,15 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { << "\"\n"; }); - // Model the section symbols as anonymous symbol. + // In RISCV, temporary symbols (Used to generate dwarf, eh_frame + // sections...) 
will appear in object code's symbol table, and LLVM does + // not use names on these temporary symbols (RISCV gnu toolchain uses + // names on these temporary symbols). If the symbol is unnamed, add an + // anonymous symbol. auto &GSym = - Sym.getType() == ELF::STT_SECTION - ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false, - false) + Name->empty() + ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, + false, false) : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, S, Sym.getType() == ELF::STT_FUNC, false); setGraphSymbol(SymIndex, GSym); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 4483147c1b1d7..b3bc5cdb13f1d 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -359,6 +359,12 @@ class ELFJITLinker_riscv : public JITLinker { *FixupPtr = static_cast(Value); break; } + case R_RISCV_32_PCREL: { + // FIXME: It seems that R_RISCV_32_PCREL relocation will only appear in debug sections + // like eh_frame section. 
Currently, because of eh_frame will not be processed in JITLink's RISCV + // backend, test this relocation is difficult, so here report error if needs to fixup this relocation + return make_error("Fixup of relocation type R_RISCV_32_PCREL is not supportted"); + } } return Error::success(); } @@ -409,6 +415,8 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { return EdgeKind_riscv::R_RISCV_SUB16; case ELF::R_RISCV_SUB8: return EdgeKind_riscv::R_RISCV_SUB8; + case ELF::R_RISCV_32_PCREL: + return EdgeKind_riscv::R_RISCV_32_PCREL; } return make_error("Unsupported riscv relocation:" + diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp index 4d1ace73a04e3..1dcc3ebb8e97b 100644 --- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp @@ -38,6 +38,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_PCREL_LO12_S"; case R_RISCV_CALL: return "R_RISCV_CALL"; + case R_RISCV_32_PCREL: + return "R_RISCV_32_PCREL"; case R_RISCV_ADD64: return "R_RISCV_ADD64"; case R_RISCV_ADD32: diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s new file mode 100644 index 0000000000000..cb4c59e196dc9 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s +# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s +# +# Because of the exist of cfi directive, sections like eh_frame section will be emitted +# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate +# the section start and section end. So that by relocating these symbol, the section length +# can be calculated. 
+# +# CHECK: Creating defined graph symbol for ELF symbol "" +# CHECK: Creating defined graph symbol for ELF symbol "" + .text + .globl main + .p2align 2 + .type main,@function +main: + .cfi_startproc + ret + .Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc From fdb6578514dd3799ad23c8bbb7699577c0fb414d Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Sat, 22 Jan 2022 17:26:54 +0800 Subject: [PATCH 247/946] Revert "[JITLink] Add anonymous symbols in LinkGraph for unnamed temporary symbols" This reverts commit de872382951572b70dfaefe8d77eb98d15586115. Buildbot check error --- .../llvm/ExecutionEngine/JITLink/riscv.h | 8 +------- .../JITLink/ELFLinkGraphBuilder.h | 12 ++++------- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 8 -------- llvm/lib/ExecutionEngine/JITLink/riscv.cpp | 2 -- .../JITLink/RISCV/anonymous_symbol.s | 20 ------------------- 5 files changed, 5 insertions(+), 45 deletions(-) delete mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index 4d045a23c012b..3c5cdfcfdba40 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -79,12 +79,6 @@ enum EdgeKind_riscv : Edge::Kind { /// Fixup <- (Target - Fixup + Addend) R_RISCV_CALL, - /// 32 bits PC relative relocation - /// - /// Fixup expression: - /// Fixup <- (Target - Fixup + Addend) - R_RISCV_32_PCREL, - /// PC relative GOT offset /// /// Fixup expression: @@ -143,7 +137,7 @@ enum EdgeKind_riscv : Edge::Kind { /// /// Fixup expression /// Fixup <- (Target - *{1}Fixup - Addend) - R_RISCV_SUB8, + R_RISCV_SUB8 }; /// Returns a string name for the given riscv edge. 
For debugging purposes diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 2ab7ed61f71b4..931a60224ee2f 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -441,15 +441,11 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { << "\"\n"; }); - // In RISCV, temporary symbols (Used to generate dwarf, eh_frame - // sections...) will appear in object code's symbol table, and LLVM does - // not use names on these temporary symbols (RISCV gnu toolchain uses - // names on these temporary symbols). If the symbol is unnamed, add an - // anonymous symbol. + // Model the section symbols as anonymous symbol. auto &GSym = - Name->empty() - ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, - false, false) + Sym.getType() == ELF::STT_SECTION + ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false, + false) : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, S, Sym.getType() == ELF::STT_FUNC, false); setGraphSymbol(SymIndex, GSym); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index b3bc5cdb13f1d..4483147c1b1d7 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -359,12 +359,6 @@ class ELFJITLinker_riscv : public JITLinker { *FixupPtr = static_cast(Value); break; } - case R_RISCV_32_PCREL: { - // FIXME: It seems that R_RISCV_32_PCREL relocation will only appear in debug sections - // like eh_frame section. 
Currently, because of eh_frame will not be processed in JITLink's RISCV - // backend, test this relocation is difficult, so here report error if needs to fixup this relocation - return make_error("Fixup of relocation type R_RISCV_32_PCREL is not supportted"); - } } return Error::success(); } @@ -415,8 +409,6 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { return EdgeKind_riscv::R_RISCV_SUB16; case ELF::R_RISCV_SUB8: return EdgeKind_riscv::R_RISCV_SUB8; - case ELF::R_RISCV_32_PCREL: - return EdgeKind_riscv::R_RISCV_32_PCREL; } return make_error("Unsupported riscv relocation:" + diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp index 1dcc3ebb8e97b..4d1ace73a04e3 100644 --- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp @@ -38,8 +38,6 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_PCREL_LO12_S"; case R_RISCV_CALL: return "R_RISCV_CALL"; - case R_RISCV_32_PCREL: - return "R_RISCV_32_PCREL"; case R_RISCV_ADD64: return "R_RISCV_ADD64"; case R_RISCV_ADD32: diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s deleted file mode 100644 index cb4c59e196dc9..0000000000000 --- a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s +++ /dev/null @@ -1,20 +0,0 @@ -# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s -# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s -# -# Because of the exist of cfi directive, sections like eh_frame section will be emitted -# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate -# the section start and section end. So that by relocating these symbol, the section length -# can be calculated. 
-# -# CHECK: Creating defined graph symbol for ELF symbol "" -# CHECK: Creating defined graph symbol for ELF symbol "" - .text - .globl main - .p2align 2 - .type main,@function -main: - .cfi_startproc - ret - .Lfunc_end0: - .size main, .Lfunc_end0-main - .cfi_endproc From f7d4cafe5a6a51ccc6072c9dd304ced4f8e96aa7 Mon Sep 17 00:00:00 2001 From: fourdim Date: Sat, 22 Jan 2022 03:34:13 +0800 Subject: [PATCH 248/946] [JITLink][RISCV] Support R_RISCV_SET* and R_RISCV_32_PCREL relocations This patch supports R_RISCV_SET* and R_RISCV_32_PCREL relocations in JITLink. Reviewed By: StephenFan Differential Revision: https://reviews.llvm.org/D117082 --- .../llvm/ExecutionEngine/JITLink/riscv.h | 32 +++++++++++++- .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 43 +++++++++++++++++++ llvm/lib/ExecutionEngine/JITLink/riscv.cpp | 10 +++++ .../JITLink/RISCV/ELF_pc_relative.s | 19 ++++++++ .../JITLink/RISCV/ELF_reloc_set.s | 31 +++++++++++++ 5 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index 3c5cdfcfdba40..ed874c53d269b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -137,7 +137,37 @@ enum EdgeKind_riscv : Edge::Kind { /// /// Fixup expression /// Fixup <- (Target - *{1}Fixup - Addend) - R_RISCV_SUB8 + R_RISCV_SUB8, + + /// Local label assignment + /// + /// Fixup expression: + /// Fixup <- (Target + Addend) + R_RISCV_SET6, + + /// Local label assignment + /// + /// Fixup expression: + /// Fixup <- (Target + Addend) + R_RISCV_SET8, + + /// Local label assignment + /// + /// Fixup expression: + /// Fixup <- (Target + Addend) + R_RISCV_SET16, + + /// Local label assignment + /// + /// Fixup expression: + /// Fixup <- (Target + Addend) + 
R_RISCV_SET32, + + /// Local label assignment + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) + R_RISCV_32_PCREL, }; /// Returns a string name for the given riscv edge. For debugging purposes diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp index 4483147c1b1d7..f83001417e946 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp @@ -359,6 +359,39 @@ class ELFJITLinker_riscv : public JITLinker { *FixupPtr = static_cast(Value); break; } + case R_RISCV_SET6: { + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + uint32_t RawData = *(little32_t *)FixupPtr; + int64_t Word6 = Value & 0x3f; + *(little32_t *)FixupPtr = (RawData & 0xffffffc0) | Word6; + break; + } + case R_RISCV_SET8: { + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + uint32_t RawData = *(little32_t *)FixupPtr; + int64_t Word8 = Value & 0xff; + *(little32_t *)FixupPtr = (RawData & 0xffffff00) | Word8; + break; + } + case R_RISCV_SET16: { + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + uint32_t RawData = *(little32_t *)FixupPtr; + int64_t Word16 = Value & 0xffff; + *(little32_t *)FixupPtr = (RawData & 0xffff0000) | Word16; + break; + } + case R_RISCV_SET32: { + int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue(); + int64_t Word32 = Value & 0xffffffff; + *(little32_t *)FixupPtr = Word32; + break; + } + case R_RISCV_32_PCREL: { + int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress; + int64_t Word32 = Value & 0xffffffff; + *(little32_t *)FixupPtr = Word32; + break; + } } return Error::success(); } @@ -409,6 +442,16 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder { return EdgeKind_riscv::R_RISCV_SUB16; case ELF::R_RISCV_SUB8: return EdgeKind_riscv::R_RISCV_SUB8; + case ELF::R_RISCV_SET6: + return EdgeKind_riscv::R_RISCV_SET6; + case 
ELF::R_RISCV_SET8: + return EdgeKind_riscv::R_RISCV_SET8; + case ELF::R_RISCV_SET16: + return EdgeKind_riscv::R_RISCV_SET16; + case ELF::R_RISCV_SET32: + return EdgeKind_riscv::R_RISCV_SET32; + case ELF::R_RISCV_32_PCREL: + return EdgeKind_riscv::R_RISCV_32_PCREL; } return make_error("Unsupported riscv relocation:" + diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp index 4d1ace73a04e3..6efd7abd85ddf 100644 --- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp @@ -54,6 +54,16 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_SUB16"; case R_RISCV_SUB8: return "R_RISCV_SUB8"; + case R_RISCV_SET6: + return "R_RISCV_SET6"; + case R_RISCV_SET8: + return "R_RISCV_SET8"; + case R_RISCV_SET16: + return "R_RISCV_SET16"; + case R_RISCV_SET32: + return "R_RISCV_SET32"; + case R_RISCV_32_PCREL: + return "R_RISCV_32_PCREL"; } return getGenericEdgeKindName(K); } diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s new file mode 100644 index 0000000000000..b4dd87a16ffc6 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s @@ -0,0 +1,19 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t/riscv64_pc_relative.o %s +# RUN: llvm-mc -triple=riscv32 -filetype=obj -o %t/riscv32_pc_relative.o %s +# RUN: llvm-jitlink -noexec -check %s %t/riscv64_pc_relative.o +# RUN: llvm-jitlink -noexec -check %s %t/riscv32_pc_relative.o + +# jitlink-check: *{4}(foo) = 0x4 + +.global main +main: + lw a0, foo + +.section ".text","",@progbits +.type foo,@function +foo: + nop + nop + .reloc foo, R_RISCV_32_PCREL, foo+4 + .size foo, 8 diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s new file mode 100644 index 0000000000000..edd617b70c8f0 --- /dev/null +++ 
b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s @@ -0,0 +1,31 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t/riscv64_reloc_set.o %s +# RUN: llvm-mc -triple=riscv32 -filetype=obj -o %t/riscv32_reloc_set.o %s +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff0f0f0 -slab-page-size 4096 \ +# RUN: -check %s %t/riscv64_reloc_set.o +# RUN: llvm-jitlink -noexec \ +# RUN: -slab-allocate 100Kb -slab-address 0xfff0f0f0 -slab-page-size 4096 \ +# RUN: -check %s %t/riscv32_reloc_set.o + +# jitlink-check: *{4}(foo) = foo +# jitlink-check: *{2}(foo+4) = foo[15:0] +# jitlink-check: *{1}(foo+6) = foo[7:0] +# jitlink-check: *{1}(foo+7) = foo[5:0] + +.global main +main: + lw a0, foo + +.section ".rodata","",@progbits +.type foo,@object +foo: + .reloc foo, R_RISCV_SET32, foo + .reloc foo+4, R_RISCV_SET16, foo + .reloc foo+6, R_RISCV_SET8, foo + .reloc foo+7, R_RISCV_SET6, foo + .word 0 + .half 0 + .byte 0 + .byte 0 + .size foo, 8 From 26544b98f7bf744d2ccd29cc6559db24bc1a4e50 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Fri, 21 Jan 2022 20:08:57 +0100 Subject: [PATCH 249/946] [libc++] Use addressof in unordered_set. This addresses the usage of `operator&` in ``. (Note there are still more headers with the same issue.) 
Reviewed By: #libc, philnik, Quuxplusone Differential Revision: https://reviews.llvm.org/D117917 --- libcxx/include/unordered_set | 15 +++--- .../move.addressof.compile.pass.cpp | 29 ++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 33 +++++++++++++ .../emplace_hint.addressof.compile.pass.cpp | 30 ++++++++++++ ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++ ...ert_hint_rvalue.addressof.compile.pass.cpp | 27 +++++++++++ ...rator.operators.addressof.compile.pass.cpp | 47 +++++++++++++++++++ .../move.addressof.compile.pass.cpp | 29 ++++++++++++ .../move_alloc.addressof.compile.pass.cpp | 35 ++++++++++++++ 9 files changed, 266 insertions(+), 7 deletions(-) create mode 100644 libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index ad58fda24f87e..29a19f2f0cb5b 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -463,6 +463,7 @@ template #include <__debug> #include <__functional/is_transparent.h> #include <__hash_table> +#include <__memory/addressof.h> #include <__node_handle> #include <__utility/forward.h> #include @@ -640,7 +641,7 
@@ public: #if _LIBCPP_DEBUG_LEVEL == 2 iterator emplace_hint(const_iterator __p, _Args&&... __args) { - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_set::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered_set"); return __table_.__emplace_unique(_VSTD::forward<_Args>(__args)...).first; @@ -657,7 +658,7 @@ public: #if _LIBCPP_DEBUG_LEVEL == 2 iterator insert(const_iterator __p, value_type&& __x) { - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_set::insert(const_iterator, value_type&&) called with an iterator not" " referring to this unordered_set"); return insert(_VSTD::move(__x)).first; @@ -678,7 +679,7 @@ public: #if _LIBCPP_DEBUG_LEVEL == 2 iterator insert(const_iterator __p, const value_type& __x) { - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, + _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this, "unordered_set::insert(const_iterator, const value_type&) called with an iterator not" " referring to this unordered_set"); return insert(__x).first; @@ -1019,7 +1020,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -1037,7 +1038,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -1660,7 +1661,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( { _VSTD::__debug_db_insert_c(this); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } @@ -1678,7 +1679,7 @@ 
unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset( } #if _LIBCPP_DEBUG_LEVEL == 2 else - __get_db()->swap(this, &__u); + __get_db()->swap(this, _VSTD::addressof(__u)); #endif } diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..9df029e61341a --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_multiset + +// unordered_multiset(unordered_multiset&& u); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_multiset so; + std::unordered_multiset s(std::move(so)); +} diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..cd1f70eab9e68 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_multiset + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_allocator.h" +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + using A = test_allocator; + using H = std::hash; + using P = std::equal_to; + + const A a; + std::unordered_multiset so; + std::unordered_multiset s(std::move(so), a); +} diff --git a/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..af6e37eaef960 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_set + +// template +// iterator emplace_hint(const_iterator p, Args&&... 
args); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_set s; + s.emplace_hint(s.cbegin()); +} diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..f83f28fc923fc --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_set + +// iterator insert(const_iterator p, const value_type& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_set s; + const operator_hijacker v; + s.insert(s.cbegin(), v); +} diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..333c673e30b9a --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_set + +// iterator insert(const_iterator p, value_type&& x); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_set s; + s.insert(s.cbegin(), operator_hijacker()); +} diff --git a/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..457172d11f6bd --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> + +// class unordered_set + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +template +void test() { + FromIterator from; + ToIterator copy(from); + copy = from; + + ToIterator move(std::move(from)); + from = FromIterator(); + move = std::move(from); +} + +void test() { + { + using I = std::unordered_set::iterator; + using CI = std::unordered_set::const_iterator; + test(); + test(); + test(); + } + { + using IL = std::unordered_set::local_iterator; + using CIL = std::unordered_set::const_local_iterator; + test(); + test(); + test(); + } +} diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..f7e9eb444bd58 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_set + +// unordered_set(unordered_set&& u); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::unordered_set so; + std::unordered_set s(std::move(so)); +} diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp new file mode 100644 index 0000000000000..8e9d8cb8d87b2 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// + +// template , class Pred = equal_to, +// class Alloc = allocator> +// class unordered_set + +// unordered_set(unordered_set&& u, const allocator_type& a); + +// Validate whether the operation properly guards against ADL-hijacking operator& + +#include + +#include "test_allocator.h" +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + using A = test_allocator; + using H = std::hash; + using P = std::equal_to; + + const A a; + std::unordered_set so; + std::unordered_set s(std::move(so), a); +} From 4041354b4c12fb4329853b67f61b8617252188d6 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Sat, 22 Jan 2022 11:42:47 +0100 Subject: [PATCH 250/946] [mlir] Add SingleBlockImplicitTerminator<"tensor::YieldOp"> to PadOp. --- mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td | 5 +++-- mlir/test/Dialect/Tensor/invalid.mlir | 9 --------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 05cb41d791d35..882ea33b9c03b 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -782,13 +782,14 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> { // PadOp //===----------------------------------------------------------------------===// -def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> { +def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect, + SingleBlockImplicitTerminator<"mlir::tensor::YieldOp">]> { let summary = "tensor pad operation"; let description = [{ `tensor.pad` is an operation that pads the `source` tensor with given `low` and `high` padding config. 
- The PadTensor operation supports the following arguments: + The PadOp operation supports the following arguments: * source: the "base" tensor on which to pad. * low: A list contains the padding along the start of each diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir index cec4718595a45..7dbf662fd6fe3 100644 --- a/mlir/test/Dialect/Tensor/invalid.mlir +++ b/mlir/test/Dialect/Tensor/invalid.mlir @@ -343,15 +343,6 @@ func @pad_number_of_block_args(%arg0: tensor, %arg1: i32) -> tensor, %arg1: i32) -> tensor { - // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}} - %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { - } : tensor to tensor - return %0 : tensor -} - -// ----- - func @pad_block_args(%arg0: tensor, %arg1: i32) -> tensor { // expected-error @+1 {{op expected block argument 1 to be an index}} %0 = tensor.pad %arg0 low[1, 2] high[2, 3] { From 93deac2e2ba96248c05441a0bfa8385a73d78acb Mon Sep 17 00:00:00 2001 From: Micah Weston Date: Sat, 22 Jan 2022 12:39:22 +0000 Subject: [PATCH 251/946] [AArch64] Optimize add/sub with immediate through MIPeepholeOpt Fixes the build issue with D111034, whose goal was to optimize add/sub with long immediates. Optimize ([add|sub] r, imm) -> ([ADD|SUB] ([ADD|SUB] r, #imm0, lsl #12), #imm1), if imm == (imm0<<12)+imm1. and both imm0 and imm1 are non-zero 12-bit unsigned integers. Optimize ([add|sub] r, imm) -> ([SUB|ADD] ([SUB|ADD] r, #imm0, lsl #12), #imm1), if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned integers. The change which fixed the build issue in D111034 was the use of new virtual registers so that SSA form is maintained until deleting MI. 
Differential Revision: https://reviews.llvm.org/D117429 --- .../Target/AArch64/AArch64MIPeepholeOpt.cpp | 204 +++++++++++++++--- .../test/CodeGen/AArch64/addsub-24bit-imm.mir | 63 ++++++ llvm/test/CodeGen/AArch64/addsub.ll | 96 ++++++--- .../AArch64/large-offset-gep.ll | 5 +- 4 files changed, 302 insertions(+), 66 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 3e1306eb32972..0ddfc717e47f3 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -11,12 +11,19 @@ // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri // MOVi64imm + ANDXrr ==> ANDXri + ANDXri // +// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi +// MOVi64imm + ADDXrr ==> ANDXri + ANDXri +// +// 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi +// MOVi64imm + SUBXrr ==> SUBXri + SUBXri +// // The mov pseudo instruction could be expanded to multiple mov instructions // later. In this case, we could try to split the constant operand of mov -// instruction into two bitmask immediates. It makes two AND instructions -// intead of multiple `mov` + `and` instructions. +// instruction into two immediates which can be directly encoded into +// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of +// multiple `mov` + `and/add/sub` instructions. // -// 2. Remove redundant ORRWrs which is generated by zero-extend. +// 4. Remove redundant ORRWrs which is generated by zero-extend. 
// // %3:gpr32 = ORRWrs $wzr, %2, 0 // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32 @@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { MachineLoopInfo *MLI; MachineRegisterInfo *MRI; + bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, + MachineInstr *&SubregToRegMI); + + template + bool visitADDSUB(MachineInstr &MI, + SmallSetVector &ToBeRemoved, bool IsAdd); template bool visitAND(MachineInstr &MI, SmallSetVector &ToBeRemoved); @@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND( assert((RegSize == 32 || RegSize == 64) && "Invalid RegSize for AND bitmask peephole optimization"); - // Check whether AND's MBB is in loop and the AND is loop invariant. - MachineBasicBlock *MBB = MI.getParent(); - MachineLoop *L = MLI->getLoopFor(MBB); - if (L && !L->isLoopInvariant(MI)) - return false; - - // Check whether AND's operand is MOV with immediate. - MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); - if (!MovMI) - return false; - - MachineInstr *SubregToRegMI = nullptr; - // If it is SUBREG_TO_REG, check its operand. - if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { - SubregToRegMI = MovMI; - MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); - if (!MovMI) - return false; - } - - if (MovMI->getOpcode() != AArch64::MOVi32imm && - MovMI->getOpcode() != AArch64::MOVi64imm) - return false; - - // If the MOV has multiple uses, do not split the immediate because it causes - // more instructions. - if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) - return false; - - if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) + // Perform several essential checks against current MI. + MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr; + if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) return false; // Split the bitmask immediate into two. @@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND( // Create new AND MIs. 
DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock *MBB = MI.getParent(); const TargetRegisterClass *ANDImmRC = (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass; Register DstReg = MI.getOperand(0).getReg(); @@ -251,6 +238,145 @@ bool AArch64MIPeepholeOpt::visitORR( return true; } +template +static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { + // The immediate must be in the form of ((imm0 << 12) + imm1), in which both + // imm0 and imm1 are non-zero 12-bit unsigned int. + if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || + (Imm & ~static_cast(0xffffff)) != 0) + return false; + + // The immediate can not be composed via a single instruction. + SmallVector Insn; + AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); + if (Insn.size() == 1) + return false; + + // Split Imm into (Imm0 << 12) + Imm1; + Imm0 = (Imm >> 12) & 0xfff; + Imm1 = Imm & 0xfff; + return true; +} + +template +bool AArch64MIPeepholeOpt::visitADDSUB( + MachineInstr &MI, SmallSetVector &ToBeRemoved, + bool IsAdd) { + // Try below transformation. + // + // MOVi32imm + ADDWrr ==> ADDWri + ADDWri + // MOVi64imm + ADDXrr ==> ADDXri + ADDXri + // + // MOVi32imm + SUBWrr ==> SUBWri + SUBWri + // MOVi64imm + SUBXrr ==> SUBXri + SUBXri + // + // The mov pseudo instruction could be expanded to multiple mov instructions + // later. Let's try to split the constant operand of mov instruction into two + // legal add/sub immediates. It makes only two ADD/SUB instructions intead of + // multiple `mov` + `and/sub` instructions. + + unsigned RegSize = sizeof(T) * 8; + assert((RegSize == 32 || RegSize == 64) && + "Invalid RegSize for legal add/sub immediate peephole optimization"); + + // Perform several essential checks against current MI. + MachineInstr *MovMI, *SubregToRegMI; + if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) + return false; + + // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 
+ T Imm = static_cast(MovMI->getOperand(1).getImm()), Imm0, Imm1; + unsigned Opcode; + if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) { + if (IsAdd) + Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri; + else + Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri; + } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) { + if (IsAdd) + Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri; + else + Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri; + } else { + return false; + } + + // Create new ADD/SUB MIs. + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock *MBB = MI.getParent(); + const TargetRegisterClass *RC = + (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass; + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register NewTmpReg = MRI->createVirtualRegister(RC); + Register NewDstReg = MRI->createVirtualRegister(RC); + + MRI->constrainRegClass(SrcReg, RC); + BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) + .addReg(SrcReg) + .addImm(Imm0) + .addImm(12); + + MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); + BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) + .addReg(NewTmpReg) + .addImm(Imm1) + .addImm(0); + + MRI->replaceRegWith(DstReg, NewDstReg); + // replaceRegWith changes MI's definition register. Keep it for SSA form until + // deleting MI. + MI.getOperand(0).setReg(DstReg); + + // Record the MIs need to be removed. + ToBeRemoved.insert(&MI); + if (SubregToRegMI) + ToBeRemoved.insert(SubregToRegMI); + ToBeRemoved.insert(MovMI); + + return true; +} + +// Checks if the corresponding MOV immediate instruction is applicable for +// this peephole optimization. +bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, + MachineInstr *&MovMI, + MachineInstr *&SubregToRegMI) { + // Check whether current MBB is in loop and the AND is loop invariant. 
+ MachineBasicBlock *MBB = MI.getParent(); + MachineLoop *L = MLI->getLoopFor(MBB); + if (L && !L->isLoopInvariant(MI)) + return false; + + // Check whether current MI's operand is MOV with immediate. + MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); + if (!MovMI) + return false; + + // If it is SUBREG_TO_REG, check its operand. + SubregToRegMI = nullptr; + if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { + SubregToRegMI = MovMI; + MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); + if (!MovMI) + return false; + } + + if (MovMI->getOpcode() != AArch64::MOVi32imm && + MovMI->getOpcode() != AArch64::MOVi64imm) + return false; + + // If the MOV has multiple uses, do not split the immediate because it causes + // more instructions. + if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) + return false; + if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) + return false; + + // It is OK to perform this peephole optimization. + return true; +} + bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -278,6 +404,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { case AArch64::ORRWrs: Changed = visitORR(MI, ToBeRemoved); break; + case AArch64::ADDWrr: + Changed = visitADDSUB(MI, ToBeRemoved, true); + break; + case AArch64::SUBWrr: + Changed = visitADDSUB(MI, ToBeRemoved, false); + break; + case AArch64::ADDXrr: + Changed = visitADDSUB(MI, ToBeRemoved, true); + break; + case AArch64::SUBXrr: + Changed = visitADDSUB(MI, ToBeRemoved, false); + break; } } } diff --git a/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir new file mode 100644 index 0000000000000..b114e617cbd87 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir @@ -0,0 +1,63 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - 
-mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +# Main intention is to verify machine instructions have valid register classes. +# Use of UBFM[W|X]ri is used as an arbitrary instruction that requires GPR[32|64]RegClass. +# If the ADD/SUB optimization generates invalid register classes, this test will fail. +--- +name: addi +body: | + bb.0.entry: + liveins: $w0 + ; CHECK-LABEL: name: addi + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri [[ADDWri]], 3549, 0 + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[ADDWri1]], 28, 31 + ; CHECK-NEXT: $w0 = COPY [[UBFMWri]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr32 = ADDWrr %0, %1 + %3:gpr32 = UBFMWri %2, 28, 31 + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... +--- +name: addl +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 3549, 0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[ADDXri1]], 28, 31 + ; CHECK-NEXT: $x0 = COPY [[UBFMXri]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr32 = MOVi32imm 1121757 + %2:gpr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32 + %3:gpr64 = ADDXrr %0, killed %2 + %4:gpr64 = UBFMXri %3, 28, 31 + $x0 = COPY %4 + RET_ReallyLR implicit $x0 +... 
+--- +name: addl_negate +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: addl_negate + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64sp = SUBXri [[COPY]], 273, 12 + ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri [[SUBXri]], 3549, 0 + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBXri1]], 28, 31 + ; CHECK-NEXT: $x0 = COPY [[UBFMXri]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:gpr64 = COPY $x0 + %1:gpr64 = MOVi64imm -1121757 + %2:gpr64 = ADDXrr %0, killed %1 + %3:gpr64 = UBFMXri %2, 28, 31 + $x0 = COPY %3 + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll index f0857fe2d9660..37c9e4c5c6fe1 100644 --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -152,9 +152,8 @@ define void @sub_med() { define i64 @add_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = add i64 %a, 11183445 ret i64 %b @@ -163,9 +162,8 @@ define i64 @add_two_parts_imm_i64(i64 %a) { define i32 @add_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, 11183445 ret i32 %b @@ -174,9 +172,8 @@ define i32 @add_two_parts_imm_i32(i32 %a) { define i64 @add_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: add_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret 
%b = add i64 %a, -11183445 ret i64 %b @@ -185,9 +182,8 @@ define i64 @add_two_parts_imm_i64_neg(i64 %a) { define i32 @add_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: add_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = add i32 %a, -11183445 ret i32 %b @@ -196,9 +192,8 @@ define i32 @add_two_parts_imm_i32_neg(i32 %a) { define i64 @sub_two_parts_imm_i64(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-42325 -; CHECK-NEXT: movk x8, #65365, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: sub x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, 11183445 ret i64 %b @@ -207,9 +202,8 @@ define i64 @sub_two_parts_imm_i64(i64 %a) { define i32 @sub_two_parts_imm_i32(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23211 -; CHECK-NEXT: movk w8, #65365, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: sub w8, w0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: sub w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, 11183445 ret i32 %b @@ -218,9 +212,8 @@ define i32 @sub_two_parts_imm_i32(i32 %a) { define i64 @sub_two_parts_imm_i64_neg(i64 %a) { ; CHECK-LABEL: sub_two_parts_imm_i64_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: add x8, x0, #2730, lsl #12 // =11182080 +; CHECK-NEXT: add x0, x8, #1365 ; CHECK-NEXT: ret %b = sub i64 %a, -11183445 ret i64 %b @@ -229,14 +222,57 @@ define i64 @sub_two_parts_imm_i64_neg(i64 %a) { define i32 @sub_two_parts_imm_i32_neg(i32 %a) { ; CHECK-LABEL: sub_two_parts_imm_i32_neg: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42325 -; CHECK-NEXT: movk w8, #170, lsl #16 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w8, w0, 
#2730, lsl #12 // =11182080 +; CHECK-NEXT: add w0, w8, #1365 ; CHECK-NEXT: ret %b = sub i32 %a, -11183445 ret i32 %b } +define i32 @add_27962026(i32 %a) { +; CHECK-LABEL: add_27962026: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #43690 +; CHECK-NEXT: movk w8, #426, lsl #16 +; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: ret + %b = add i32 %a, 27962026 + ret i32 %b +} + +define i32 @add_65534(i32 %a) { +; CHECK-LABEL: add_65534: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #65534 +; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: ret + %b = add i32 %a, 65534 + ret i32 %b +} + +declare i32 @foox(i32) + +define void @add_in_loop(i32 %0) { +; CHECK-LABEL: add_in_loop: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov w19, #43690 +; CHECK-NEXT: movk w19, #170, lsl #16 +; CHECK-NEXT: .LBB15_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add w0, w0, w19 +; CHECK-NEXT: bl foox +; CHECK-NEXT: b .LBB15_1 + br label %2 +2: + %3 = phi i32 [ %0, %1 ], [ %5, %2 ] + %4 = add nsw i32 %3, 11184810 + %5 = tail call i32 @foox(i32 %4) #2 + br label %2 +} + define void @testing() { ; CHECK-LABEL: testing: ; CHECK: // %bb.0: @@ -244,7 +280,7 @@ define void @testing() { ; CHECK-NEXT: ldr x8, [x8, :got_lo12:var_i32] ; CHECK-NEXT: ldr w9, [x8] ; CHECK-NEXT: cmp w9, #4095 -; CHECK-NEXT: b.ne .LBB13_6 +; CHECK-NEXT: b.ne .LBB16_6 ; CHECK-NEXT: // %bb.1: // %test2 ; CHECK-NEXT: adrp x10, :got:var2_i32 ; CHECK-NEXT: add w11, w9, #1 @@ -252,26 +288,26 @@ define void @testing() { ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: cmp w10, #3567, lsl #12 // =14610432 -; CHECK-NEXT: b.lo .LBB13_6 +; CHECK-NEXT: b.lo .LBB16_6 ; CHECK-NEXT: // %bb.2: // %test3 ; CHECK-NEXT: add w11, w9, #2 ; CHECK-NEXT: cmp w9, #123 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.lt .LBB13_6 +; CHECK-NEXT: b.lt .LBB16_6 ; CHECK-NEXT: // %bb.3: 
// %test4 ; CHECK-NEXT: add w11, w9, #3 ; CHECK-NEXT: cmp w10, #321 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.gt .LBB13_6 +; CHECK-NEXT: b.gt .LBB16_6 ; CHECK-NEXT: // %bb.4: // %test5 ; CHECK-NEXT: add w11, w9, #4 ; CHECK-NEXT: cmn w10, #443 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.ge .LBB13_6 +; CHECK-NEXT: b.ge .LBB16_6 ; CHECK-NEXT: // %bb.5: // %test6 ; CHECK-NEXT: add w9, w9, #5 ; CHECK-NEXT: str w9, [x8] -; CHECK-NEXT: .LBB13_6: // %common.ret +; CHECK-NEXT: .LBB16_6: // %common.ret ; CHECK-NEXT: ret %val = load i32, i32* @var_i32 %val2 = load i32, i32* @var2_i32 diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll index 1c587080f4b68..97e877211b120 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -214,10 +214,9 @@ define void @test5([65536 x i32]** %s, i32 %n) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #14464 -; CHECK-NEXT: movk w10, #1, lsl #16 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: add x9, x9, #19, lsl #12 // =77824 +; CHECK-NEXT: add x9, x9, #2176 ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.ge .LBB4_2 ; CHECK-NEXT: .LBB4_1: // %while_body From 0283b07746e879961fd9361579e0da2a62430696 Mon Sep 17 00:00:00 2001 From: luxufan <932494295@qq.com> Date: Sat, 22 Jan 2022 16:07:17 +0800 Subject: [PATCH 252/946] reapply de872382951 "[JITLink] Add anonymous symbols in LinkGraph..." 
with fixes This reapply `de872382951572b70dfaefe8d77eb98d15586115`, which was reverted in `fdb6578514dd3799ad23c8bbb7699577c0fb414d` Add `# REQUIRES: asserts` in test file `anonymous_symbol.s` to disable this test for non-debug build --- .../llvm/ExecutionEngine/JITLink/riscv.h | 12 +++++------ .../JITLink/ELFLinkGraphBuilder.h | 12 +++++++---- llvm/lib/ExecutionEngine/JITLink/riscv.cpp | 4 ++-- .../JITLink/RISCV/anonymous_symbol.s | 21 +++++++++++++++++++ 4 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index ed874c53d269b..5abd4cf11deaf 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -79,6 +79,12 @@ enum EdgeKind_riscv : Edge::Kind { /// Fixup <- (Target - Fixup + Addend) R_RISCV_CALL, + /// 32 bits PC relative relocation + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) + R_RISCV_32_PCREL, + /// PC relative GOT offset /// /// Fixup expression: @@ -162,12 +168,6 @@ enum EdgeKind_riscv : Edge::Kind { /// Fixup expression: /// Fixup <- (Target + Addend) R_RISCV_SET32, - - /// Local label assignment - /// - /// Fixup expression: - /// Fixup <- (Target - Fixup + Addend) - R_RISCV_32_PCREL, }; /// Returns a string name for the given riscv edge. For debugging purposes diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 931a60224ee2f..2ab7ed61f71b4 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -441,11 +441,15 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { << "\"\n"; }); - // Model the section symbols as anonymous symbol. + // In RISCV, temporary symbols (Used to generate dwarf, eh_frame + // sections...) 
will appear in object code's symbol table, and LLVM does + // not use names on these temporary symbols (RISCV gnu toolchain uses + // names on these temporary symbols). If the symbol is unnamed, add an + // anonymous symbol. auto &GSym = - Sym.getType() == ELF::STT_SECTION - ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false, - false) + Name->empty() + ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, + false, false) : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, S, Sym.getType() == ELF::STT_FUNC, false); setGraphSymbol(SymIndex, GSym); diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp index 6efd7abd85ddf..3ce2cf10a24cb 100644 --- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp @@ -38,6 +38,8 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_PCREL_LO12_S"; case R_RISCV_CALL: return "R_RISCV_CALL"; + case R_RISCV_32_PCREL: + return "R_RISCV_32_PCREL"; case R_RISCV_ADD64: return "R_RISCV_ADD64"; case R_RISCV_ADD32: @@ -62,8 +64,6 @@ const char *getEdgeKindName(Edge::Kind K) { return "R_RISCV_SET16"; case R_RISCV_SET32: return "R_RISCV_SET32"; - case R_RISCV_32_PCREL: - return "R_RISCV_32_PCREL"; } return getGenericEdgeKindName(K); } diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s new file mode 100644 index 0000000000000..fc1c006095444 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s @@ -0,0 +1,21 @@ +# REQUIRES: asserts +# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s +# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s +# +# Because of the exist of cfi directive, sections like eh_frame section will be emitted +# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate +# the section start and section end. 
So that by relocating these symbol, the section length +# can be calculated. +# +# CHECK: Creating defined graph symbol for ELF symbol "" +# CHECK: Creating defined graph symbol for ELF symbol "" + .text + .globl main + .p2align 2 + .type main,@function +main: + .cfi_startproc + ret + .Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc From b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 22 Jan 2022 13:20:36 +0000 Subject: [PATCH 253/946] [DAG] Convert truncstore(extend(x)) back to store(x) Pulled out of D106237, this folds truncstore(extend(x)) back to store(x) if the original store was legal. This can come up due to the order we fold nodes. A fold from X86 needs to be adjusted to prevent infinite loops, to have it pick the operand of a trunc more directly. Differential Revision: https://reviews.llvm.org/D117901 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 +++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll | 15 +++++++-------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d1f75b40e79db..861beee6386bf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Value.getValueType().isInteger() && (!isa(Value) || !cast(Value)->isOpaque())) { + // Convert a truncating store of a extension into a standard store. 
+ if ((Value.getOpcode() == ISD::ZERO_EXTEND || + Value.getOpcode() == ISD::SIGN_EXTEND || + Value.getOpcode() == ISD::ANY_EXTEND) && + Value.getOperand(0).getValueType() == ST->getMemoryVT() && + TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT())) + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getMemOperand()); + APInt TruncDemandedBits = APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 71c80d518f998..17079116a6ae1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getValue().getOperand(0).getValueType() == MVT::v16i16 && TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) && St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) { - SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue()); + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, + St->getValue().getOperand(0)); return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), MVT::v16i8, St->getMemOperand()); } diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll index 80802658b2a04..2660c3162b107 100644 --- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll +++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s8, s2 ; VI-NEXT: s_mov_b32 s9, s3 -; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 -; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 -; VI-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 offset:2 +; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2 ; VI-NEXT: s_mov_b32 s4, s0 ; VI-NEXT: 
s_mov_b32 s5, s1 -; VI-NEXT: s_waitcnt vmcnt(2) +; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:1 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:2 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:2 +; VI-NEXT: v_lshrrev_b16_e32 v0, 8, v0 +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 offset:1 ; VI-NEXT: s_endpgm %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1 store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1 From 8dedf9b58bff3589bff8cb422e449c4ee7f11499 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Sat, 22 Jan 2022 23:20:14 +0800 Subject: [PATCH 254/946] [PowerPC] Change CTR clobber estimation for 128-bit floating types Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D117459 --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 10 +++- llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll | 52 ++++++++++++++++++- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index ed28731b8ef20..707c1396e5728 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -653,11 +653,17 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, } return true; - } else if (isa(J) && - (J->getType()->getScalarType()->isFP128Ty() || + } else if ((J->getType()->getScalarType()->isFP128Ty() || J->getType()->getScalarType()->isPPC_FP128Ty())) { // Most operations on f128 or ppc_f128 values become calls. 
return true; + } else if (isa(J) && + J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) { + return true; + } else if ((isa(J) || isa(J)) && + (cast(J)->getSrcTy()->getScalarType()->isFP128Ty() || + cast(J)->getDestTy()->getScalarType()->isFP128Ty())) { + return true; } else if (isa(J) || isa(J) || isa(J) || isa(J)) { CastInst *CI = cast(J); diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll index 57095413cdb26..fde8e20212c03 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \ -; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-unknown | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-unknown | FileCheck %s @a = internal global fp128 0xL00000000000000000000000000000000, align 16 @x = internal global [4 x fp128] zeroinitializer, align 16 @@ -29,4 +31,50 @@ for.end: ; preds = %for.body ; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1) } +define void @fpext_ctrloop_fp128(double* %a) { +entry: + br label %for.body + +for.body: + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %i.06 + %0 = load double, double* %arrayidx, align 8 + %ext = fpext double %0 to fp128 + %arrayidx1 = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %i.06 + store fp128 %ext, fp128* %arrayidx1, align 16 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void + +; CHECK-LABEL: fpext_ctrloop_fp128 +; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4) +; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1) +} + +define void 
@fptrunc_ctrloop_fp128(double* %a) { +entry: + br label %for.body + +for.body: + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [4 x fp128], [4 x fp128]* @x, i64 0, i64 %i.06 + %0 = load fp128, fp128* %arrayidx, align 16 + %trunc = fptrunc fp128 %0 to double + %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.06 + store double %trunc, double* %arrayidx1, align 16 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void + +; CHECK-LABEL: fptrunc_ctrloop_fp128 +; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4) +; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1) +} + declare void @obfuscate(i8*, ...) local_unnamed_addr #2 From 00d68c3824bfcb103783b94c4cd8df353e4ee85d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Sat, 22 Jan 2022 23:29:34 +0800 Subject: [PATCH 255/946] [PowerPC] Support parsing GNU attributes in MC This patch is the first step to enable support of GNU attribute in LLVM PowerPC, enabling it for PowerPC targets, otherwise llvm-mc raises error when seeing the attribute section. 
Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D115854 --- llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 12 ++++++++++++ llvm/test/MC/PowerPC/gnu-attribute.s | 11 +++++++++++ 2 files changed, 23 insertions(+) create mode 100644 llvm/test/MC/PowerPC/gnu-attribute.s diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a640e63b5df84..715cff72dcab4 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -121,6 +121,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool ParseDirectiveMachine(SMLoc L); bool ParseDirectiveAbiVersion(SMLoc L); bool ParseDirectiveLocalEntry(SMLoc L); + bool ParseGNUAttribute(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -1605,6 +1606,8 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { ParseDirectiveAbiVersion(DirectiveID.getLoc()); else if (IDVal == ".localentry") ParseDirectiveLocalEntry(DirectiveID.getLoc()); + else if (IDVal.startswith(".gnu_attribute")) + ParseGNUAttribute(DirectiveID.getLoc()); else return true; return false; @@ -1720,7 +1723,16 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) { return false; } +bool PPCAsmParser::ParseGNUAttribute(SMLoc L) { + int64_t Tag; + int64_t IntegerValue; + if (!getParser().parseGNUAttribute(L, Tag, IntegerValue)) + return false; + + getParser().getStreamer().emitGNUAttribute(Tag, IntegerValue); + return true; +} /// Force static initialization. 
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() { diff --git a/llvm/test/MC/PowerPC/gnu-attribute.s b/llvm/test/MC/PowerPC/gnu-attribute.s new file mode 100644 index 0000000000000..98a558d52c6fe --- /dev/null +++ b/llvm/test/MC/PowerPC/gnu-attribute.s @@ -0,0 +1,11 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu < %s | FileCheck %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu < %s | FileCheck %s + + .text +add: + add 3, 4, 3 + blr + .gnu_attribute 4, 13 + +# CHECK-LABEL: add: +# CHECK: .gnu_attribute 4, 13 From 5f2854f1daa79373ef67211bebce5e08f087c3b3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 22 Jan 2022 15:34:10 +0000 Subject: [PATCH 256/946] [LV] Always create VPWidenCanonicalIVRecipe, optimize away later. This patch updates createBlockInMask to always generate VPWidenCanonicalIVRecipe and adds a transform to optimize it away later, if it is not needed. This is a step towards breaking up VPWidenIntOrFpInductionRecipe and explicitly distinguishing between vector phis and scalarizing. Split off from D116123. 
Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D117140 --- .../Transforms/Vectorize/LoopVectorize.cpp | 12 +++------ llvm/lib/Transforms/Vectorize/VPlan.h | 27 +++++++++++++++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 27 +++++++++++++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 4 +++ 4 files changed, 61 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d186ae59a74a2..7b90dcff7bc1e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8418,15 +8418,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { assert(CM.foldTailByMasking() && "must fold the tail"); VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock(); auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi(); - - VPValue *IV = nullptr; - if (Legal->getPrimaryInduction()) - IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); - else { - auto *IVRecipe = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV()); - HeaderVPBB->insert(IVRecipe, NewInsertionPoint); - IV = IVRecipe; - } + auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV()); + HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi()); VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint); @@ -9201,6 +9194,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } } + VPlanTransforms::removeRedundantCanonicalIVs(*Plan); VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 10d5c1b3409a5..824440f98a8b4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1076,6 +1076,12 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public 
VPValue { /// Returns true if the induction is canonical, i.e. starting at 0 and /// incremented by UF * VF (= the original IV is incremented by 1). bool isCanonical() const; + + /// Returns the scalar type of the induction. + const Type *getScalarType() const { + const TruncInst *TruncI = getTruncInst(); + return TruncI ? TruncI->getType() : IV->getType(); + } }; /// A pure virtual base class for all recipes modeling header phis, including @@ -1675,6 +1681,11 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns the scalar type of the induction. + const Type *getScalarType() const { + return getOperand(0)->getLiveInIRValue()->getType(); + } }; /// A Recipe for widening the canonical induction variable of the vector loop. @@ -1691,6 +1702,16 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC; } + /// Extra classof implementations to allow directly casting from VPUser -> + /// VPWidenCanonicalIVRecipe. + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast(U); + return R && R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC; + } + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC; + } + /// Generate a canonical vector induction variable of the vector loop, with /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and /// step = <VF*UF, VF*UF, ..., VF*UF>. @@ -1701,6 +1722,12 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns the scalar type of the induction. + const Type *getScalarType() const { + return cast(getOperand(0)->getDef()) + ->getScalarType(); + } }; /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
It diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d2daf558c2c56..fb5f3d4281896 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -324,3 +324,30 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { E.first->eraseFromParent(); } } + +void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { + VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; + for (VPUser *U : CanonicalIV->users()) { + WidenNewIV = dyn_cast(U); + if (WidenNewIV) + break; + } + + if (!WidenNewIV) + return; + + VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &Phi : HeaderVPBB->phis()) { + auto *WidenOriginalIV = dyn_cast(&Phi); + + // If the induction recipe is canonical and the types match, use it + // directly. + if (WidenOriginalIV && WidenOriginalIV->isCanonical() && + WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) { + WidenNewIV->replaceAllUsesWith(WidenOriginalIV); + WidenNewIV->eraseFromParent(); + return; + } + } +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index a82a562d5e353..e74409a86466f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -45,6 +45,10 @@ struct VPlanTransforms { /// in the vectorized loop. There is no need to vectorize the cast - the same /// value can be used for both the phi and casts in the vector loop. static void removeRedundantInductionCasts(VPlan &Plan); + + /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV + /// recipe, if it exists. 
+ static void removeRedundantCanonicalIVs(VPlan &Plan); }; } // namespace llvm From 26fffc1b8e755c60727d206a1f07a1ca1d299f48 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sat, 22 Jan 2022 11:53:00 -0500 Subject: [PATCH 257/946] [libc++] [test] {cpo,niebloid}.compile.pass.cpp: Also test their constness. This will detect if someone writes `inline auto cpo =` instead of `inline constexpr auto cpo =`. I don't know how that'd be possible, but it's easy to test, so let's test it. --- .../customization.point.object/cpo.compile.pass.cpp | 3 +++ .../customization.point.object/niebloid.compile.pass.cpp | 1 + 2 files changed, 4 insertions(+) diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp index 4cd461631838d..819aeb454a45c 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp @@ -13,13 +13,16 @@ // [range.adaptor.object] "A range adaptor object is a customization point object..." #include +#include #include #include +#include #include // Test for basic properties of C++20 16.3.3.3.6 [customization.point.object]. template constexpr bool test(CPO& o, Args&&...) 
{ + static_assert(std::is_const_v); static_assert(std::is_class_v); static_assert(std::is_trivial_v); diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp index fcca5813dcb31..532fa5786f8e1 100644 --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -32,6 +32,7 @@ template constexpr bool test(CPO& o, Args&&...) { + static_assert(std::is_const_v); static_assert(std::is_class_v); static_assert(std::is_trivial_v); From e9d0f8baf2361b190b0ffde67cad62828fda8ce6 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Fri, 14 Jan 2022 13:34:10 -0800 Subject: [PATCH 258/946] [flang] Don't drop format string for external child I/O In user-defined derived type I/O to an external unit, don't omit the format string from the constructor of ChildFormattedIoStatement. And include any user IOMSG text in the crash message of the parent, if it doesn't catch errors. 
Differential Revision: https://reviews.llvm.org/D117903 --- flang/runtime/format-implementation.h | 2 +- flang/runtime/io-api.cpp | 2 +- flang/runtime/io-error.cpp | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h index b9c1b8427afe3..6b1a64b96a851 100644 --- a/flang/runtime/format-implementation.h +++ b/flang/runtime/format-implementation.h @@ -353,7 +353,7 @@ DataEdit FormatControl::GetNextDataEdit( ++offset_; } } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') { - // DT'iotype'(v_list) user-defined derived type I/O + // DT['iotype'][(v_list)] user-defined derived type I/O edit.descriptor = DataEdit::DefinedDerivedType; ++offset_; if (auto quote{static_cast(PeekNext())}; diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 64c798d7ff8ae..9c50adf184898 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -215,7 +215,7 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, : "formatted input", false, DIR) ? 
&child->BeginIoStatement>( - *child, sourceFile, sourceLine) + *child, format, formatLength, sourceFile, sourceLine) : nullptr; } else { IoErrorHandler handler{terminator}; diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp index 48647ec8805e7..e139e0649e503 100644 --- a/flang/runtime/io-error.cpp +++ b/flang/runtime/io-error.cpp @@ -59,10 +59,14 @@ void IoErrorHandler::SignalError(int iostatOrErrno) { void IoErrorHandler::Forward( int ioStatOrErrno, const char *msg, std::size_t length) { - SignalError(ioStatOrErrno); - if (ioStat_ != IostatOk && (flags_ & hasIoMsg)) { + if (ioStat_ != IostatOk && msg && (flags_ & hasIoMsg)) { ioMsg_ = SaveDefaultCharacter(msg, length, *this); } + if (ioStatOrErrno != IostatOk && msg) { + SignalError(ioStatOrErrno, "%.*s", static_cast(length), msg); + } else { + SignalError(ioStatOrErrno); + } } void IoErrorHandler::SignalErrno() { SignalError(errno); } From 896a543e72fd3ab044e64d1140a37c7f4876fc71 Mon Sep 17 00:00:00 2001 From: Peter Klausler Date: Wed, 19 Jan 2022 16:25:41 -0800 Subject: [PATCH 259/946] [flang] Support DECIMAL='COMMA' mode in namelist I/O DECIMAL='COMMA' mode affects item separators, real editing, and complex editing. 
Differential Revision: https://reviews.llvm.org/D117906 --- flang/runtime/descriptor-io.h | 1 + flang/runtime/edit-input.cpp | 10 ++++++--- flang/runtime/io-stmt.cpp | 5 +++-- flang/runtime/io-stmt.h | 7 ++++++- flang/runtime/namelist.cpp | 17 +++++++++++---- flang/unittests/Runtime/Namelist.cpp | 31 ++++++++++++++++++++++++++++ 6 files changed, 61 insertions(+), 10 deletions(-) diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index 03b7e798af431..1e78a826f04db 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -124,6 +124,7 @@ inline bool FormattedComplexIO( DataEdit rEdit, iEdit; rEdit.descriptor = DataEdit::ListDirectedRealPart; iEdit.descriptor = DataEdit::ListDirectedImaginaryPart; + rEdit.modes = iEdit.modes = io.mutableModes(); if (!RealOutputEditing{io, x[0]}.Edit(rEdit) || !RealOutputEditing{io, x[1]}.Edit(iEdit)) { return false; diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 7fe4bfaf9a89e..dff79841cf18c 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -48,6 +48,10 @@ static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n, return true; } +static inline char32_t GetDecimalPoint(const DataEdit &edit) { + return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; +} + // Prepares input from a field, and consumes the sign, if any. // Returns true if there's a '-' sign. static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, @@ -59,7 +63,7 @@ static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, if (negative || *next == '+') { io.GotChar(); io.SkipSpaces(remaining); - next = io.NextInField(remaining); + next = io.NextInField(remaining, GetDecimalPoint(edit)); } } return negative; @@ -154,7 +158,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, Put('0'); return got; } - char32_t decimal = edit.modes.editingFlags & decimalComma ? 
',' : '.'; + char32_t decimal{GetDecimalPoint(edit)}; char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; if (first == 'N' || first == 'I') { // NaN or infinity - convert to upper case @@ -179,7 +183,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, Put('.'); // input field is normalized to a fraction auto start{got}; bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; - for (; next; next = io.NextInField(remaining)) { + for (; next; next = io.NextInField(remaining, decimal)) { char32_t ch{*next}; if (ch == ' ' || ch == '\t') { if (bzMode) { diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 0a544d69958bf..52d0a1ebe6a39 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -580,13 +580,13 @@ ListDirectedStatementState::GetNextDataEdit( DataEdit edit; edit.descriptor = DataEdit::ListDirected; edit.repeat = 1; // may be overridden below - edit.modes = connection.modes; + edit.modes = io.mutableModes(); if (hitSlash_) { // everything after '/' is nullified edit.descriptor = DataEdit::ListDirectedNullValue; return edit; } char32_t comma{','}; - if (io.mutableModes().editingFlags & decimalComma) { + if (edit.modes.editingFlags & decimalComma) { comma = ';'; } if (remaining_ > 0 && !realPart_) { // "r*c" repetition in progress @@ -619,6 +619,7 @@ ListDirectedStatementState::GetNextDataEdit( // Consume comma & whitespace after previous item. // This includes the comma between real and imaginary components // in list-directed/NAMELIST complex input. + // (When DECIMAL='COMMA', the comma is actually a semicolon.) 
io.HandleRelativePosition(1); ch = io.GetNextNonBlank(); } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index baf038dbdd245..32e546e5c082d 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -163,9 +163,14 @@ class IoStatementState { return std::nullopt; } - std::optional NextInField(std::optional &remaining) { + std::optional NextInField( + std::optional &remaining, char32_t decimal = '.') { if (!remaining) { // list-directed or NAMELIST: check for separators if (auto next{GetCurrentChar()}) { + if (*next == decimal) { // can be ',' + HandleRelativePosition(1); + return next; + } switch (*next) { case ' ': case '\t': diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index fde828fddf443..8d291619b8f5c 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -20,11 +20,17 @@ namespace Fortran::runtime::io { // NAMELIST input, plus a byte for NUL termination. static constexpr std::size_t nameBufferSize{201}; +static inline char32_t GetComma(IoStatementState &io) { + return io.mutableModes().editingFlags & decimalComma ? char32_t{';'} + : char32_t{','}; +} + bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("OutputNamelist"); ConnectionState &connection{io.GetConnectionState()}; connection.modes.inNamelist = true; + char comma{static_cast(GetComma(io))}; // Internal functions to advance records and convert case const auto EmitWithAdvance{[&](char ch) -> bool { return (!connection.NeedAdvance(1) || io.AdvanceRecord()) && @@ -51,7 +57,7 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { for (std::size_t j{0}; j < group.items; ++j) { // [,]ITEM=... const NamelistGroup::Item &item{group.item[j]}; - if (!(EmitWithAdvance(j == 0 ? ' ' : ',') && EmitUpperCase(item.name) && + if (!(EmitWithAdvance(j == 0 ? 
' ' : comma) && EmitUpperCase(item.name) && EmitWithAdvance('=') && descr::DescriptorIO(io, item.descriptor))) { return false; @@ -137,6 +143,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, std::size_t contiguousStride{source.ElementBytes()}; bool ok{true}; std::optional ch{io.GetNextNonBlank()}; + char32_t comma{GetComma(io)}; for (; ch && *ch != ')'; ++j) { SubscriptValue dimLower{0}, dimUpper{0}, dimStride{0}; if (j < maxRank && j < source.rank()) { @@ -197,7 +204,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, dimUpper = dimLower; dimStride = 0; } - if (ch && *ch == ',') { + if (ch && *ch == comma) { io.HandleRelativePosition(1); ch = io.GetNextNonBlank(); } @@ -358,6 +365,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { std::optional next; char name[nameBufferSize]; RUNTIME_CHECK(handler, group.groupName != nullptr); + char32_t comma{GetComma(io)}; while (true) { next = io.GetNextNonBlank(); while (next && *next != '&') { @@ -391,7 +399,8 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { } if (!GetLowerCaseName(io, name, sizeof name)) { handler.SignalError( - "NAMELIST input group '%s' was not terminated", group.groupName); + "NAMELIST input group '%s' was not terminated at '%c'", + group.groupName, static_cast(*next)); return false; } std::size_t itemIndex{0}; @@ -461,7 +470,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { return false; } next = io.GetNextNonBlank(); - if (next && *next == ',') { + if (next && *next == comma) { io.HandleRelativePosition(1); } } diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp index 38305f729b145..ba0bae6468a03 100644 --- a/flang/unittests/Runtime/Namelist.cpp +++ b/flang/unittests/Runtime/Namelist.cpp @@ -274,4 +274,35 @@ TEST(NamelistTests, Skip) { EXPECT_EQ(got, expect); } +// Tests DECIMAL=COMMA mode +TEST(NamelistTests, Comma) { + OwningPtr scDesc{ + 
MakeArray(sizeof(float))>( + std::vector{2}, std::vector>{{}, {}})}; + const NamelistGroup::Item items[]{{"z", *scDesc}}; + const NamelistGroup group{"nml", 1, items}; + static char t1[]{"&nml z=(-1,0;2,0);(-3,0;0,5)/"}; + StaticDescriptor<1, true> statDesc; + Descriptor &internalDesc{statDesc.descriptor()}; + internalDesc.Establish(TypeCode{CFI_type_char}, + /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer); + auto inCookie{IONAME(BeginInternalArrayListInput)( + internalDesc, nullptr, 0, __FILE__, __LINE__)}; + ASSERT_TRUE(IONAME(SetDecimal)(inCookie, "COMMA", 5)); + ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group)); + ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk) + << "namelist input with skipping"; + char out[30]; + internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out, + out, 0, nullptr, CFI_attribute_pointer); + auto outCookie{IONAME(BeginInternalArrayListOutput)( + internalDesc, nullptr, 0, __FILE__, __LINE__)}; + ASSERT_TRUE(IONAME(SetDecimal)(outCookie, "COMMA", 5)); + ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group)); + ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << "namelist output"; + std::string got{out, sizeof out}; + static const std::string expect{"&NML Z= (-1,;2,) (-3,;,5)/ "}; + EXPECT_EQ(got, expect); +} + // TODO: Internal NAMELIST error tests From 0a6b4258ab0e773b7b2b06db1317faf556931481 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 22 Jan 2022 01:33:49 -0500 Subject: [PATCH 260/946] [openmp][cmake] Use `GNUInstallDirs` to support custom installation dirs I am breaking apart D99484 so the cause of build failures is easier to understand. 
Differential Revision: https://reviews.llvm.org/D117945 --- openmp/CMakeLists.txt | 5 +++++ openmp/libompd/src/CMakeLists.txt | 2 +- openmp/runtime/cmake/LibompCheckLinkerFlag.cmake | 2 ++ openmp/runtime/src/CMakeLists.txt | 6 +++--- openmp/tools/multiplex/CMakeLists.txt | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index 7f11a05f56227..4530281aff268 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -7,7 +7,12 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) if (OPENMP_STANDALONE_BUILD OR "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") set(OPENMP_STANDALONE_BUILD TRUE) project(openmp C CXX) +endif() + +# Must go below project(..) +include(GNUInstallDirs) +if (OPENMP_STANDALONE_BUILD) # CMAKE_BUILD_TYPE was not set, default to Release. if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) diff --git a/openmp/libompd/src/CMakeLists.txt b/openmp/libompd/src/CMakeLists.txt index 9c203bdd6b4fc..25a850ed457d7 100644 --- a/openmp/libompd/src/CMakeLists.txt +++ b/openmp/libompd/src/CMakeLists.txt @@ -47,4 +47,4 @@ include_directories ( INSTALL( TARGETS ompd LIBRARY DESTINATION ${OPENMP_INSTALL_LIBDIR} ARCHIVE DESTINATION ${OPENMP_INSTALL_LIBDIR} - RUNTIME DESTINATION bin ) + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" ) diff --git a/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake b/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake index fb284599ca386..4c30514af88e5 100644 --- a/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake +++ b/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake @@ -8,6 +8,8 @@ #//===----------------------------------------------------------------------===// # +include(GNUInstallDirs) + # Checking a linker flag to build a shared library # There is no real trivial way to do this in CMake, so we implement it here # this will have ${boolean} = TRUE if the flag succeeds, otherwise FALSE. 
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index e4f4e6e1e73ff..27e748c4d6dc1 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -346,19 +346,19 @@ add_dependencies(libomp-micro-tests libomp-test-deps) # We want to install libomp in DESTDIR/CMAKE_INSTALL_PREFIX/lib # We want to install headers in DESTDIR/CMAKE_INSTALL_PREFIX/include if(${OPENMP_STANDALONE_BUILD}) - set(LIBOMP_HEADERS_INSTALL_PATH include) + set(LIBOMP_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}") else() string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION ${PACKAGE_VERSION}) set(LIBOMP_HEADERS_INSTALL_PATH "${OPENMP_INSTALL_LIBDIR}/clang/${CLANG_VERSION}/include") endif() if(WIN32) - install(TARGETS omp RUNTIME DESTINATION bin) + install(TARGETS omp RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") install(TARGETS ${LIBOMP_IMP_LIB_TARGET} ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") # Create aliases (regular copies) of the library for backwards compatibility set(LIBOMP_ALIASES "libiomp5md") foreach(alias IN LISTS LIBOMP_ALIASES) install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_LIB_FILE}\" - \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_PREFIX}/bin\")") + \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_BINDIR}\")") install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_IMP_LIB_FILE}\" \"${alias}${CMAKE_STATIC_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_PREFIX}/${OPENMP_INSTALL_LIBDIR}\")") endforeach() diff --git a/openmp/tools/multiplex/CMakeLists.txt b/openmp/tools/multiplex/CMakeLists.txt index 64317c112176c..8b50e95899cc4 100644 --- a/openmp/tools/multiplex/CMakeLists.txt +++ b/openmp/tools/multiplex/CMakeLists.txt @@ -4,7 +4,7 @@ if(LIBOMP_OMPT_SUPPORT) add_library(ompt-multiplex INTERFACE) target_include_directories(ompt-multiplex INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) - 
install(FILES ompt-multiplex.h DESTINATION include) + install(FILES ompt-multiplex.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") add_subdirectory(tests) endif() From d44b6be6eaa8c165d3526d61dcc0f1c459e5722f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Jan 2022 11:55:13 -0800 Subject: [PATCH 261/946] [RISCV] Don't Custom legalize f16/f32/f64 bitcasts if those types aren't Legal. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e1f1c49094424..411191343cf04 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1025,9 +1025,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::i64, Custom); - setOperationAction(ISD::BITCAST, MVT::f16, Custom); - setOperationAction(ISD::BITCAST, MVT::f32, Custom); - setOperationAction(ISD::BITCAST, MVT::f64, Custom); + if (Subtarget.hasStdExtZfh()) + setOperationAction(ISD::BITCAST, MVT::f16, Custom); + if (Subtarget.hasStdExtF()) + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + if (Subtarget.hasStdExtD()) + setOperationAction(ISD::BITCAST, MVT::f64, Custom); } } From 39e602b6c4335b2572c74eaf9a666bebde7fd8b5 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 22 Jan 2022 12:36:12 -0500 Subject: [PATCH 262/946] [InstCombine] try to fold binop with phi operands This is an alternate version of D115914 that handles/tests all binary opcodes. I suspect that we don't see these patterns too often because -simplifycfg would convert the minimal cases into selects rather than leave them in phi form (note: instcombine has logic holes for combining the select patterns too though, so that's another potential patch). 
We only create a new binop in a predecessor that unconditionally branches to the final block. https://alive2.llvm.org/ce/z/C57M2F https://alive2.llvm.org/ce/z/WHwAoU (not safe to speculate an sdiv for example) https://alive2.llvm.org/ce/z/rdVUvW (but it is ok on this path) Differential Revision: https://reviews.llvm.org/D117110 --- .../InstCombine/InstCombineAddSub.cpp | 12 ++ .../InstCombine/InstCombineAndOrXor.cpp | 9 ++ .../InstCombine/InstCombineInternal.h | 10 ++ .../InstCombine/InstCombineMulDivRem.cpp | 18 +++ .../InstCombine/InstCombineShifts.cpp | 3 + .../InstCombine/InstructionCombining.cpp | 64 +++++++++ .../InstCombine/binop-phi-operands.ll | 122 ++++++++---------- .../Transforms/InstCombine/zext-or-icmp.ll | 12 +- 8 files changed, 171 insertions(+), 79 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index a2e875638d7d6..0598f751febe2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1288,6 +1288,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); @@ -1536,6 +1539,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I)) return FoldedFAdd; @@ -1751,6 +1757,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // If this is a 'B = x-(-A)', change to B = x+A. 
@@ -2313,6 +2322,9 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + // Subtraction from -0.0 is the canonical form of fneg. // fsub -0.0, X ==> fneg X // fsub nsz 0.0, X ==> fneg nsz X diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index fe6a6c1203fd2..6bbb0251f2bc0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1872,6 +1872,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -2665,6 +2668,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(I)) @@ -3553,6 +3559,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index f92ee31a3de26..a7d1ff202e5e7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -608,6 +608,16 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final /// only possible if all operands to the PHI are constants). Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN); + /// For a binary operator with 2 phi operands, try to hoist the binary + /// operation before the phi. This can result in fewer instructions in + /// patterns where at least one set of phi operands simplifies. + /// Example: + /// BB3: binop (phi [X, BB1], [C1, BB2]), (phi [Y, BB1], [C2, BB2]) + /// --> + /// BB1: BO = binop X, Y + /// BB3: phi [BO, BB1], [(binop C1, C2), BB2] + Instruction *foldBinopWithPhiOperands(BinaryOperator &BO); + /// Given an instruction with a select as one operand and a constant as the /// other operand, try to fold the binary operator into the select arguments. 
/// This also works for Cast instructions, which obviously do not have a diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 076c3134d0782..1aa10b550fc40 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -155,6 +155,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); @@ -450,6 +453,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I)) return FoldedMul; @@ -750,6 +756,9 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, /// division instructions. /// Common integer divide transforms Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) { + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); bool IsSigned = I.getOpcode() == Instruction::SDiv; Type *Ty = I.getType(); @@ -1367,6 +1376,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + if (Instruction *R = foldFDivConstantDivisor(I)) return R; @@ -1468,6 +1480,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { /// remainder instructions. 
/// Common integer remainder transforms Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) { + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. @@ -1646,5 +1661,8 @@ Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) { if (Instruction *X = foldVectorBinop(I)) return X; + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9acad19df9df5..17f0c5c4cff0e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -369,6 +369,9 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I, } Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) { + if (Instruction *Phi = foldBinopWithPhiOperands(I)) + return Phi; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 89c5fef18eca8..32f6a980afa8d 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1287,6 +1287,70 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { return replaceInstUsesWith(I, NewPN); } +Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { + // TODO: This should be similar to the incoming values check in foldOpIntoPhi: + // we are guarding against replicating the binop in >1 predecessor. + // This could miss matching a phi with 2 constant incoming values. 
+ auto *Phi0 = dyn_cast(BO.getOperand(0)); + auto *Phi1 = dyn_cast(BO.getOperand(1)); + if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || + Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) + return nullptr; + + // TODO: Remove the restriction for binop being in the same block as the phis. + if (BO.getParent() != Phi0->getParent() || + BO.getParent() != Phi1->getParent()) + return nullptr; + + // Match a pair of incoming constants for one of the predecessor blocks. + BasicBlock *ConstBB, *OtherBB; + Constant *C0, *C1; + if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) { + ConstBB = Phi0->getIncomingBlock(0); + OtherBB = Phi0->getIncomingBlock(1); + } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) { + ConstBB = Phi0->getIncomingBlock(1); + OtherBB = Phi0->getIncomingBlock(0); + } else { + return nullptr; + } + if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1))) + return nullptr; + + // The block that we are hoisting to must reach here unconditionally. + // Otherwise, we could be speculatively executing an expensive or + // non-speculative op. + auto *PredBlockBranch = dyn_cast(OtherBB->getTerminator()); + if (!PredBlockBranch || PredBlockBranch->isConditional() || + !DT.isReachableFromEntry(OtherBB)) + return nullptr; + + // TODO: This check could be tightened to only apply to binops (div/rem) that + // are not safe to speculatively execute. But that could allow hoisting + // potentially expensive instructions (fdiv for example). + for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) + if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter)) + return nullptr; + + // Make a new binop in the predecessor block with the non-constant incoming + // values. 
+ Builder.SetInsertPoint(PredBlockBranch); + Value *NewBO = Builder.CreateBinOp(BO.getOpcode(), + Phi0->getIncomingValueForBlock(OtherBB), + Phi1->getIncomingValueForBlock(OtherBB)); + if (auto *NotFoldedNewBO = dyn_cast(NewBO)) + NotFoldedNewBO->copyIRFlags(&BO); + + // Fold constants for the predecessor block with constant incoming values. + Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1); + + // Replace the binop with a phi of the new values. The old phis are dead. + PHINode *NewPhi = PHINode::Create(BO.getType(), 2); + NewPhi->addIncoming(NewBO, OtherBB); + NewPhi->addIncoming(NewC, ConstBB); + return NewPhi; +} + Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { if (!isa(I.getOperand(1))) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll index 81428e898eda2..b43081383156f 100644 --- a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll +++ b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll @@ -4,6 +4,8 @@ declare void @use(i32) declare void @sideeffect() +; negative test (but we could allow this?) 
- don't hoist to conditional predecessor block + define i32 @add_const_incoming0_speculative(i1 %b, i32 %x, i32 %y) { ; CHECK-LABEL: @add_const_incoming0_speculative( ; CHECK-NEXT: entry: @@ -34,11 +36,10 @@ define i32 @add_const_incoming0_nonspeculative(i1 %b, i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i32 [ 17, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ] -; CHECK-NEXT: [[R:%.*]] = add i32 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 59, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[R]] ; entry: @@ -54,6 +55,8 @@ then: ret i32 %r } +; negative test (but we could allow this?) - don't hoist to conditional predecessor block + define i32 @sub_const_incoming0(i1 %b, i32 %x, i32 %y) { ; CHECK-LABEL: @sub_const_incoming0( ; CHECK-NEXT: entry: @@ -84,11 +87,10 @@ define i32 @sub_const_incoming1(i1 %b, i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i32 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i32 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = sub i32 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 25, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[R]] ; entry: @@ -109,11 +111,10 @@ define i8 @mul_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], 
[[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = mul i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ -54, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -134,11 +135,10 @@ define i8 @and_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = and i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -159,11 +159,10 @@ define i8 @xor_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = xor i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 59, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -184,11 +183,10 @@ define i64 @or_const_incoming1(i1 %b, i64 %x, i64 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i64 [ [[X:%.*]], [[IF]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i64 [ [[Y:%.*]], [[IF]] ], [ 16, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = or i64 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi 
i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -209,11 +207,10 @@ define i64 @or_const_incoming01(i1 %b, i64 %x, i64 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i64 [ 16, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ] -; CHECK-NEXT: [[R:%.*]] = or i64 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -234,11 +231,10 @@ define i64 @or_const_incoming10(i1 %b, i64 %x, i64 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i64 [ [[Y:%.*]], [[IF]] ], [ 16, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i64 [ [[X:%.*]], [[IF]] ], [ 3, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = or i64 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i64 [[R]] ; entry: @@ -254,6 +250,8 @@ then: ret i64 %r } +; negative test (but we could allow this?) 
- don't hoist to conditional predecessor block + define i8 @ashr_const_incoming0_speculative(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @ashr_const_incoming0_speculative( ; CHECK-NEXT: entry: @@ -284,11 +282,10 @@ define i8 @ashr_const_incoming0(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = ashr i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ 42, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ 3, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ] -; CHECK-NEXT: [[R:%.*]] = ashr i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 5, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -309,11 +306,10 @@ define i8 @lshr_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 3, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = lshr i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 5, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -334,11 +330,10 @@ define i8 @shl_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 3, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = shl nuw nsw i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 80, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 
[[R]] ; entry: @@ -384,11 +379,10 @@ define i8 @sdiv_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ -42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = sdiv i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ -2, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -409,11 +403,10 @@ define i8 @udiv_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ -42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = udiv i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 12, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -434,11 +427,10 @@ define i8 @srem_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = srem i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ -17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = srem i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 8, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -459,11 +451,10 @@ define i8 @urem_const_incoming1(i1 %b, i8 %x, i8 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; 
CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ -17, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = urem i8 [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 42, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i8 [[R]] ; entry: @@ -484,11 +475,10 @@ define float @fmul_const_incoming1(i1 %b, float %x, float %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = fmul float [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = fmul float [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 7.140000e+02, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[R]] ; entry: @@ -509,11 +499,10 @@ define float @fadd_const_incoming1(i1 %b, float %x, float %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = fadd fast float [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 5.900000e+01, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[R]] ; entry: @@ -534,11 +523,10 @@ define float @fsub_const_incoming1(i1 %b, float %x, float %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = fsub nnan 
ninf float [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf float [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 2.500000e+01, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[R]] ; entry: @@ -559,11 +547,10 @@ define float @frem_const_incoming1(i1 %b, float %x, float %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]] ; CHECK: if: +; CHECK-NEXT: [[TMP0:%.*]] = frem nsz float [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: br label [[THEN]] ; CHECK: then: -; CHECK-NEXT: [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[R:%.*]] = frem nsz float [[P0]], [[P1]] +; CHECK-NEXT: [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 8.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[R]] ; entry: @@ -658,6 +645,9 @@ then: ret i64 %r } +; The mul could be hoisted before the call that may not return +; if we are ok with speculating a potentially expensive op. + define i8 @mul_const_incoming0_speculatable(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @mul_const_incoming0_speculatable( ; CHECK-NEXT: entry: @@ -685,6 +675,8 @@ then: ret i8 %r } +; The udiv should never be hoisted before the call that may not return. + define i8 @udiv_const_incoming0_not_speculatable(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_const_incoming0_not_speculatable( ; CHECK-NEXT: entry: @@ -712,6 +704,8 @@ then: ret i8 %r } +; TODO: It is ok to hoist the udiv even though it is not in the same block as the phis. 
+ define i8 @udiv_const_incoming0_different_block(i1 %b, i8 %x, i8 %y) { ; CHECK-LABEL: @udiv_const_incoming0_different_block( ; CHECK-NEXT: entry: @@ -750,20 +744,10 @@ define { i64, i32 } @ParseRetVal(i1 %b, { i64, i32 } ()* %x) { ; CHECK-NEXT: [[T4:%.*]] = tail call { i64, i32 } [[X:%.*]]() ; CHECK-NEXT: [[T5:%.*]] = extractvalue { i64, i32 } [[T4]], 0 ; CHECK-NEXT: [[T6:%.*]] = extractvalue { i64, i32 } [[T4]], 1 -; CHECK-NEXT: [[T7:%.*]] = and i64 [[T5]], -4294967296 -; CHECK-NEXT: [[T8:%.*]] = and i64 [[T5]], 4294901760 -; CHECK-NEXT: [[T9:%.*]] = and i64 [[T5]], 65280 -; CHECK-NEXT: [[T10:%.*]] = and i64 [[T5]], 255 ; CHECK-NEXT: br label [[F]] ; CHECK: f: -; CHECK-NEXT: [[T12:%.*]] = phi i64 [ [[T10]], [[T]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[T13:%.*]] = phi i64 [ [[T9]], [[T]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[T14:%.*]] = phi i64 [ [[T8]], [[T]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[T15:%.*]] = phi i64 [ [[T7]], [[T]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[T16:%.*]] = phi i32 [ [[T6]], [[T]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[T17:%.*]] = or i64 [[T13]], [[T12]] -; CHECK-NEXT: [[T18:%.*]] = or i64 [[T17]], [[T14]] -; CHECK-NEXT: [[T19:%.*]] = or i64 [[T18]], [[T15]] +; CHECK-NEXT: [[T16:%.*]] = phi i32 [ [[T6]], [[T]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[T19:%.*]] = phi i64 [ [[T5]], [[T]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[T20:%.*]] = insertvalue { i64, i32 } poison, i64 [[T19]], 0 ; CHECK-NEXT: [[T21:%.*]] = insertvalue { i64, i32 } [[T20]], i32 [[T16]], 1 ; CHECK-NEXT: ret { i64, i32 } [[T21]] diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index 6b7f1d029cb67..68d1e8addd35a 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -46,11 +46,7 @@ define i32 @dont_widen_undef() { ; CHECK: block1: ; CHECK-NEXT: br label [[BLOCK2]] ; CHECK: block2: -; CHECK-NEXT: [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ 
true, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ] -; CHECK-NEXT: [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]] -; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[CMP1]] to i32 -; CHECK-NEXT: ret i32 [[CONV2]] +; CHECK-NEXT: ret i32 1 ; entry: br label %block2 @@ -76,11 +72,7 @@ define i32 @dont_widen_undef_logical() { ; CHECK: block1: ; CHECK-NEXT: br label [[BLOCK2]] ; CHECK: block2: -; CHECK-NEXT: [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ] -; CHECK-NEXT: [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]] -; CHECK-NEXT: [[CONV2:%.*]] = zext i1 [[CMP1]] to i32 -; CHECK-NEXT: ret i32 [[CONV2]] +; CHECK-NEXT: ret i32 1 ; entry: br label %block2 From 7c16647c3676587391f6bb80ec87d9621ca9472f Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 16 Jan 2022 05:52:22 +0000 Subject: [PATCH 263/946] [clang-tools-extra][cmake] Use `GNUInstallDirs` to support custom installation dirs. This is the original patch in my GNUInstallDirs series, now last to merge as the final piece! It arose as a new draft of D28234. I initially did the unorthodox thing of pushing to that when I wasn't the original author, but since I ended up - Using `GNUInstallDirs`, rather than mimicking it, as the original author was hesitant to do but others requested. - Converting all the packages, not just LLVM, affecting many more projects than LLVM itself. I figured it was time to make a new revision. I have used this patch series (and many back-ports) as the basis of https://github.com/NixOS/nixpkgs/pull/111487 for my distro (NixOS), which was merged last spring (2021). It looked like people were generally on board in D28234, but I make note of this here in case extra motivation is useful. --- As pointed out in the original issue, a central tension is that LLVM already has some partial support for these sorts of things.
Variables like `COMPILER_RT_INSTALL_PATH` have already been dealt with. Variables like `LLVM_LIBDIR_SUFFIX` however, will require further work, so that we may use `CMAKE_INSTALL_LIBDIR`. These remaining items will be addressed in further patches. What is here is now rote and so we should get it out of the way before dealing more intricately with the remainder. Reviewed By: #libunwind, #libc, #libc_abi, compnerd Differential Revision: https://reviews.llvm.org/D99484 --- clang-tools-extra/CMakeLists.txt | 1 + clang-tools-extra/clang-doc/tool/CMakeLists.txt | 4 ++-- .../clang-include-fixer/find-all-symbols/tool/CMakeLists.txt | 2 +- clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt | 4 ++-- clang-tools-extra/clang-tidy/CMakeLists.txt | 2 +- clang-tools-extra/clang-tidy/tool/CMakeLists.txt | 4 ++-- clang-tools-extra/modularize/CMakeLists.txt | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt index 2e73b6ba81d2e..7b8274a97336b 100644 --- a/clang-tools-extra/CMakeLists.txt +++ b/clang-tools-extra/CMakeLists.txt @@ -1,4 +1,5 @@ include(CMakeDependentOption) +include(GNUInstallDirs) option(CLANG_TIDY_ENABLE_STATIC_ANALYZER "Include static analyzer checks in clang-tidy" ON) diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt index 7e71478869160..fb8317b272932 100644 --- a/clang-tools-extra/clang-doc/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt @@ -19,9 +19,9 @@ target_link_libraries(clang-doc ) install(FILES ../assets/clang-doc-default-stylesheet.css - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-doc) install(FILES ../assets/index.js - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-doc) diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt 
b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt index 8f5509d22e24a..e6926a0d5bd10 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt @@ -20,5 +20,5 @@ target_link_libraries(find-all-symbols ) install(PROGRAMS run-find-all-symbols.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT find-all-symbols) diff --git a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt index 3936ac1e8a5a5..5b9e00ab87cd8 100644 --- a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt @@ -21,8 +21,8 @@ target_link_libraries(clang-include-fixer ) install(PROGRAMS clang-include-fixer.el - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-include-fixer) install(PROGRAMS clang-include-fixer.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-include-fixer) diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 455645050d93d..075e9f9909d65 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -113,7 +113,7 @@ add_subdirectory(utils) if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY . 
- DESTINATION include/clang-tidy + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/clang-tidy" COMPONENT clang-tidy-headers FILES_MATCHING PATTERN "*.h" diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt index ad3255b024fc6..4b8c93801501a 100644 --- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt @@ -52,9 +52,9 @@ target_link_libraries(clang-tidy install(PROGRAMS clang-tidy-diff.py - DESTINATION share/clang + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" COMPONENT clang-tidy) install(PROGRAMS run-clang-tidy.py - DESTINATION bin + DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clang-tidy RENAME run-clang-tidy) diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt index 4caae81c49b62..fb17e353c39fd 100644 --- a/clang-tools-extra/modularize/CMakeLists.txt +++ b/clang-tools-extra/modularize/CMakeLists.txt @@ -23,5 +23,5 @@ clang_target_link_libraries(modularize ) install(TARGETS modularize - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clang-extras) From c1988dbf2d191d771e3a396eaa6396500965787d Mon Sep 17 00:00:00 2001 From: Malhar Jajoo Date: Sat, 22 Jan 2022 22:05:38 +0000 Subject: [PATCH 264/946] [openmp] Allow x87 fp functions only in Openmp runtime for x86. 
This patch allows Openmp runtime atomic functions operating on x87 high-precision to be present only in Openmp runtime for x86 architectures The functions affected are: __kmpc_atomic_10 __kmpc_atomic_20 __kmpc_atomic_cmplx10_add __kmpc_atomic_cmplx10_div __kmpc_atomic_cmplx10_mul __kmpc_atomic_cmplx10_sub __kmpc_atomic_float10_add __kmpc_atomic_float10_div __kmpc_atomic_float10_mul __kmpc_atomic_float10_sub __kmpc_atomic_float10_add_fp __kmpc_atomic_float10_div_fp __kmpc_atomic_float10_mul_fp __kmpc_atomic_float10_sub_fp __kmpc_atomic_float10_max __kmpc_atomic_float10_min Differential Revision: https://reviews.llvm.org/D117473 --- openmp/runtime/src/kmp_atomic.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index 83d646054f633..0bd7b1a41ac46 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -1235,10 +1235,12 @@ MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_max MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_min +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 MIN_MAX_CRITICAL(float10, max, long double, <, 10r, 1) // __kmpc_atomic_float10_max MIN_MAX_CRITICAL(float10, min, long double, >, 10r, 1) // __kmpc_atomic_float10_min +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 #if KMP_HAVE_QUAD MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1) // __kmpc_atomic_float16_max @@ -1317,6 +1319,7 @@ ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, } /* ------------------------------------------------------------------------- */ +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 // routines for long double type ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) // __kmpc_atomic_float10_add @@ -1326,6 +1329,7 @@ ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1) // __kmpc_atomic_float10_mul ATOMIC_CRITICAL(float10, div, long double, /, 10r, 1) // 
__kmpc_atomic_float10_div +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 #if KMP_HAVE_QUAD // routines for _Quad type ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, @@ -1371,6 +1375,7 @@ ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1) // __kmpc_atomic_cmplx10_add ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, @@ -1379,6 +1384,7 @@ ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1) // __kmpc_atomic_cmplx10_mul ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1) // __kmpc_atomic_cmplx10_div +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 #if KMP_HAVE_QUAD ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1) // __kmpc_atomic_cmplx16_add @@ -1797,6 +1803,7 @@ ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_add_fp ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, @@ -1806,7 +1813,6 @@ ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_fp -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 // Reverse operations ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp @@ -3594,7 +3600,7 @@ void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); } } - +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 void 
__kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, void (*f)(void *, void *, void *)) { KMP_DEBUG_ASSERT(__kmp_init_serial); @@ -3615,6 +3621,7 @@ void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, #endif /* KMP_GOMP_COMPAT */ __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); } +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, void (*f)(void *, void *, void *)) { @@ -3636,7 +3643,7 @@ void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, #endif /* KMP_GOMP_COMPAT */ __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); } - +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, void (*f)(void *, void *, void *)) { KMP_DEBUG_ASSERT(__kmp_init_serial); @@ -3657,7 +3664,7 @@ void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, #endif /* KMP_GOMP_COMPAT */ __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); } - +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, void (*f)(void *, void *, void *)) { KMP_DEBUG_ASSERT(__kmp_init_serial); From b8467952404c3598c9c901332607eb1886e1721c Mon Sep 17 00:00:00 2001 From: Dave Date: Sat, 22 Jan 2022 17:30:27 -0800 Subject: [PATCH 265/946] [docs] [clang] Small documentation change for compilation databases We have a page dedicated to compilation databases including various ways to generate them, but we don't mention that clang has a built-in method to do this. This addresses that.
Reviewed By: joerg Differential Revision: https://reviews.llvm.org/D116882 --- clang/docs/JSONCompilationDatabase.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/docs/JSONCompilationDatabase.rst b/clang/docs/JSONCompilationDatabase.rst index 3595cf452f4ca..6fd17fe440add 100644 --- a/clang/docs/JSONCompilationDatabase.rst +++ b/clang/docs/JSONCompilationDatabase.rst @@ -29,6 +29,10 @@ system is not necessarily the best solution: Supported Systems ================= +Clang has the ablity to generate compilation database fragments via +the :option:`-MJ argument >`. You can concatenate those +fragments together between ``[`` and ``]`` to create a compilation database. + Currently `CMake `_ (since 2.8.5) supports generation of compilation databases for Unix Makefile builds (Ninja builds in the works) with the option ``CMAKE_EXPORT_COMPILE_COMMANDS``. From 37d1d02200b9472082304c191f396f0489d00e05 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Sun, 23 Jan 2022 09:14:58 +0800 Subject: [PATCH 266/946] [X86][MS] Change the alignment of f80 to 16 bytes on Windows 32bits to match with ICC MSVC currently doesn't support 80 bits long double. ICC supports it when the option `/Qlong-double` is specified. Changing the alignment of f80 to 16 bytes so that we can be compatible with ICC's option. 
Reviewed By: rnk, craig.topper Differential Revision: https://reviews.llvm.org/D115942 --- clang/lib/Basic/Targets/X86.h | 11 +- clang/test/CodeGen/target-data.c | 2 +- llvm/lib/IR/AutoUpgrade.cpp | 31 ++- llvm/lib/Target/X86/X86TargetMachine.cpp | 2 +- llvm/test/Bitcode/upgrade-datalayout3.ll | 2 +- llvm/test/Bitcode/upgrade-datalayout4.ll | 8 + .../test/CodeGen/X86/long-double-abi-align.ll | 4 +- llvm/test/CodeGen/X86/scalar-fp-to-i32.ll | 219 ++++++++++++------ llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 32 +-- .../Bitcode/DataLayoutUpgradeTest.cpp | 2 +- 10 files changed, 209 insertions(+), 104 deletions(-) create mode 100644 llvm/test/Bitcode/upgrade-datalayout4.ll diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index c952b8c9a3369..d1b66432e38b4 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -533,11 +533,12 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_32TargetInfo DoubleAlign = LongLongAlign = 64; bool IsWinCOFF = getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF(); - resetDataLayout(IsWinCOFF ? "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:" - "64-i64:64-f80:32-n8:16:32-a:0:32-S32" - : "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:" - "64-i64:64-f80:32-n8:16:32-a:0:32-S32", - IsWinCOFF ? "_" : ""); + bool IsMSVC = getTriple().isWindowsMSVCEnvironment(); + std::string Layout = IsWinCOFF ? "e-m:x" : "e-m:e"; + Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-"; + Layout += IsMSVC ? "f80:128" : "f80:32"; + Layout += "-n8:16:32-a:0:32-S32"; + resetDataLayout(Layout, IsWinCOFF ? 
"_" : ""); } }; diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index d702f845112bd..e4150837279ce 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -8,7 +8,7 @@ // RUN: %clang_cc1 -triple i686-unknown-win32 -emit-llvm -o - %s | \ // RUN: FileCheck --check-prefix=I686-WIN32 %s -// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32" +// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32" // RUN: %clang_cc1 -triple i686-unknown-cygwin -emit-llvm -o - %s | \ // RUN: FileCheck --check-prefix=I686-CYGWIN %s diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 5aa3b3008581f..b820eabf173d7 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -4576,18 +4576,31 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { return DL.empty() ? std::string("G1") : (DL + "-G1").str(); } + std::string Res = DL.str(); + if (!T.isX86()) + return Res; + + // If the datalayout matches the expected format, add pointer size address + // spaces to the datalayout. std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; - // If X86, and the datalayout matches the expected format, add pointer size - // address spaces to the datalayout. - if (!T.isX86() || DL.contains(AddrSpaces)) - return std::string(DL); + if (!DL.contains(AddrSpaces)) { + SmallVector Groups; + Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); + if (R.match(DL, &Groups)) + Res = (Groups[1] + AddrSpaces + Groups[3]).str(); + } - SmallVector Groups; - Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); - if (!R.match(DL, &Groups)) - return std::string(DL); + // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes. + // Raising the alignment is safe because Clang did not produce f80 values in + // the MSVC environment before this upgrade was added. 
+ if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) { + StringRef Ref = Res; + auto I = Ref.find("-f80:32-"); + if (I != StringRef::npos) + Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str(); + } - return (Groups[1] + AddrSpaces + Groups[3]).str(); + return Res; } void llvm::UpgradeAttributes(AttrBuilder &B) { diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 78bc5519c23ff..e3d0128dd73da 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -127,7 +127,7 @@ static std::string computeDataLayout(const Triple &TT) { // Some ABIs align long double to 128 bits, others to 32. if (TT.isOSNaCl() || TT.isOSIAMCU()) ; // No f80 - else if (TT.isArch64Bit() || TT.isOSDarwin()) + else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment()) Ret += "-f80:128"; else Ret += "-f80:32"; diff --git a/llvm/test/Bitcode/upgrade-datalayout3.ll b/llvm/test/Bitcode/upgrade-datalayout3.ll index 526ba6069dc6f..6d95f2407acf4 100644 --- a/llvm/test/Bitcode/upgrade-datalayout3.ll +++ b/llvm/test/Bitcode/upgrade-datalayout3.ll @@ -5,4 +5,4 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" target triple = "i686-pc-windows-msvc" -; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-S32" +; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-S32" diff --git a/llvm/test/Bitcode/upgrade-datalayout4.ll b/llvm/test/Bitcode/upgrade-datalayout4.ll new file mode 100644 index 0000000000000..ee0e5fe3bf6fa --- /dev/null +++ b/llvm/test/Bitcode/upgrade-datalayout4.ll @@ -0,0 +1,8 @@ +; Test to make sure datalayout is automatically upgraded. 
+; +; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s + +target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc" + +; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32" diff --git a/llvm/test/CodeGen/X86/long-double-abi-align.ll b/llvm/test/CodeGen/X86/long-double-abi-align.ll index ca45886db5705..6a409f1b1dce1 100644 --- a/llvm/test/CodeGen/X86/long-double-abi-align.ll +++ b/llvm/test/CodeGen/X86/long-double-abi-align.ll @@ -11,7 +11,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind { ; MSVC-NEXT: movl %esp, %ebp ; MSVC-NEXT: andl $-16, %esp ; MSVC-NEXT: subl $32, %esp -; MSVC-NEXT: fldt 12(%ebp) +; MSVC-NEXT: fldt 24(%ebp) ; MSVC-NEXT: fstpt (%esp) ; MSVC-NEXT: leal 8(%ebp), %eax ; MSVC-NEXT: pushl %eax @@ -21,7 +21,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind { ; MSVC-NEXT: pushl %eax ; MSVC-NEXT: calll _escape ; MSVC-NEXT: addl $4, %esp -; MSVC-NEXT: leal 24(%ebp), %eax +; MSVC-NEXT: leal 40(%ebp), %eax ; MSVC-NEXT: pushl %eax ; MSVC-NEXT: calll _escape ; MSVC-NEXT: addl $4, %esp diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll index 469c05d44813f..b22533a8c8ee2 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll @@ -344,8 +344,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; X86-AVX512-WIN: # %bb.0: ; X86-AVX512-WIN-NEXT: pushl %ebp ; X86-AVX512-WIN-NEXT: movl %esp, %ebp -; X86-AVX512-WIN-NEXT: andl $-8, %esp -; X86-AVX512-WIN-NEXT: subl $8, %esp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $16, %esp ; X86-AVX512-WIN-NEXT: fldt 8(%ebp) ; X86-AVX512-WIN-NEXT: fisttpll (%esp) ; X86-AVX512-WIN-NEXT: movl (%esp), %eax @@ -382,8 +382,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; X86-SSE3-WIN: # %bb.0: ; X86-SSE3-WIN-NEXT: pushl %ebp ; X86-SSE3-WIN-NEXT: movl %esp, 
%ebp -; X86-SSE3-WIN-NEXT: andl $-8, %esp -; X86-SSE3-WIN-NEXT: subl $8, %esp +; X86-SSE3-WIN-NEXT: andl $-16, %esp +; X86-SSE3-WIN-NEXT: subl $16, %esp ; X86-SSE3-WIN-NEXT: fldt 8(%ebp) ; X86-SSE3-WIN-NEXT: fisttpll (%esp) ; X86-SSE3-WIN-NEXT: movl (%esp), %eax @@ -420,8 +420,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; X86-SSE2-WIN: # %bb.0: ; X86-SSE2-WIN-NEXT: pushl %ebp ; X86-SSE2-WIN-NEXT: movl %esp, %ebp -; X86-SSE2-WIN-NEXT: andl $-8, %esp -; X86-SSE2-WIN-NEXT: subl $16, %esp +; X86-SSE2-WIN-NEXT: andl $-16, %esp +; X86-SSE2-WIN-NEXT: subl $32, %esp ; X86-SSE2-WIN-NEXT: fldt 8(%ebp) ; X86-SSE2-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE2-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -482,8 +482,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; X86-SSE1-WIN: # %bb.0: ; X86-SSE1-WIN-NEXT: pushl %ebp ; X86-SSE1-WIN-NEXT: movl %esp, %ebp -; X86-SSE1-WIN-NEXT: andl $-8, %esp -; X86-SSE1-WIN-NEXT: subl $16, %esp +; X86-SSE1-WIN-NEXT: andl $-16, %esp +; X86-SSE1-WIN-NEXT: subl $32, %esp ; X86-SSE1-WIN-NEXT: fldt 8(%ebp) ; X86-SSE1-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE1-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -516,8 +516,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; X87-WIN: # %bb.0: ; X87-WIN-NEXT: pushl %ebp ; X87-WIN-NEXT: movl %esp, %ebp -; X87-WIN-NEXT: andl $-8, %esp -; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $32, %esp ; X87-WIN-NEXT: fldt 8(%ebp) ; X87-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -550,14 +550,27 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { } define i32 @x_to_s32(x86_fp80 %a) nounwind { -; X86-AVX512-LABEL: x_to_s32: -; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: pushl %eax -; X86-AVX512-NEXT: fldt {{[0-9]+}}(%esp) -; X86-AVX512-NEXT: fisttpl (%esp) -; X86-AVX512-NEXT: movl (%esp), %eax -; X86-AVX512-NEXT: popl %ecx -; X86-AVX512-NEXT: retl +; X86-AVX512-WIN-LABEL: x_to_s32: +; X86-AVX512-WIN: # %bb.0: +; X86-AVX512-WIN-NEXT: pushl %ebp +; 
X86-AVX512-WIN-NEXT: movl %esp, %ebp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $16, %esp +; X86-AVX512-WIN-NEXT: fldt 8(%ebp) +; X86-AVX512-WIN-NEXT: fisttpl {{[0-9]+}}(%esp) +; X86-AVX512-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-WIN-NEXT: movl %ebp, %esp +; X86-AVX512-WIN-NEXT: popl %ebp +; X86-AVX512-WIN-NEXT: retl +; +; X86-AVX512-LIN-LABEL: x_to_s32: +; X86-AVX512-LIN: # %bb.0: +; X86-AVX512-LIN-NEXT: pushl %eax +; X86-AVX512-LIN-NEXT: fldt {{[0-9]+}}(%esp) +; X86-AVX512-LIN-NEXT: fisttpl (%esp) +; X86-AVX512-LIN-NEXT: movl (%esp), %eax +; X86-AVX512-LIN-NEXT: popl %ecx +; X86-AVX512-LIN-NEXT: retl ; ; X64-AVX512-WIN-LABEL: x_to_s32: ; X64-AVX512-WIN: # %bb.0: @@ -575,14 +588,27 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind { ; X64-AVX512-LIN-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-AVX512-LIN-NEXT: retq ; -; X86-SSE3-LABEL: x_to_s32: -; X86-SSE3: # %bb.0: -; X86-SSE3-NEXT: pushl %eax -; X86-SSE3-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE3-NEXT: fisttpl (%esp) -; X86-SSE3-NEXT: movl (%esp), %eax -; X86-SSE3-NEXT: popl %ecx -; X86-SSE3-NEXT: retl +; X86-SSE3-WIN-LABEL: x_to_s32: +; X86-SSE3-WIN: # %bb.0: +; X86-SSE3-WIN-NEXT: pushl %ebp +; X86-SSE3-WIN-NEXT: movl %esp, %ebp +; X86-SSE3-WIN-NEXT: andl $-16, %esp +; X86-SSE3-WIN-NEXT: subl $16, %esp +; X86-SSE3-WIN-NEXT: fldt 8(%ebp) +; X86-SSE3-WIN-NEXT: fisttpl {{[0-9]+}}(%esp) +; X86-SSE3-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE3-WIN-NEXT: movl %ebp, %esp +; X86-SSE3-WIN-NEXT: popl %ebp +; X86-SSE3-WIN-NEXT: retl +; +; X86-SSE3-LIN-LABEL: x_to_s32: +; X86-SSE3-LIN: # %bb.0: +; X86-SSE3-LIN-NEXT: pushl %eax +; X86-SSE3-LIN-NEXT: fldt {{[0-9]+}}(%esp) +; X86-SSE3-LIN-NEXT: fisttpl (%esp) +; X86-SSE3-LIN-NEXT: movl (%esp), %eax +; X86-SSE3-LIN-NEXT: popl %ecx +; X86-SSE3-LIN-NEXT: retl ; ; X64-SSE3-WIN-LABEL: x_to_s32: ; X64-SSE3-WIN: # %bb.0: @@ -600,20 +626,39 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind { ; X64-SSE3-LIN-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; 
X64-SSE3-LIN-NEXT: retq ; -; X86-SSE2-LABEL: x_to_s32: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: subl $8, %esp -; X86-SSE2-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fnstcw (%esp) -; X86-SSE2-NEXT: movzwl (%esp), %eax -; X86-SSE2-NEXT: orl $3072, %eax # imm = 0xC00 -; X86-SSE2-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fistpl {{[0-9]+}}(%esp) -; X86-SSE2-NEXT: fldcw (%esp) -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: addl $8, %esp -; X86-SSE2-NEXT: retl +; X86-SSE2-WIN-LABEL: x_to_s32: +; X86-SSE2-WIN: # %bb.0: +; X86-SSE2-WIN-NEXT: pushl %ebp +; X86-SSE2-WIN-NEXT: movl %esp, %ebp +; X86-SSE2-WIN-NEXT: andl $-16, %esp +; X86-SSE2-WIN-NEXT: subl $16, %esp +; X86-SSE2-WIN-NEXT: fldt 8(%ebp) +; X86-SSE2-WIN-NEXT: fnstcw (%esp) +; X86-SSE2-WIN-NEXT: movzwl (%esp), %eax +; X86-SSE2-WIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-SSE2-WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-SSE2-WIN-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-SSE2-WIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-SSE2-WIN-NEXT: fldcw (%esp) +; X86-SSE2-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-WIN-NEXT: movl %ebp, %esp +; X86-SSE2-WIN-NEXT: popl %ebp +; X86-SSE2-WIN-NEXT: retl +; +; X86-SSE2-LIN-LABEL: x_to_s32: +; X86-SSE2-LIN: # %bb.0: +; X86-SSE2-LIN-NEXT: subl $8, %esp +; X86-SSE2-LIN-NEXT: fldt {{[0-9]+}}(%esp) +; X86-SSE2-LIN-NEXT: fnstcw (%esp) +; X86-SSE2-LIN-NEXT: movzwl (%esp), %eax +; X86-SSE2-LIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-SSE2-LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-SSE2-LIN-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-SSE2-LIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-SSE2-LIN-NEXT: fldcw (%esp) +; X86-SSE2-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-LIN-NEXT: addl $8, %esp +; X86-SSE2-LIN-NEXT: retl ; ; X64-SSE2-WIN-LABEL: x_to_s32: ; X64-SSE2-WIN: # %bb.0: @@ -643,35 +688,73 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind { ; X64-SSE2-LIN-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; X64-SSE2-LIN-NEXT: retq ; -; 
X86-SSE1-LABEL: x_to_s32: -; X86-SSE1: # %bb.0: -; X86-SSE1-NEXT: subl $8, %esp -; X86-SSE1-NEXT: fldt {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: fnstcw (%esp) -; X86-SSE1-NEXT: movzwl (%esp), %eax -; X86-SSE1-NEXT: orl $3072, %eax # imm = 0xC00 -; X86-SSE1-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: fistpl {{[0-9]+}}(%esp) -; X86-SSE1-NEXT: fldcw (%esp) -; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE1-NEXT: addl $8, %esp -; X86-SSE1-NEXT: retl +; X86-SSE1-WIN-LABEL: x_to_s32: +; X86-SSE1-WIN: # %bb.0: +; X86-SSE1-WIN-NEXT: pushl %ebp +; X86-SSE1-WIN-NEXT: movl %esp, %ebp +; X86-SSE1-WIN-NEXT: andl $-16, %esp +; X86-SSE1-WIN-NEXT: subl $16, %esp +; X86-SSE1-WIN-NEXT: fldt 8(%ebp) +; X86-SSE1-WIN-NEXT: fnstcw (%esp) +; X86-SSE1-WIN-NEXT: movzwl (%esp), %eax +; X86-SSE1-WIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-SSE1-WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-SSE1-WIN-NEXT: fldcw {{[0-9]+}}(%esp) +; X86-SSE1-WIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-SSE1-WIN-NEXT: fldcw (%esp) +; X86-SSE1-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-WIN-NEXT: movl %ebp, %esp +; X86-SSE1-WIN-NEXT: popl %ebp +; X86-SSE1-WIN-NEXT: retl ; -; X87-LABEL: x_to_s32: -; X87: # %bb.0: -; X87-NEXT: subl $8, %esp -; X87-NEXT: fldt {{[0-9]+}}(%esp) -; X87-NEXT: fnstcw (%esp) -; X87-NEXT: movzwl (%esp), %eax -; X87-NEXT: orl $3072, %eax # imm = 0xC00 -; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) -; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: fistpl {{[0-9]+}}(%esp) -; X87-NEXT: fldcw (%esp) -; X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X87-NEXT: addl $8, %esp -; X87-NEXT: retl +; X86-SSE1-LIN-LABEL: x_to_s32: +; X86-SSE1-LIN: # %bb.0: +; X86-SSE1-LIN-NEXT: subl $8, %esp +; X86-SSE1-LIN-NEXT: fldt {{[0-9]+}}(%esp) +; X86-SSE1-LIN-NEXT: fnstcw (%esp) +; X86-SSE1-LIN-NEXT: movzwl (%esp), %eax +; X86-SSE1-LIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X86-SSE1-LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-SSE1-LIN-NEXT: fldcw {{[0-9]+}}(%esp) +; 
X86-SSE1-LIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X86-SSE1-LIN-NEXT: fldcw (%esp) +; X86-SSE1-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE1-LIN-NEXT: addl $8, %esp +; X86-SSE1-LIN-NEXT: retl +; +; X87-WIN-LABEL: x_to_s32: +; X87-WIN: # %bb.0: +; X87-WIN-NEXT: pushl %ebp +; X87-WIN-NEXT: movl %esp, %ebp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: fldt 8(%ebp) +; X87-WIN-NEXT: fnstcw (%esp) +; X87-WIN-NEXT: movzwl (%esp), %eax +; X87-WIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X87-WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-WIN-NEXT: fldcw {{[0-9]+}}(%esp) +; X87-WIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X87-WIN-NEXT: fldcw (%esp) +; X87-WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-WIN-NEXT: movl %ebp, %esp +; X87-WIN-NEXT: popl %ebp +; X87-WIN-NEXT: retl +; +; X87-LIN-LABEL: x_to_s32: +; X87-LIN: # %bb.0: +; X87-LIN-NEXT: subl $8, %esp +; X87-LIN-NEXT: fldt {{[0-9]+}}(%esp) +; X87-LIN-NEXT: fnstcw (%esp) +; X87-LIN-NEXT: movzwl (%esp), %eax +; X87-LIN-NEXT: orl $3072, %eax # imm = 0xC00 +; X87-LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-LIN-NEXT: fldcw {{[0-9]+}}(%esp) +; X87-LIN-NEXT: fistpl {{[0-9]+}}(%esp) +; X87-LIN-NEXT: fldcw (%esp) +; X87-LIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-LIN-NEXT: addl $8, %esp +; X87-LIN-NEXT: retl %r = fptosi x86_fp80 %a to i32 ret i32 %r } diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll index 0ce9c87057467..718b8b558c9b2 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -909,8 +909,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X86-AVX512-WIN: # %bb.0: ; X86-AVX512-WIN-NEXT: pushl %ebp ; X86-AVX512-WIN-NEXT: movl %esp, %ebp -; X86-AVX512-WIN-NEXT: andl $-8, %esp -; X86-AVX512-WIN-NEXT: subl $8, %esp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $16, %esp ; X86-AVX512-WIN-NEXT: fldt 8(%ebp) ; X86-AVX512-WIN-NEXT: flds __real@5f000000 ; X86-AVX512-WIN-NEXT: xorl 
%edx, %edx @@ -985,8 +985,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X86-SSE3-WIN: # %bb.0: ; X86-SSE3-WIN-NEXT: pushl %ebp ; X86-SSE3-WIN-NEXT: movl %esp, %ebp -; X86-SSE3-WIN-NEXT: andl $-8, %esp -; X86-SSE3-WIN-NEXT: subl $8, %esp +; X86-SSE3-WIN-NEXT: andl $-16, %esp +; X86-SSE3-WIN-NEXT: subl $16, %esp ; X86-SSE3-WIN-NEXT: fldt 8(%ebp) ; X86-SSE3-WIN-NEXT: flds __real@5f000000 ; X86-SSE3-WIN-NEXT: xorl %edx, %edx @@ -1061,8 +1061,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X86-SSE2-WIN: # %bb.0: ; X86-SSE2-WIN-NEXT: pushl %ebp ; X86-SSE2-WIN-NEXT: movl %esp, %ebp -; X86-SSE2-WIN-NEXT: andl $-8, %esp -; X86-SSE2-WIN-NEXT: subl $16, %esp +; X86-SSE2-WIN-NEXT: andl $-16, %esp +; X86-SSE2-WIN-NEXT: subl $32, %esp ; X86-SSE2-WIN-NEXT: fldt 8(%ebp) ; X86-SSE2-WIN-NEXT: flds __real@5f000000 ; X86-SSE2-WIN-NEXT: xorl %edx, %edx @@ -1161,8 +1161,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X87-WIN: # %bb.0: ; X87-WIN-NEXT: pushl %ebp ; X87-WIN-NEXT: movl %esp, %ebp -; X87-WIN-NEXT: andl $-8, %esp -; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $32, %esp ; X87-WIN-NEXT: fldt 8(%ebp) ; X87-WIN-NEXT: flds __real@5f000000 ; X87-WIN-NEXT: fucom %st(1) @@ -1235,8 +1235,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind { ; X86-AVX512-WIN: # %bb.0: ; X86-AVX512-WIN-NEXT: pushl %ebp ; X86-AVX512-WIN-NEXT: movl %esp, %ebp -; X86-AVX512-WIN-NEXT: andl $-8, %esp -; X86-AVX512-WIN-NEXT: subl $8, %esp +; X86-AVX512-WIN-NEXT: andl $-16, %esp +; X86-AVX512-WIN-NEXT: subl $16, %esp ; X86-AVX512-WIN-NEXT: fldt 8(%ebp) ; X86-AVX512-WIN-NEXT: fisttpll (%esp) ; X86-AVX512-WIN-NEXT: movl (%esp), %eax @@ -1275,8 +1275,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind { ; X86-SSE3-WIN: # %bb.0: ; X86-SSE3-WIN-NEXT: pushl %ebp ; X86-SSE3-WIN-NEXT: movl %esp, %ebp -; X86-SSE3-WIN-NEXT: andl $-8, %esp -; X86-SSE3-WIN-NEXT: subl $8, %esp +; X86-SSE3-WIN-NEXT: andl $-16, %esp +; X86-SSE3-WIN-NEXT: subl $16, %esp ; X86-SSE3-WIN-NEXT: fldt 
8(%ebp) ; X86-SSE3-WIN-NEXT: fisttpll (%esp) ; X86-SSE3-WIN-NEXT: movl (%esp), %eax @@ -1315,8 +1315,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind { ; X86-SSE2-WIN: # %bb.0: ; X86-SSE2-WIN-NEXT: pushl %ebp ; X86-SSE2-WIN-NEXT: movl %esp, %ebp -; X86-SSE2-WIN-NEXT: andl $-8, %esp -; X86-SSE2-WIN-NEXT: subl $16, %esp +; X86-SSE2-WIN-NEXT: andl $-16, %esp +; X86-SSE2-WIN-NEXT: subl $32, %esp ; X86-SSE2-WIN-NEXT: fldt 8(%ebp) ; X86-SSE2-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE2-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -1379,8 +1379,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind { ; X87-WIN: # %bb.0: ; X87-WIN-NEXT: pushl %ebp ; X87-WIN-NEXT: movl %esp, %ebp -; X87-WIN-NEXT: andl $-8, %esp -; X87-WIN-NEXT: subl $16, %esp +; X87-WIN-NEXT: andl $-16, %esp +; X87-WIN-NEXT: subl $32, %esp ; X87-WIN-NEXT: fldt 8(%ebp) ; X87-WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp index ec900471a833e..0c835744e4fa7 100644 --- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -24,7 +24,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { EXPECT_EQ(DL1, "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64" "-f80:128-n8:16:32:64-S128"); EXPECT_EQ(DL2, "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64" - "-f80:32-n8:16:32-S32"); + "-f80:128-n8:16:32-S32"); EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128" "-n32:64-S128"); From 2513b79030636e62a785019bb9477920c8140d85 Mon Sep 17 00:00:00 2001 From: Joe Loser Date: Wed, 19 Jan 2022 15:16:15 -0500 Subject: [PATCH 267/946] [libc++] Implement LWG3549: view_interface need not inherit from view_base Implement LWG3549 by making `view_interface` not inherit from `view_base`. 
Types are still views if they have a public and unambiguous derivation from `view_interface`, so adjust the `enable_view` machinery as such to account for that. Differential Revision: https://reviews.llvm.org/D117714 --- libcxx/docs/Status/Cxx2bIssues.csv | 2 +- libcxx/include/__ranges/enable_view.h | 12 +++++++++++- libcxx/include/__ranges/view_interface.h | 3 +-- .../view.interface/view.interface.pass.cpp | 9 +++++++++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv index 726668f3bdb2a..a91e3b52a1705 100644 --- a/libcxx/docs/Status/Cxx2bIssues.csv +++ b/libcxx/docs/Status/Cxx2bIssues.csv @@ -91,7 +91,7 @@ `3544 `__,"``format-arg-store::args`` is unintentionally not exposition-only","June 2021","","","|format|" `3546 `__,"``common_iterator``'s postfix-proxy is not quite right","June 2021","","","|ranges|" `3548 `__,"``shared_ptr`` construction from ``unique_ptr`` should move (not copy) the deleter","June 2021","","" -`3549 `__,"``view_interface`` is overspecified to derive from ``view_base``","June 2021","","","|ranges|" +`3549 `__,"``view_interface`` is overspecified to derive from ``view_base``","June 2021","|Complete|","14.0","|ranges|" `3551 `__,"``borrowed_{iterator,subrange}_t`` are overspecified","June 2021","","","|ranges|" `3552 `__,"Parallel specialized memory algorithms should require forward iterators","June 2021","","" `3553 `__,"Useless constraint in ``split_view::outer-iterator::value_type::begin()``","June 2021","","","|ranges|" diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h index a09de11da81ed..e1daec046fc0c 100644 --- a/libcxx/include/__ranges/enable_view.h +++ b/libcxx/include/__ranges/enable_view.h @@ -12,6 +12,7 @@ #include <__config> #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -25,8 +26,17 @@ namespace ranges { struct view_base { }; +template + requires 
is_class_v<_Derived> && same_as<_Derived, remove_cv_t<_Derived>> +class view_interface; + +template + requires is_convertible_v<_Op*, view_interface<_Yp>*> +void __is_derived_from_view_interface(const _Op*, const view_interface<_Yp>*); + template -inline constexpr bool enable_view = derived_from<_Tp, view_base>; +inline constexpr bool enable_view = derived_from<_Tp, view_base> || + requires { ranges::__is_derived_from_view_interface((_Tp*)nullptr, (_Tp*)nullptr); }; } // end namespace ranges diff --git a/libcxx/include/__ranges/view_interface.h b/libcxx/include/__ranges/view_interface.h index 8a1f5d8c9251c..c5215cbcb8e39 100644 --- a/libcxx/include/__ranges/view_interface.h +++ b/libcxx/include/__ranges/view_interface.h @@ -18,7 +18,6 @@ #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/empty.h> -#include <__ranges/enable_view.h> #include #include @@ -40,7 +39,7 @@ void __implicitly_convert_to(type_identity_t<_Tp>) noexcept; template requires is_class_v<_Derived> && same_as<_Derived, remove_cv_t<_Derived>> -class view_interface : public view_base { +class view_interface { _LIBCPP_HIDE_FROM_ABI constexpr _Derived& __derived() noexcept { static_assert(sizeof(_Derived) && derived_from<_Derived, view_interface> && view<_Derived>); diff --git a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp index ebc443ebe08d3..b3f9b972a0cb2 100644 --- a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp @@ -79,6 +79,11 @@ struct MoveOnlyForwardRange : std::ranges::view_interface }; static_assert(std::ranges::view); +struct MI : std::ranges::view_interface, + std::ranges::view_interface { +}; +static_assert(!std::ranges::view); + struct EmptyIsTrue : std::ranges::view_interface { int buff[8] = {0, 1, 2, 3, 4, 5, 6, 7}; constexpr ForwardIter begin() 
const { return ForwardIter(const_cast(buff)); } @@ -300,6 +305,10 @@ constexpr bool testFrontBack() { return true; } +struct V1 : std::ranges::view_interface { }; +struct V2 : std::ranges::view_interface { V1 base_; }; +static_assert(sizeof(V2) == sizeof(V1)); + int main(int, char**) { testEmpty(); static_assert(testEmpty()); From 3cf15af2daa9177a5604d122a9c5cbcf86f7fe33 Mon Sep 17 00:00:00 2001 From: eopXD Date: Fri, 21 Jan 2022 00:39:23 -0800 Subject: [PATCH 268/946] [RISCV] Remove experimental prefix from rvv-related extensions. Extensions affected: +v, +zve*, +zvl* Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117860 --- .../RISCV/riscv-attr-builtin-alias-err.c | 2 +- .../CodeGen/RISCV/riscv-attr-builtin-alias.c | 2 +- .../test/CodeGen/RISCV/riscv-inline-asm-rvv.c | 4 +- clang/test/CodeGen/RISCV/riscv-v-debuginfo.c | 2 +- clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp | 2 +- .../RISCV/rvv-intrinsics-overloaded/vaadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vadc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vand.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vasub.c | 2 +- .../rvv-intrinsics-overloaded/vcompress.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vcpop.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vdiv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfabs.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfclass.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfcvt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfdiv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfirst.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmacc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmax.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmerge.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmin.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmsac.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmsub.c | 2 +- 
.../RISCV/rvv-intrinsics-overloaded/vfmul.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfmv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfncvt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfneg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfnmacc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfnmadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfnmsac.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfnmsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfrdiv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfrec7.c | 2 +- .../rvv-intrinsics-overloaded/vfredmax.c | 2 +- .../rvv-intrinsics-overloaded/vfredmin.c | 2 +- .../rvv-intrinsics-overloaded/vfredsum.c | 2 +- .../rvv-intrinsics-overloaded/vfrsqrt7.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfrsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfsgnj.c | 2 +- .../rvv-intrinsics-overloaded/vfslide1down.c | 2 +- .../rvv-intrinsics-overloaded/vfslide1up.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfsqrt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwcvt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwmacc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwmsac.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwmul.c | 2 +- .../rvv-intrinsics-overloaded/vfwnmacc.c | 2 +- .../rvv-intrinsics-overloaded/vfwnmsac.c | 2 +- .../rvv-intrinsics-overloaded/vfwredsum.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vfwsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vget.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vid.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/viota.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vle.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vlmul.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vloxei.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vloxseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vlse.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vlseg.c | 4 +- .../RISCV/rvv-intrinsics-overloaded/vlsegff.c | 4 +- 
.../RISCV/rvv-intrinsics-overloaded/vlsseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vluxei.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vluxseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmacc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmadc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmand.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmax.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmerge.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmfeq.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmfge.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmfgt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmfle.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmflt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmfne.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmin.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmmv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmnand.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmnor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmnot.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsbc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsbf.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmseq.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsge.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsgt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsif.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsle.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmslt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsne.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmsof.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmul.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmv.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmxnor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vmxor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vnclip.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vncvt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vneg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vnmsac.c | 2 +- 
.../RISCV/rvv-intrinsics-overloaded/vnmsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vnot.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vnsra.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vnsrl.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredand.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredmax.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredmin.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredsum.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vredxor.c | 2 +- .../rvv-intrinsics-overloaded/vreinterpret.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vrem.c | 2 +- .../rvv-intrinsics-overloaded/vrgather.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vrsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsbc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vse.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vset.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsext.c | 2 +- .../rvv-intrinsics-overloaded/vslide1down.c | 2 +- .../rvv-intrinsics-overloaded/vslide1up.c | 2 +- .../rvv-intrinsics-overloaded/vslidedown.c | 2 +- .../rvv-intrinsics-overloaded/vslideup.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsll.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsmul.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsoxei.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsoxseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsra.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsrl.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsse.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vssra.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vssrl.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vssseg.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vssub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsuxei.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vsuxseg.c | 2 +- 
.../RISCV/rvv-intrinsics-overloaded/vwadd.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vwcvt.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vwmacc.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vwmul.c | 2 +- .../rvv-intrinsics-overloaded/vwredsum.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vwsub.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vxor.c | 2 +- .../RISCV/rvv-intrinsics-overloaded/vzext.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vaadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vadc.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vand.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vasub.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vcompress.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vcpop.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vdiv.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfabs.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfadd.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfclass.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfirst.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfmacc.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfmadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfmax.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfmerge.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfmin.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfmsac.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfmsub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfmul.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfmv.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfncvt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfneg.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfnmacc.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfnmadd.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfnmsac.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfnmsub.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfrdiv.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfrec7.c | 2 +- 
.../CodeGen/RISCV/rvv-intrinsics/vfredmax.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfredmin.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfredsum.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfrsub.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfsgnj.c | 2 +- .../RISCV/rvv-intrinsics/vfslide1down.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfslide1up.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfsqrt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vfsub.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwadd.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwcvt.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwmacc.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwmsac.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwmul.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwredsum.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vfwsub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vget.c | 2 +- clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/viota.c | 2 +- clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vleff.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vlmul.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vloxei.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vloxseg.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vlse.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vlseg.c | 4 +- .../CodeGen/RISCV/rvv-intrinsics/vlsegff.c | 4 +- .../CodeGen/RISCV/rvv-intrinsics/vlsseg.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vluxei.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vluxseg.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmacc.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmadc.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmand.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmax.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmclr.c | 2 +- 
.../CodeGen/RISCV/rvv-intrinsics/vmerge.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmfge.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmfle.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmflt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmfne.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmin.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmmv.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vmnand.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmnor.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmnot.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmor.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmseq.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmset.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsge.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsif.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsle.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmslt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsne.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmsof.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmul.c | 2 +- clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vmxnor.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vmxor.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vnclip.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vncvt.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vneg.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vnmsac.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vnmsub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vnot.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vnsra.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c | 2 +- clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vredand.c | 2 +- 
.../CodeGen/RISCV/rvv-intrinsics/vredmax.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vredmin.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vredor.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vredsum.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vredxor.c | 2 +- .../RISCV/rvv-intrinsics/vreinterpret.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vrem.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vrgather.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vrsub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsbc.c | 2 +- clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vset.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsetvl.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsext.c | 2 +- .../RISCV/rvv-intrinsics/vslide1down.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vslide1up.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vslidedown.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vslideup.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsll.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsmul.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsoxei.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsoxseg.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsra.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsrl.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsse.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsseg.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vssra.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vssrl.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vssseg.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vssub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vsub.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsuxei.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vsuxseg.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vundefined.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vwadd.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vwmacc.c | 2 +- 
.../test/CodeGen/RISCV/rvv-intrinsics/vwmul.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/vwredsum.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vwsub.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vxor.c | 2 +- .../test/CodeGen/RISCV/rvv-intrinsics/vzext.c | 2 +- clang/test/CodeGen/RISCV/rvv_errors.c | 2 +- clang/test/Driver/riscv-arch.c | 41 +++---- clang/test/Headers/riscv-vector-header.c | 2 +- .../test/Preprocessor/riscv-target-features.c | 34 +++--- clang/test/Sema/riscv-types.c | 2 +- clang/utils/TableGen/RISCVVEmitter.cpp | 2 +- llvm/lib/Support/RISCVISAInfo.cpp | 18 +-- llvm/lib/Target/RISCV/RISCV.td | 16 +-- .../CostModel/RISCV/fixed-vector-gather.ll | 2 +- .../CostModel/RISCV/fixed-vector-scatter.ll | 2 +- .../Analysis/CostModel/RISCV/rvv-shuffle.ll | 2 +- llvm/test/CodeGen/RISCV/attributes.ll | 8 +- llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll | 4 +- .../RISCV/rvv/access-fixed-objects-by-rvv.ll | 2 +- .../RISCV/rvv/addi-scalable-offset.mir | 2 +- .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll | 2 +- .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll | 4 +- .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/calling-conv.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/combine-sats.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/combine-splats.ll | 4 +- .../CodeGen/RISCV/rvv/combine-store-fp.ll | 4 +- .../RISCV/rvv/common-shuffle-patterns.ll | 2 +- .../rvv/commuted-op-indices-regression.mir | 2 +- .../CodeGen/RISCV/rvv/constant-folding.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 8 +- llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 8 +- .../RISCV/rvv/debug-info-rvv-dbg-value.mir | 4 +- .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 2 +- .../CodeGen/RISCV/rvv/extload-truncstore.ll | 4 +- .../CodeGen/RISCV/rvv/extract-subvector.ll | 4 +- .../CodeGen/RISCV/rvv/extractelt-fp-rv32.ll | 2 +- 
.../CodeGen/RISCV/rvv/extractelt-fp-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 4 +- .../CodeGen/RISCV/rvv/extractelt-int-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/extractelt-int-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll | 4 +- ...ixed-vector-strided-load-store-negative.ll | 2 +- .../rvv/fixed-vector-strided-load-store.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll | 8 +- .../rvv/fixed-vectors-bitcast-large-vector.ll | 6 +- .../RISCV/rvv/fixed-vectors-bitcast.ll | 8 +- .../RISCV/rvv/fixed-vectors-bitreverse.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 8 +- .../rvv/fixed-vectors-calling-conv-fastcc.ll | 4 +- .../RISCV/rvv/fixed-vectors-calling-conv.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 20 ++-- .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 20 ++-- .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 8 +- .../rvv/fixed-vectors-emergency-slot.mir | 2 +- .../rvv/fixed-vectors-extload-truncstore.ll | 8 +- .../RISCV/rvv/fixed-vectors-extract-i1.ll | 4 +- .../rvv/fixed-vectors-extract-subvector.ll | 4 +- .../RISCV/rvv/fixed-vectors-extract.ll | 4 +- .../RISCV/rvv/fixed-vectors-fp-bitcast.ll | 4 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 8 +- .../RISCV/rvv/fixed-vectors-fp-conv.ll | 8 +- .../RISCV/rvv/fixed-vectors-fp-interleave.ll | 8 +- .../RISCV/rvv/fixed-vectors-fp-setcc.ll | 4 +- .../RISCV/rvv/fixed-vectors-fp-shuffles.ll | 4 +- .../RISCV/rvv/fixed-vectors-fp-splat.ll | 8 +- .../RISCV/rvv/fixed-vectors-fp-vrgather.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll | 8 +- .../RISCV/rvv/fixed-vectors-insert-i1.ll | 4 +- .../rvv/fixed-vectors-insert-subvector.ll | 18 +-- .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 4 +- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 4 +- 
.../RISCV/rvv/fixed-vectors-int-exttrunc.ll | 12 +- .../RISCV/rvv/fixed-vectors-int-interleave.ll | 8 +- .../RISCV/rvv/fixed-vectors-int-setcc.ll | 4 +- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 4 +- .../RISCV/rvv/fixed-vectors-int-splat.ll | 12 +- .../RISCV/rvv/fixed-vectors-int-vrgather.ll | 8 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 8 +- .../RISCV/rvv/fixed-vectors-marith-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 28 ++--- .../rvv/fixed-vectors-mask-load-store.ll | 8 +- .../RISCV/rvv/fixed-vectors-mask-logic.ll | 8 +- .../RISCV/rvv/fixed-vectors-mask-splat.ll | 8 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 4 +- .../RISCV/rvv/fixed-vectors-masked-load-fp.ll | 4 +- .../rvv/fixed-vectors-masked-load-int.ll | 4 +- .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 4 +- .../rvv/fixed-vectors-masked-store-fp.ll | 4 +- .../rvv/fixed-vectors-masked-store-int.ll | 4 +- .../rvv/fixed-vectors-reduction-fp-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 4 +- .../rvv/fixed-vectors-reduction-int-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-reduction-int.ll | 4 +- .../rvv/fixed-vectors-reduction-mask-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-select-fp.ll | 4 +- .../RISCV/rvv/fixed-vectors-select-int.ll | 4 +- .../rvv/fixed-vectors-stepvector-rv32.ll | 4 +- .../rvv/fixed-vectors-stepvector-rv64.ll | 4 +- .../rvv/fixed-vectors-store-merge-crash.ll | 2 +- .../RISCV/rvv/fixed-vectors-unaligned.ll | 4 +- .../RISCV/rvv/fixed-vectors-vadd-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vand-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vdiv-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vdivu-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfadd-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfdiv-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfmul-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfrdiv-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vfrsub-vp.ll | 4 +- 
.../RISCV/rvv/fixed-vectors-vfsub-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vmul-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vpgather.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 4 +- .../RISCV/rvv/fixed-vectors-vpscatter.ll | 4 +- .../RISCV/rvv/fixed-vectors-vpstore.ll | 4 +- .../rvv/fixed-vectors-vreductions-mask.ll | 8 +- .../RISCV/rvv/fixed-vectors-vrem-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vremu-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vrsub-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll | 4 +- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vselect.ll | 4 +- .../RISCV/rvv/fixed-vectors-vshl-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vsra-vp.ll | 4 +- .../RISCV/rvv/fixed-vectors-vsrl-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vssub.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll | 4 +- .../RISCV/rvv/fixed-vectors-vsub-vp.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll | 4 +- .../RISCV/rvv/fixed-vectors-vwmaccu.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll | 4 +- .../RISCV/rvv/fixed-vectors-vxor-vp.ll | 4 +- .../test/CodeGen/RISCV/rvv/frameindex-addr.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll | 4 +- .../CodeGen/RISCV/rvv/get-vlen-debugloc.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/inline-asm.ll | 2 +- .../CodeGen/RISCV/rvv/insert-subvector.ll | 4 +- .../CodeGen/RISCV/rvv/insertelt-fp-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/insertelt-fp-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll | 4 +- .../CodeGen/RISCV/rvv/insertelt-int-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/interleave-crash.ll | 4 +- .../RISCV/rvv/large-rvv-stack-size.mir | 2 +- .../CodeGen/RISCV/rvv/legalize-load-sdnode.ll | 4 +- 
.../RISCV/rvv/legalize-scalable-vectortype.ll | 4 +- .../RISCV/rvv/legalize-store-sdnode.ll | 4 +- .../CodeGen/RISCV/rvv/load-add-store-16.ll | 4 +- .../CodeGen/RISCV/rvv/load-add-store-32.ll | 4 +- .../CodeGen/RISCV/rvv/load-add-store-64.ll | 4 +- .../CodeGen/RISCV/rvv/load-add-store-8.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/load-mask.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/localvar.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/marith-vp.ll | 4 +- .../RISCV/rvv/mask-exts-truncs-rv32.ll | 2 +- .../RISCV/rvv/mask-exts-truncs-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/mask-reg-alloc.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll | 4 +- .../test/CodeGen/RISCV/rvv/masked-load-int.ll | 4 +- .../test/CodeGen/RISCV/rvv/masked-store-fp.ll | 4 +- .../CodeGen/RISCV/rvv/masked-store-int.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/memory-args.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll | 4 +- .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 4 +- .../RISCV/rvv/named-vector-shuffle-reverse.ll | 12 +- .../CodeGen/RISCV/rvv/no-reserved-frame.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/pr52475.ll | 4 +- .../CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll | 2 +- .../test/CodeGen/RISCV/rvv/reg-coalescing.mir | 2 +- .../CodeGen/RISCV/rvv/regalloc-fast-crash.ll | 2 +- .../RISCV/rvv/rv32-spill-vector-csr.ll | 4 +- .../CodeGen/RISCV/rvv/rv32-spill-vector.ll | 4 +- .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 4 +- .../RISCV/rvv/rv32-vsetvli-intrinsics.ll | 2 +- .../RISCV/rvv/rv64-spill-vector-csr.ll | 4 +- .../CodeGen/RISCV/rvv/rv64-spill-vector.ll | 4 +- .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 4 +- .../RISCV/rvv/rv64-vsetvli-intrinsics.ll | 2 +- .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 2 +- .../test/CodeGen/RISCV/rvv/rvv-framelayout.ll | 2 +- .../CodeGen/RISCV/rvv/rvv-out-arguments.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/select-fp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/select-int.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/select-sra.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/setcc-integer-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/setcc-integer-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/stepvector.ll | 4 +- .../RISCV/rvv/tail-agnostic-impdef-copy.mir | 6 +- llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll | 2 +- .../RISCV/rvv/unaligned-loads-stores.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vand-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 4 +- .../test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfslide1down-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll | 2 +- 
.../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll | 2 +- 
.../test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vor-vp.ll | 4 +- .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vpload.ll | 4 +- .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll | 2 +- .../RISCV/rvv/vreductions-fp-sdnode.ll | 4 +- .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 4 +- .../CodeGen/RISCV/rvv/vreductions-int-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vreductions-int-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 4 +- .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll | 4 +- .../CodeGen/RISCV/rvv/vreductions-mask.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vrgatherei16-rv32.ll | 2 
+- .../CodeGen/RISCV/rvv/vrgatherei16-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll | 2 +- .../CodeGen/RISCV/rvv/vselect-int-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vselect-int-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll | 2 +- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 2 +- .../RISCV/rvv/vsetvli-insert-crossbb.mir | 4 +- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 2 +- .../test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 4 +- .../CodeGen/RISCV/rvv/vsetvli-regression.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll | 4 +- .../CodeGen/RISCV/rvv/vslide1down-rv32.ll | 2 +- .../CodeGen/RISCV/rvv/vslide1down-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll | 2 +- .../test/CodeGen/RISCV/rvv/vslidedown-rv32.ll | 2 +- .../test/CodeGen/RISCV/rvv/vslidedown-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll | 2 
+- llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll | 4 +- 
llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll | 2 +- 
llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll | 2 +- .../RISCV/rvv/wrong-stack-slot-rv32.mir | 2 +- .../RISCV/rvv/wrong-stack-slot-rv64.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 2 +- .../test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll | 2 +- .../CodeGen/RISCV/scalable-vector-struct.ll | 2 +- llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll | 2 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 4 +- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 4 +- llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll | 4 +- llvm/test/MC/RISCV/attribute-arch-invalid.s | 6 - llvm/test/MC/RISCV/attribute-arch.s | 109 +++++++++--------- llvm/test/MC/RISCV/rvv/add.s | 8 +- llvm/test/MC/RISCV/rvv/aliases.s | 4 +- llvm/test/MC/RISCV/rvv/and.s | 8 +- llvm/test/MC/RISCV/rvv/clip.s | 8 +- llvm/test/MC/RISCV/rvv/compare.s | 8 +- llvm/test/MC/RISCV/rvv/convert.s | 8 +- llvm/test/MC/RISCV/rvv/div.s | 8 +- llvm/test/MC/RISCV/rvv/ext.s | 8 +- llvm/test/MC/RISCV/rvv/fadd.s | 8 +- llvm/test/MC/RISCV/rvv/fcompare.s | 8 +- llvm/test/MC/RISCV/rvv/fdiv.s | 8 +- llvm/test/MC/RISCV/rvv/fmacc.s | 8 +- llvm/test/MC/RISCV/rvv/fminmax.s | 8 +- llvm/test/MC/RISCV/rvv/fmul.s | 8 +- llvm/test/MC/RISCV/rvv/fmv.s | 8 +- llvm/test/MC/RISCV/rvv/fothers.s | 8 +- llvm/test/MC/RISCV/rvv/freduction.s | 8 +- llvm/test/MC/RISCV/rvv/fsub.s | 8 +- llvm/test/MC/RISCV/rvv/invalid-eew.s | 2 +- llvm/test/MC/RISCV/rvv/invalid.s | 2 +- llvm/test/MC/RISCV/rvv/load.s | 8 +- llvm/test/MC/RISCV/rvv/macc.s | 8 +- llvm/test/MC/RISCV/rvv/mask.s | 8 +- llvm/test/MC/RISCV/rvv/minmax.s | 8 +- 
llvm/test/MC/RISCV/rvv/mul.s | 8 +- llvm/test/MC/RISCV/rvv/mv.s | 8 +- llvm/test/MC/RISCV/rvv/or.s | 8 +- llvm/test/MC/RISCV/rvv/others.s | 8 +- llvm/test/MC/RISCV/rvv/reduction.s | 8 +- llvm/test/MC/RISCV/rvv/shift.s | 8 +- llvm/test/MC/RISCV/rvv/sign-injection.s | 8 +- llvm/test/MC/RISCV/rvv/snippet.s | 4 +- llvm/test/MC/RISCV/rvv/store.s | 8 +- llvm/test/MC/RISCV/rvv/sub.s | 8 +- llvm/test/MC/RISCV/rvv/vsetvl-invalid.s | 4 +- llvm/test/MC/RISCV/rvv/vsetvl.s | 8 +- llvm/test/MC/RISCV/rvv/xor.s | 8 +- llvm/test/MC/RISCV/rvv/zvlsseg.s | 8 +- .../RISCV/masked_gather_scatter.ll | 4 +- .../LoopVectorize/RISCV/reg-usage.ll | 8 +- .../LoopVectorize/RISCV/riscv-interleaved.ll | 2 +- .../LoopVectorize/RISCV/riscv-unroll.ll | 8 +- .../RISCV/scalable-reductions.ll | 2 +- .../LoopVectorize/RISCV/scalable-vf-hint.ll | 2 +- .../RISCV/unroll-in-loop-vectorizer.ll | 2 +- .../RISCV/rvv-min-vector-size.ll | 6 +- 1078 files changed, 1638 insertions(+), 1662 deletions(-) diff --git a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c index 9ee8ce68ebc56..e80ac5e26e8cf 100644 --- a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c +++ b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c @@ -1,6 +1,6 @@ // REQUIRES: riscv-registered-target // RUN: not %clang_cc1 -triple riscv64 -fsyntax-only -verify \ -// RUN: -target-feature +experimental-v %s 2>&1 \ +// RUN: -target-feature +v %s 2>&1 \ // RUN: | FileCheck %s #include diff --git a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c index 9db7eaa3bcd11..dab41f1695e2e 100644 --- a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c +++ b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-feature +experimental-v \ +// RUN: 
%clang_cc1 -triple riscv64 -emit-llvm -target-feature +v \ // RUN: %s -o - \ // RUN: | FileCheck %s diff --git a/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c b/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c index 146ed23386028..59adc5c45521a 100644 --- a/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c +++ b/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c @@ -1,9 +1,9 @@ // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv32 -target-feature +v \ // RUN: -O2 -emit-llvm %s -o - \ // RUN: | FileCheck %s -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ // RUN: -O2 -emit-llvm %s -o - \ // RUN: | FileCheck %s diff --git a/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c b/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c index f0e405aa79c66..2217e6ea8d07e 100644 --- a/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c +++ b/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ // RUN: -dwarf-version=4 -debug-info-kind=limited -emit-llvm -o - %s \ // RUN: | FileCheck --check-prefix=DEBUGINFO %s #include diff --git a/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp b/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp index f82389487f582..c6aa2a56133f2 100644 --- a/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp +++ b/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp @@ -1,5 +1,5 @@ // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +experimental-v \ +// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +v \ // RUN: -O1 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c index b3912101ddde9..9b6b966723e39 
100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c index 5c90d3e970b5b..db3ea976f4803 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c index 9f436c5001200..e3ff19a319483 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: 
-target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c index 41dfe0bd7a1c8..909da393f7665 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c index e1fbd75e0f4e6..8def45998e1b8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c index cd2c0b04288ef..bd787d9b1263f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c @@ -1,6 +1,6 @@ // NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c index ba29104b15afc..f134a7784b759 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c index 7c1e7607c8d06..e5f8d3f645c04 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c index 3afe6a35edf17..f784db297373a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c index 0d0649afcee51..0d7c709183905 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c index 7df3f86f4b0d5..3b172d9261eaf 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 
-target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c index 8fad4dfc00f9a..b763497c6cf10 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c index 5431a0d53c6bf..f8c882ee23f31 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c index 4fbd882f805a8..9a222f9ff7c35 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c index b7b5ab2b1f825..4c44b69ca654a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c index 4fe2854f28e14..1a81935f88490 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c index 58577091d3563..1ae5f289ca667 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c index a4284be12b05c..a945c0bafdfa5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c index 5435e9441ab40..560d03030b8ba 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c index 60d839b982241..e42979c08f349 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c index 814989fea75dd..b3dace5e66bd4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c index a8efe8da248b2..6051fcf8dceec 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c index 5b8935e1e230d..3702b7c7b63e0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c index 22b72bbb1e702..b6c28840157a0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 
-triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c index a3b7cdc97f117..b2f4fb383db44 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c index cf04131ef16ba..c4a534d11ad41 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c index 28bd617ba1567..42a257ad9c0d1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c index 6ccdd00b41837..8360fa5fa3006 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c index 6f64cbaeb9f7c..b75872317f473 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c index 0810b8204e2cc..8c19f7bd147f9 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c index cb65d923b4f4a..bff86dec3b518 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c index cf09fa1e44492..c61e6ade90eb8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: 
riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c index bb66a84074138..735e0f620aa2e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c index 4ecd8afc87de1..e5d3da2c8e913 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c index 403a83c95486e..6486ded04427a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c index f350d56cfe1b3..e407104b992a2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c index 1165b32360014..e5fa14a92604b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ 
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c index d6e6a29cbc65e..e70c63f05770a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c index df8a60cc2eda0..989fd283b920f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c index 3c42e76eba88f..17615694da722 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c index 449985b16cb52..2df9853fdfe54 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c index 7bd209b1c968b..2a5bf9f06f114 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c index 624235efb8950..ef65d95f6556a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c index aeb2a27df958a..4e55316a3eeca 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c index e550c07b0dec5..c4086f06beb53 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated 
by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c index 03f4275e358bc..8d5396c32933e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c index d714fb520a826..4b5ccba44758f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c index 1f8160fec2c3f..16895d5f4b48d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c index c185fadb650f0..b3eb1b5408bc8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c index 150ffa14ca803..2dd4350fa9c7a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature 
+experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c index bf8d1e46b0353..09728162391c4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c index 061096c9f9a0b..f5d01e1de1240 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c index ec51c9169854f..ff2acb39fd91d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c index b9839ef6752f7..60a3b74918b86 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c index 3d27463efbfa4..b6fa36f8416b6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c index 8b69c187e0c1a..395c22636a51f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c index 0df229dc803da..f40a89bfe45dc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c index c2d2838027ec3..a5ef9176120be 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f 
-target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c index ac49608830570..bf13040961424 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c index 22df2b948634b..1a2a49f60228d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: 
-target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c index 4f79022bcfd92..ac1bbb169033f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c index aec874cb20a25..f1a1081aacfb7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c index 8db5e8039cafe..72068e7a822ad 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c index 7c6c217a8f6c9..b42dd449964f4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c index 1c24617f7d91b..c138e62df3083 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature 
+experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c index 80cb56780f5f8..197fdce0d4f11 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c index 6e966a8040c8c..f03ae337bc52d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c index 744ea0fb1e32e..ee0948836af2a 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c index 7fd693a4dd924..b36e8f8a153a5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c index 62d75c3d53b4d..1d1b50e60dd58 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c index 325f527dd6756..ed49ac9a0b698 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c index a0bf92a539ef1..8ac544f5caceb 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c index cad932776fe24..5ae7ade4e5df7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c index 955cec4918891..74aaf22d9031e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c index 890682f03535d..efeacf95a675a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c index 4f6a5d68bae05..5c269c17f1ea5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c index b4d949babf665..604763c410444 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c index fea9524ec11a1..f6adc78cd8d3e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | 
opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c index 073d0319a0f4a..ed28a2ccf1950 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c index 5bfe530f254d7..28a2020c525a0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c index ecb662c329265..baecf279a07ec 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c index e642a96bc8f91..33a228ef83ee1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c index 7270fd8b4fc56..7012f0ef5b822 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | 
FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c index a20b73278dedd..b8657ae500932 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c index 3f7f4449cbeea..9ae2f3a7f34ab 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c index 6cba226fa9a4b..db63eef64203d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: 
riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c index cade3990280db..5e16eb0a3497a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c index 39fb95f7dc8cc..2e4ccae0ca44f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c index 0ebee8ee46cc7..00fb31ecf1477 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c index 9916376f7bb30..ef05f1e825eef 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c index ed3ef05a7e3b6..7330930687e68 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - 
| opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c index d871e7a63c9c0..7d633ecde4265 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c index ed00804c40cf9..b6890fd8829d3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c index 886d1874a6f84..d1ca74c3da0c3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c index 89403a714b779..4b414bee7b912 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c index b5bf86b55dea5..29ccbe114c8b6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S 
-mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c index b547decc6124f..d25b1b4c414a3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c index 43c1db05aab72..a52e5677e01fc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c index b97e838ff1ec1..9c0683af889dc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature 
+experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c index ee2803020f0b0..9d3b6a0021b46 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c index fbe40dc59ec07..642055f09fab4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -target-feature +zfh -disable-O0-optnone -fallow-half-arguments-and-returns -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c index 
4501e37e27fa9..c911a400be40a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c index 9ef1e0a81c3bc..315a6802b8ba0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c index ef455106e560e..526f78ad58630 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c index e1897f20e4245..1199d8fe4f79b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c index 0796133bc67eb..2b91901e0898c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c index 23767a815f6fe..ae1041584a91f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c index ad8c736225267..d5ba033a74e4a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c index 96c3e938164e9..25b7de13fa1e0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt 
-S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c index 247f67d69f018..d4cafe7c2b134 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c index 760e697d9398a..2308dc466cea6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c index 3d3dc87974809..ddff28ea9d6c1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c index 314c7eb81b35c..7e4ef5e0ba950 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c index fb8df2ec96217..d313ab0d0e0dc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c index 046cc4324b9e9..a5418573ac47c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c index 5c90af66cb0cd..79c0453a514ca 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c index 20a68c8ac4659..81f14546fd3fd 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c index 5db2ff82f5984..efc4ee494f7ec 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c index 714724e7ac683..573d929a62249 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c index 9bed5a4860723..0e200dc4cb546 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c index 03b3436b834ae..7d8056e097bbf 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c index 01088ee621947..c07a80a1bc776 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone 
-emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c index 93de0f4c364dc..415934c6e2e64 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c index 4cb7f1fb0fd95..5101560b0e4e0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c index ecafc5576da7a..6c1479515c36f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c index ba4149a34de6b..04ba0e688014a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c index d3454be64ca57..e11d591fc87cf 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c index 
750bb06e301c4..c7855bb0d02d0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c index 6b4fc7c0245a5..bf0e538fc331b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c index 10097d0bbba6a..026cd287e05be 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d 
-target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c index b72721984afa8..bae79c0c7b246 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c index accf4688d452a..2427f374cdb39 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c index 0f1803a7eeb30..d393fa1214fc5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
// REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c index 2a95d46f3ae92..1b7f8eb453c92 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c index cb7ed7338fc62..04b91eb785da2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c index ae661230493e6..720ddadf5123b 
100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c index 77980ee4cac55..3ade610fb7092 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c index 3a338ad821e27..597f4442f94b6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v \ +// RUN: -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o 
- | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c index 74316ae7e7fc9..7d66d0940473e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c index 79a8c84ca0139..172ea75b07b42 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c index 1c245443da00c..05edc0718edb3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // 
REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c index 88e5f6ca4c52a..3e6eaaa02da9f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c index f06985ca8bc88..78b5ac82be4ea 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c index 8ce7f077471df..9080a62a47bad 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c index 2f919f342b935..52d0d079453a1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c index c333b8a831517..80afbddc8d921 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | 
opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c index 597db722f6141..cf6281261b421 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c index 03e4c29b552ec..c9134fc0d2bb7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c index 131c168d76d57..f8070ce5a8312 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c index 65878d6e535b2..54f63cd2bf58d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c index 6a87d21e76e02..311881eb70192 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c index 9dbb59289abee..5e940fec0f9db 
100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c index 78a8f11c5aaff..f9b18186ac52b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c index 1934d1780e3a1..7d18db0cad686 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff 
--git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c index a97646011f4b0..18495dc881ca3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c index 138d2590f54ca..4757cd9827aa6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c index e22d4a0e78c51..a3fe942c8e875 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v 
-target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c index d0a7280f2f3f9..9369c03987613 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c index 8c2923167a5f7..c2c5b12aeae09 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c index 74a2b7ce000b2..84d3df136c29c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c index 17e00d7b3300b..5f01c59536145 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c index 1042f89f17059..79e8d32e85d9f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c index 1eeef95ba1b7e..052c02faa5897 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c index 350f0955822ed..48f0506267435 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c index af8b1ce8a9659..29c2b4c676021 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c index f72986fadb64c..d8e380ec8b20e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c index 8b42a34f92b8e..529cb80d3b8f8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c index 9b98bb5eb966b..0be537645af31 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c index 
acd398a4e501a..74148c846c436 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c index d5940d540effb..694f18bf80b87 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c index 6cf510fa781ce..50dc403299221 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c index fe5f3d104f0f6..192a5619332f2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c index 34def944086b4..0e0ae186b87dd 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c index 09f3ddaf8302b..11d3b9816df4d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c index 7ae33a87f434c..bdb90cb40e609 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c index 13c01502b1b26..c201f27cc32f3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c index ca509be0c6ebf..5c1e1c8ea9341 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: 
-target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c index 57fcd59d12b46..1f76f073ce36b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c index a0bbb655037d7..42717e110bd8b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c index 79f127753704d..c8de86b2b89a4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c index b7667e4f48195..1469b6addc7f2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c index 76fd1e08a8713..7428f43db11b7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c index 2c297bf07f299..277b803b9d687 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c index e5846973e155a..9c01c32bbf96b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c index 9e5bac3347fdc..389c762266d88 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c index 13ab2517acbe2..9d4b8d9242253 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c index 97e3995012692..88a4c9aadb856 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c index d871740de6f47..87f59f8483305 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c index a5967cc844914..3cd938292e055 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c index cc32dd19d5793..51c3a6edaaa07 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c index 4ed20bd75ac0a..f247017eb3ebc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: 
-target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c index e9e86a8ea38a8..0254680b38d2e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c index 0ba22fcf44cbf..f9d4cb6b60b84 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c index 7bd147760fd47..afe3e32691feb 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple 
riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c index eaeac9cde2a47..9db0ff78f301d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c index 30407af51d753..410b3c89738b1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c index 43721e970d0e4..dfced599a8e0f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been 
autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c index 7c1f89b88daf1..be2734e47b235 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c index cf983abbbaf8b..8b0efa87d6d5c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c index 69d569a7972c8..ca7ab81f4ec8a 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c index 01bd48544a73b..d88c6b40aedcb 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c index 5f5517299f701..8858fb4d862a4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c index 61c4df541d4bf..328b3241ec2e5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c index f278bb76ae630..e027e4fbe1ac6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c index addb7e99637b1..e35a9e8168bd0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck 
--check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c index deaaf0ed5ff93..d749bb880a743 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c index fba2c179a4446..0b226fd42f8d1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature 
+experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone \ // RUN: -fallow-half-arguments-and-returns -emit-llvm %s -o - \ // RUN: | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c index cfdf8275483f8..eadd275e76f33 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c index 8988dd7a6c4d7..d24890ff9c7a4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c index b83ff9833e7b9..5b39bc2957d48 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c index 2b729b22ceb22..478a67a16a5a0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c index cdfc612234c3f..59ba187f6a1d4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c index 7157c8e45003a..de7241136561b 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c index 402280d7a8ca2..cda59a4232c54 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c index 17ace86d76fed..17a4c55b87bab 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c index 885c7cebdc76d..2b02c5f8928e5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c index 20ed58adb500e..a512ad402edb6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c index 89d83e30dac06..7f2c12cb1c9fa 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature 
+zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c index 0573895b22872..03fbd2d9aacc5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c index 3c098a1d3fd44..6e8d324174695 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c index d0c62282d5a7c..7ccbd7031a21b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: 
-target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c index 8a0c632d10a37..7700c7edf3875 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c index 44c07f8395b5a..277d463df6c6c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c index 3bf525539d4f9..bb31db3465e65 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: 
riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c index 68b01c5829754..72537647164ef 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c index 67429dd14cf58..87b603119bdfd 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c index 2cbad2dd5cce9..74f23a17aebd3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c 
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c index 4e4456400139a..d6b8ee2d05edc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c index 416de0408ffa1..a6a9553fadc9e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c index 5f4947ae98b32..e6615766b0393 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c index 736045c1aa6f7..059e4fef4524a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c index 80d25a68af06e..f8e7800128fbc 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: 
%clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c index 6f2f03940c232..42252132b8c3b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c index 04f1151644ac3..dcf5c30d25cf2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c index 8eeacfa1572c2..9e7892cd9bc90 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: 
%clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c index a0f3baefcd469..8cbb7b282b1a3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c index b481452032972..63d72e45a7b54 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c index 267e3671bd60b..486a15e0e3cc6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c index 7b830321cae81..5d3db18e50463 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c index ac6e551cf883c..70b142302d432 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c index 8f49944ed4ddc..5d7f7e504290e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c index 53836fbde0257..0f8517f447a13 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c index e64dd8e9224d8..4ffb4dc2efa02 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 
-target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c index 1f32736c1c637..36a543923e5a8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c index 60ed6a2c9dec1..81dc0a7ee0d8f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c index f87538e3c3334..6bdb8baec8ab8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 
-target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c index 674fe2ce90819..e83e29b7244c5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c index 52aea8a954556..65b97bdeccd40 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c index e8fae27f84a15..6bc003f150cda 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c index 8e843863aaae0..0736b3c35f461 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \ // RUN: -target-feature +zfh -disable-O0-optnone -fallow-half-arguments-and-returns -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c index 9fe59bbdc5604..f86d5353acd91 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c index 
57e89636d702e..33d3479ec7abe 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c index 45ab3783fae5e..95630044f5e3b 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c index 2da8f15cdad75..a7c1db6c00472 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | 
FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c index c886985708326..8663c837bc156 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c index 8cb4711e6b823..2f14a9c36ea2f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c index 45b23ac73553c..c262ed25567c4 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm 
%s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c index 004bbeca5533b..6e246ad7251e1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c index e3b2f72e5ecb4..65ab95bdf91c7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c index 8f56bb5aff277..52097da1e6446 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c @@ -1,7 +1,7 @@ // NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c index aba47d40277af..01024f14a0407 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c index 246d62b30167b..afc7638555f82 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c index 2e277602f0fae..d7688d2642c4d 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c index 9ee0cf06efcac..abfe106708dcd 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c index 00b3660c19a75..b3cac73c30f7e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c index 960a84d3ec614..10853e25db05c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c index 7c7f778d68db3..f76a5a327d0d2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c index 30ef1987507a0..29b59da5240e5 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \ // RUN: | FileCheck --check-prefix=CHECK-RV64 %s 
#include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c index 0aef38ccfa018..d569eb425aa26 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -emit-llvm -o - %s \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \ // RUN: | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c index af844dcc44430..f8561f7d11c12 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c index a6b1b1948f98b..867fe098640d7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v 
-disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c index 2709781a586f2..61d7023eb431c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c index 9089a9ac9428a..86b7f9f0ff4a3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c index 3e5e9e0c53950..ca7cada4e51b6 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 
-target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c index ae3fa6b3fc0ff..a7f2dec736893 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c index 60c8465e9bac3..dd0ba266080a1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c index cba2fda30a4a3..642adfbf31c71 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c @@ -1,7 +1,7 @@ // NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c index ac8c4a412e7d0..1d077cb181647 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c index ee18e20b3236e..ddf1f8bce3c97 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c index b620f8812a38c..3150746b841dc 100644 --- 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c index dc2bcdd4d9241..48cd6f4ed0653 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c index 0829100f58ca4..47d256dc655f1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c index 67e727107ba69..95e9cf843fc66 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c index 2067fbba6144b..fccf57aecfb1d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c index b91c023aeede7..73af7a3061c8e 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh 
-target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c index f29d895b48c4e..dc16819e04db3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c index 4e8bdb3a7a80b..4275d13f9d714 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c index 9da42956d516e..0b8b23159954a 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple 
riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c index a6bdc1ad883f3..cb4630dbb92ea 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +zfh -target-feature +experimental-v \ +// RUN: -target-feature +zfh -target-feature +v \ // RUN: -disable-O0-optnone -emit-llvm %s \ // RUN: -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c index 8815848b15186..a69c8e90952b0 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +experimental-v -target-feature +zfh \ +// RUN: -target-feature +v -target-feature +zfh \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c index d5bef6564e0e3..77036c4d6e752 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c @@ -1,6 +1,6 @@ // NOTE: Assertions 
have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c index 65f41d555698e..4c4c6a26861b3 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c index 8c74789447bd0..a0f993feb7c4d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c index 
58a92db75c825..023133b8f331d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c index 8b0ff2bcc5f63..dd2295fccfdf9 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c index de13edb21caba..9422cd4d1b691 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S 
-mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c index 34ebdf43c7618..be076ea0b5039 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c index 29ea205de76c5..c4aad3d8d0491 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s #include diff --git a/clang/test/CodeGen/RISCV/rvv_errors.c b/clang/test/CodeGen/RISCV/rvv_errors.c index 40ec544d0b76b..8316a4313274a 100644 --- a/clang/test/CodeGen/RISCV/rvv_errors.c +++ b/clang/test/CodeGen/RISCV/rvv_errors.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=riscv64 -target-feature +experimental-v -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple=riscv64 -target-feature +v -fsyntax-only -verify void test() { __builtin_rvv_vsetvli(1, 7, 0); // expected-error {{argument value 7 is outside 
the valid range [0, 3]}} diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 36043124e4565..e81dc7b700ada 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -397,35 +397,20 @@ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZBA %s // RV32-ZBA: "-target-feature" "+zba" -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-NOFLAG %s -// RV32-EXPERIMENTAL-V-NOFLAG: error: invalid arch name 'rv32iv' -// RV32-EXPERIMENTAL-V-NOFLAG: requires '-menable-experimental-extensions' +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p1 -### %s -c 2>&1 | \ +// RUN: FileCheck -check-prefix=RV32-V-BADVERS %s +// RV32-V-BADVERS: error: invalid arch name 'rv32iv0p1' +// RV32-V-BADVERS: unsupported version number 0.1 for extension 'v' -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv -menable-experimental-extensions -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-NOVERS %s -// RV32-EXPERIMENTAL-V-NOVERS: error: invalid arch name 'rv32iv' -// RV32-EXPERIMENTAL-V-NOVERS: experimental extension requires explicit version number +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -### %s -c 2>&1 | \ +// RUN: FileCheck -check-prefix=RV32-V-GOODVERS %s +// RV32-V-GOODVERS: "-target-feature" "+v" -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p1 -menable-experimental-extensions -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-BADVERS %s -// RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1' -// RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension 'v' - -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s -// RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v" +// RUN: %clang 
-target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -### %s -c 2>&1 | \ +// RUN: FileCheck -check-prefix=RV32-ZVL-BADVERS %s +// RV32-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1' +// RV32-ZVL-BADVERS: unsupported version number 0.1 for extension 'zvl32b' // RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s -// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv1p0_zvl32b1p0' -// RV32-EXPERIMENTAL-ZVL-NOFLAG: requires '-menable-experimental-extensions' - -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-BADVERS %s -// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1' -// RV32-EXPERIMENTAL-ZVL-BADVERS: unsupported version number 0.1 for experimental extension - -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -menable-experimental-extensions -### %s -c 2>&1 | \ -// RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-GOODVERS %s -// RV32-EXPERIMENTAL-ZVL-GOODVERS: "-target-feature" "+experimental-zvl32b" +// RUN: FileCheck -check-prefix=RV32-ZVL-GOODVERS %s +// RV32-ZVL-GOODVERS: "-target-feature" "+zvl32b" diff --git a/clang/test/Headers/riscv-vector-header.c b/clang/test/Headers/riscv-vector-header.c index ce618b5a717e1..70db4d63c276e 100644 --- a/clang/test/Headers/riscv-vector-header.c +++ b/clang/test/Headers/riscv-vector-header.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -triple riscv64 -fsyntax-only \ // RUN: -target-feature +m -target-feature +a -target-feature +f \ -// RUN: -target-feature +d -target-feature +experimental-v %s +// RUN: -target-feature +d -target-feature +v %s // expected-no-diagnostics #include diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index ec356cee7426e..73cb0b1a71c6f 100644 
--- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -211,10 +211,10 @@ // CHECK-ZBT-NOT: __riscv_b // CHECK-ZBT-EXT: __riscv_zbt 93000{{$}} -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv32iv1p0 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: -march=rv64iv1p0 -x c -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s // CHECK-V-EXT: __riscv_v 1000000{{$}} @@ -236,59 +236,59 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZFH-EXT %s // CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}} -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-V-MINVLEN %s // CHECK-V-MINVLEN: __riscv_v_elen 64 // CHECK-V-MINVLEN: __riscv_v_elen_fp 64 // CHECK-V-MINVLEN: __riscv_v_min_vlen 128 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl256b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL256b %s // CHECK-ZVL256b: __riscv_v_min_vlen 256 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl512b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL512b %s // CHECK-ZVL512b: __riscv_v_min_vlen 512 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl1024b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck 
--check-prefix=CHECK-ZVL1024b %s // CHECK-ZVL1024b: __riscv_v_min_vlen 1024 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl2048b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL2048b %s // CHECK-ZVL2048b: __riscv_v_min_vlen 2048 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl4096b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL4096b %s // CHECK-ZVL4096b: __riscv_v_min_vlen 4096 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl8192b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL8192b %s // CHECK-ZVL8192b: __riscv_v_min_vlen 8192 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl16384b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL16384b %s // CHECK-ZVL16384b: __riscv_v_min_vlen 16384 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl32768b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL32768b %s // CHECK-ZVL32768b: __riscv_v_min_vlen 32768 -// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv32-unknown-linux-gnu \ // RUN: -march=rv64iv1p0_zvl65536b1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVL65536b %s // CHECK-ZVL65536b: __riscv_v_min_vlen 65536 -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: 
-march=rv64ifdzve64d1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE64D-EXT %s // CHECK-ZVE64D-EXT: __riscv_v_elen 64 @@ -301,7 +301,7 @@ // CHECK-ZVE64D-EXT: __riscv_zve64f 1000000{{$}} // CHECK-ZVE64D-EXT: __riscv_zve64x 1000000{{$}} -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: -march=rv64ifzve64f1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE64F-EXT %s // CHECK-ZVE64F-EXT: __riscv_v_elen 64 @@ -313,7 +313,7 @@ // CHECK-ZVE64F-EXT: __riscv_zve64f 1000000{{$}} // CHECK-ZVE64F-EXT: __riscv_zve64x 1000000{{$}} -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: -march=rv64izve64x1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE64X-EXT %s // CHECK-ZVE64X-EXT: __riscv_v_elen 64 @@ -323,7 +323,7 @@ // CHECK-ZVE64X-EXT: __riscv_zve32x 1000000{{$}} // CHECK-ZVE64X-EXT: __riscv_zve64x 1000000{{$}} -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: -march=rv64ifzve32f1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE32F-EXT %s // CHECK-ZVE32F-EXT: __riscv_v_elen 32 @@ -333,7 +333,7 @@ // CHECK-ZVE32F-EXT: __riscv_zve32f 1000000{{$}} // CHECK-ZVE32F-EXT: __riscv_zve32x 1000000{{$}} -// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \ +// RUN: %clang -target riscv64-unknown-linux-gnu \ // RUN: -march=rv64izve32x1p0 -x c -E -dM %s -o - \ // RUN: | FileCheck --check-prefix=CHECK-ZVE32X-EXT %s // CHECK-ZVE32X-EXT: __riscv_v_elen 32 diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c index 0d09546603b66..1be20688cc7d5 100644 --- a/clang/test/Sema/riscv-types.c +++ b/clang/test/Sema/riscv-types.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple riscv64 
-target-feature +experimental-v -ast-print %s \ +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -ast-print %s \ // RUN: | FileCheck %s void bar(void) { diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index ea2d0b8d2f2f8..837226f4e2a54 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -1034,7 +1034,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) { OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n"; OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, " - "ATTRS, \"experimental-zve32x\")\n"; + "ATTRS, \"zve32x\")\n"; OS << "#endif\n"; for (auto &Def : Defs) { auto P = diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index c42f3604d67ff..f8a17f7440f6a 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -58,17 +58,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zbkb", RISCVExtensionVersion{1, 0}}, {"zbkc", RISCVExtensionVersion{1, 0}}, -}; -static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"v", RISCVExtensionVersion{1, 0}}, - {"zbe", RISCVExtensionVersion{0, 93}}, - {"zbf", RISCVExtensionVersion{0, 93}}, - {"zbm", RISCVExtensionVersion{0, 93}}, - {"zbp", RISCVExtensionVersion{0, 93}}, - {"zbr", RISCVExtensionVersion{0, 93}}, - {"zbt", RISCVExtensionVersion{0, 93}}, - {"zvl32b", RISCVExtensionVersion{1, 0}}, {"zvl64b", RISCVExtensionVersion{1, 0}}, {"zvl128b", RISCVExtensionVersion{1, 0}}, @@ -88,6 +79,15 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zve64d", RISCVExtensionVersion{1, 0}}, }; +static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { + {"zbe", RISCVExtensionVersion{0, 93}}, + {"zbf", RISCVExtensionVersion{0, 93}}, + {"zbm", RISCVExtensionVersion{0, 93}}, + {"zbp", RISCVExtensionVersion{0, 93}}, + {"zbr", RISCVExtensionVersion{0, 93}}, + {"zbt", 
RISCVExtensionVersion{0, 93}}, +}; + static bool stripExperimentalPrefix(StringRef &Ext) { return Ext.consume_front("experimental-"); } diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 2b6ea4067a8ea..dea042348d4db 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -185,48 +185,48 @@ def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">, AssemblerPredicate<(all_of(not FeatureNoRVCHints)), "RVC Hint Instructions">; -def FeatureStdExtZvl32b : SubtargetFeature<"experimental-zvl32b", "ZvlLen", "ExtZvl::Zvl32b", +def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "ExtZvl::Zvl32b", "'Zvl' (Minimum Vector Length) 32">; foreach i = { 6-15 } in { defvar I = !shl(1, i); def FeatureStdExtZvl#I#b : - SubtargetFeature<"experimental-zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b", + SubtargetFeature<"zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b", "'Zvl' (Minimum Vector Length) "#I, [!cast("FeatureStdExtZvl"#!srl(I, 1)#"b")]>; } def FeatureStdExtZve32x - : SubtargetFeature<"experimental-zve32x", "HasStdExtZve32x", "true", + : SubtargetFeature<"zve32x", "HasStdExtZve32x", "true", "'Zve32x' (Vector Extensions for Embedded Processors " "with maximal 32 EEW)", [FeatureStdExtZvl32b]>; def FeatureStdExtZve32f - : SubtargetFeature<"experimental-zve32f", "HasStdExtZve32f", "true", + : SubtargetFeature<"zve32f", "HasStdExtZve32f", "true", "'Zve32f' (Vector Extensions for Embedded Processors " "with maximal 32 EEW and F extension)", [FeatureStdExtZve32x]>; def FeatureStdExtZve64x - : SubtargetFeature<"experimental-zve64x", "HasStdExtZve64x", "true", + : SubtargetFeature<"zve64x", "HasStdExtZve64x", "true", "'Zve64x' (Vector Extensions for Embedded Processors " "with maximal 64 EEW)", [FeatureStdExtZve32x, FeatureStdExtZvl64b]>; def FeatureStdExtZve64f - : SubtargetFeature<"experimental-zve64f", "HasStdExtZve64f", "true", + : SubtargetFeature<"zve64f", "HasStdExtZve64f", "true", "'Zve64f' (Vector Extensions for 
Embedded Processors " "with maximal 64 EEW and F extension)", [FeatureStdExtZve32f, FeatureStdExtZve64x]>; def FeatureStdExtZve64d - : SubtargetFeature<"experimental-zve64d", "HasStdExtZve64d", "true", + : SubtargetFeature<"zve64d", "HasStdExtZve64d", "true", "'Zve64d' (Vector Extensions for Embedded Processors " "with maximal 64 EEW, F and D extension)", [FeatureStdExtZve64f]>; def FeatureStdExtV - : SubtargetFeature<"experimental-v", "HasStdExtV", "true", + : SubtargetFeature<"v", "HasStdExtV", "true", "'V' (Vector Extension for Application Processors)", [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>; diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll index 012a02ff881a9..baa1d412abf26 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s ; Check that we don't crash querying costs when vectors are not enabled. 
; RUN: opt -cost-model -analyze -mtriple=riscv64 diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll index fcb576c5ed385..d27712d155ef0 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s ; Check that we don't crash querying costs when vectors are not enabled. ; RUN: opt -cost-model -analyze -mtriple=riscv64 diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll index 56c175e38e9fd..497c16d2e2a6e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; Check getShuffleCost for scalable vector -; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s define void @vector_broadcast() { ; CHECK-LABEL: 'vector_broadcast' diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 19ba02d531567..d37d86fc2f04d 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -17,8 +17,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbr %s -o - | FileCheck --check-prefix=RV32ZBR %s ; RUN: llc -mtriple=riscv32 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV32ZBS %s ; 
RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s -; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s +; RUN: llc -mtriple=riscv32 -mattr=+v %s -o - | FileCheck --check-prefix=RV32V %s +; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s ; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s @@ -38,8 +38,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbr %s -o - | FileCheck --check-prefix=RV64ZBR %s ; RUN: llc -mtriple=riscv64 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV64ZBS %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s -; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s +; RUN: llc -mtriple=riscv64 -mattr=+v %s -o - | FileCheck --check-prefix=RV64V %s +; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s ; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll index 6a144032e866a..33dd10ad52013 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 
-mattr=+f,+d,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s | FileCheck %s ; i32 saturate diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll index 575a09b7922b9..c00df3ff4ff3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare @llvm.abs.nxv1i16(, i1) diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll index e46879448d6c2..4216c5952eae8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \ ; RUN: | FileCheck %s -check-prefix=RV64IV define @access_fixed_object(i64 *%val) { diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index b9be04a3306d3..8bef6530a0081 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 
-mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | define void @add_scalable_offset( diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 065e5dee2f930..ae64fbc93e237 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s define void @lmul1() nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index d9ef5cc385f93..8451aeec34c67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @bitreverse_nxv1i8( %va) { ; CHECK-LABEL: bitreverse_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll index 8c1fa06861681..2b81cac3d67ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck 
%s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @bswap_nxv1i16( %va) { ; CHECK-LABEL: bswap_nxv1i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index ca0abc5e40031..37fd1a7424c3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define fastcc @ret_nxv4i8(* %p) { diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 009b71437bf9c..39fbdf32e6a52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV64 ; Check that we correctly scale the split part indirect offsets 
by VSCALE. define @callee_scalable_vector_split_indirect( %x, %y) { diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll index 5400f86e75752..8f362b6f4d19d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 ; fold (add (umax X, C), -C) --> (usubsat X, C) diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll index 1e0180f9ad97e..969f37b338d53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s ; fold (and (or x, C), D) -> D if (C & D) == D diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll index b7ef676a67258..fdbdc37df47c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -verify-machineinstrs 
-riscv-v-vector-bits-min=128 < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s define void @combine_fp_zero_stores_crash(float* %ptr) { ; CHECK-LABEL: combine_fp_zero_stores_crash: diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll index 2b8accc38454c..daad5e051c9d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll +++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64-unknown-unknown-elf" diff --git a/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir b/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir index c319ba59c2967..870e99f1290b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir +++ b/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -mattr=+experimental-v -run-pass=simple-register-coalescing %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+v -run-pass=simple-register-coalescing %s -o - 2>&1 | FileCheck %s # This test used to crash in the register coalescer when the target would # return the out-of-bounds CommuteAnyOperandIndex for one of its 
commutable diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll index 31bde5a895315..5850c7c1937e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 ; These tests check that the scalable-vector version of this series of diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index b399de99ccd92..7fb412094054c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs 
< %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D define @ctlz_nxv1i8( %va) { ; RV32I-LABEL: ctlz_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll index 0a5b2bf3caa01..21de10f5f87ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @ctpop_nxv1i8( %va) { ; CHECK-LABEL: ctpop_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index f5148450e1c23..3326407581171 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D +; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D define @cttz_nxv1i8( %va) { ; RV32I-LABEL: cttz_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir b/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir index 574905662f8b5..67865ce5f433f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir +++ b/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir @@ -1,6 +1,6 @@ -# RUN: llc -march=riscv64 -mattr=+experimental-v -o %t0 -filetype=obj \ +# RUN: llc -march=riscv64 -mattr=+v -o %t0 -filetype=obj \ # RUN: -start-before=prologepilog %s -# RUN: llc -march=riscv64 -mattr=+experimental-v -o %t1 -filetype=obj \ +# RUN: llc -march=riscv64 -mattr=+v -o %t1 -filetype=obj \ # RUN: -frame-pointer=all -start-before=prologepilog %s # RUN: llvm-dwarfdump --name="value0" %t0 | FileCheck %s --check-prefix=CHECK0-PLUS # RUN: llvm-dwarfdump --name="value1" %t0 | FileCheck %s --check-prefix=CHECK1-PLUS diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index 8b33e981854d3..9c283bc3d6830 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -run-pass=prologepilog -o - \ +# RUN: llc -mtriple riscv64 -mattr=+m,+v -run-pass=prologepilog -o - \ # RUN: -verify-machineinstrs %s | FileCheck 
%s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll index 596cc0a7681d4..5ecfae181a707 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s define @sextload_nxv1i1_nxv1i8(* %x) { ; CHECK-LABEL: sextload_nxv1i1_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll index e99b7b0f79043..d8736937e6eba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s define @extract_nxv8i32_nxv4i32_0( %vec) { ; CHECK-LABEL: extract_nxv8i32_nxv4i32_0: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll index 2964d2aea09b9..950ca3bde2c88 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll 
@@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define half @extractelt_nxv1f16_0( %v) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll index 1690f42ff036f..b5525d2337047 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define half @extractelt_nxv1f16_0( %v) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 27d609e6f8a7c..d712ced930116 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define i1 @extractelt_nxv1i1(* %x, i64 %idx) nounwind { ; CHECK-LABEL: extractelt_nxv1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll index c50881a4b63bf..2062aaa184125 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -1,5 
+1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define signext i8 @extractelt_nxv1i8_0( %v) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index 0c0685697eb11..281182ba33f09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define signext i8 @extractelt_nxv1i8_0( %v) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll index d8f16d90d5353..ccdbf4c08f484 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @ceil_nxv1f16( %x) { diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll index 210136734821b..f3e59dc4281fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @floor_nxv1f16( %x) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll index 1bc2adb71c720..376f87345494f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s +; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s ; This contains negative tests for the strided load/store recognition in ; RISCVGatherScatterLowering.cpp diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll index bde5c515a1425..bddbcdb5fb84d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll @@ -1,5 +1,5 @@ -; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM +; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | 
FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM %struct.foo = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll index 519f042fd92a0..fee2145d9c44a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 define void @abs_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: abs_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll index ba251f9fd3352..bbb2c2e6ae280 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024 define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) { ; VLEN256-LABEL: bitcast_1024B: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll index 3043745e9f927..11952a6e959a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -1,15 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc 
-mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: -riscv-v-vector-bits-min=128 -target-abi=ilp32d < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: -riscv-v-vector-bits-min=128 -target-abi=lp64d < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: -riscv-v-vector-bits-min=128 \ ; RUN: -riscv-v-fixed-length-vector-elen-max=32 -target-abi=ilp32d < %s \ ; RUN: | FileCheck %s --check-prefixes=ELEN32,RV32ELEN32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: -riscv-v-vector-bits-min=128 \ ; RUN: -riscv-v-fixed-length-vector-elen-max=32 -target-abi=lp64d < %s \ ; RUN: | FileCheck %s --check-prefixes=ELEN32,RV64ELEN32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll index bde7f07d1b53b..e97a7c98f4dcd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 define void @bitreverse_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; LMULMAX2-RV32-LABEL: bitreverse_v8i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll index b27469939cf3f..b02c4f6383710 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; LMULMAX2-RV32-LABEL: bswap_v8i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 1afa7b94221ae..90900c60ee44a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < 
%s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 define fastcc <4 x i8> @ret_v4i8(<4 x i8>* %p) { ; CHECK-LABEL: ret_v4i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll index 13b856e00a7c5..bc6599de8e676 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define <4 x i8> @ret_v4i8(<4 x i8>* %p) { ; CHECK-LABEL: ret_v4i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index b53641f5cb33c..a67d54f8e7275 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind { ; LMULMAX2-RV32-LABEL: ctlz_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index 24afb1d6ee5c6..afa391bc83e39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 define void @ctpop_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: ctpop_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 2a7680a22acab..3059d1777c362 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64 define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind { ; LMULMAX2-RV32-LABEL: cttz_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index d84679b8b0c45..99f64e73af2cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; Test that limiting ELEN, either through the command line or zve32, scalarizes ; elements larger than that and disables some fractional LMULs. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir index 21c01a82dba4e..c2fece9dbf6ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -# RUN: llc -mtriple riscv64 -mattr=+experimental-v -start-before=prologepilog -o - \ +# RUN: llc -mtriple riscv64 -mattr=+v -start-before=prologepilog -o - \ # RUN: -verify-machineinstrs %s | FileCheck %s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index 063d4f380faf3..df657d83d5e7f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 define <2 x i16> @sextload_v2i1_v2i16(<2 x i1>* %x) { ; CHECK-LABEL: sextload_v2i1_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index d9ade0f525469..e2236b3249d1f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define i1 @extractelt_v1i1(<1 x i8>* %x, i64 %idx) nounwind { ; CHECK-LABEL: extractelt_v1i1: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index 633251e96362c..7eb5bd53df45c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @extract_v2i8_v4i8_0(<4 x i8>* %x, <2 x i8>* %y) { ; CHECK-LABEL: extract_v2i8_v4i8_0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index be851b3c2458f..124552d37a126 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define i8 @extractelt_v16i8(<16 x i8>* %x) nounwind { ; CHECK-LABEL: extractelt_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll index 33923f299a940..bc829dd463404 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP define i16 @bitcast_v1f16_i16(<1 x half> %a) { ; CHECK-LABEL: bitcast_v1f16_i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index fb752e82a4ed6..d1a5b516d7f9c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d 
-mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 ; Tests that a floating-point build_vector doesn't try and generate a VID ; instruction diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll index 5050af573bfd5..a87128bb9b8de 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @fpext_v2f16_v2f32(<2 x half>* %x, 
<2 x float>* %y) { ; CHECK-LABEL: fpext_v2f16_v2f32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index 3f57fcea6acc5..14ec588a640b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 ; Test optimizing interleaves to widening arithmetic. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll index f31cfe1bd5518..32d272512c0b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s define void @fcmp_oeq_vv_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x i1>* %z) { ; CHECK-LABEL: fcmp_oeq_vv_v8f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 25a078cd5239b..ecd4c85b4f56f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) { ; CHECK-LABEL: shuffle_v4f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll index c6070436767a3..9fb7b4927f71f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @splat_v8f16(<8 x half>* %x, half %y) { ; CHECK-LABEL: splat_v8f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll index 9e3988caba6fd..1478249d2ec9c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d 
-riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @gather_const_v8f16(<8 x half>* %x) { ; CHECK-LABEL: gather_const_v8f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index fd1a5cb54192e..cb316141e5b72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d 
-mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 define void @fadd_v8f16(<8 x half>* %x, <8 x half>* %y) { ; CHECK-LABEL: fadd_v8f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index f125dfd67c048..0f3efd2daaacd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d 
-mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @fp2si_v2f32_v2i32(<2 x float>* %x, <2 x i32>* %y) { ; CHECK-LABEL: fp2si_v2f32_v2i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll index 8d496e40a466a..9d0a00df4744d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d 
-mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @si2fp_v2i32_v2f32(<2 x i32>* %x, <2 x float>* %y) { ; CHECK-LABEL: si2fp_v2i32_v2f32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index b375720256434..4bc46c60a87f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define <1 x i1> @insertelt_v1i1(<1 x i1> %x, i1 %elt) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 01a8870ab2216..6bee4f83a47d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 - -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 
-mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 + +; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define 
@insert_nxv8i32_v2i32_0( %vec, <2 x i32>* %svp) { ; CHECK-LABEL: insert_nxv8i32_v2i32_0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 79195533f43c7..af2737a765a79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; FIXME: This codegen needs to be improved. These tests previously asserted ; type legalizing the i64 type on RV32. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 5a6b1f2126edb..f4bb61440f57d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @buildvec_vid_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: buildvec_vid_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll index 89dd2bded9402..4482c653ad4d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @sext_v4i8_v4i32(<4 x i8>* %x, <4 x i32>* %z) { ; 
CHECK-LABEL: sext_v4i8_v4i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index a17a83169373b..f42e327942f1d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512 ; Test optimizing interleaves to widening arithmetic. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll index 40479b0fb343b..9d0fb925a46e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s ; FIXME: We use exclusively byte types here because the MVT we use for the ; stores is calculated assuming byte elements. We need to deal with mismatched diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 1b8f1d246cd20..b727a27d53a2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <4 x i16> 
@shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-LABEL: shuffle_v4i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index 9e3c5ff0e4854..194bf63ccc553 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 define void @splat_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: splat_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll index 7cd9807bff727..576de147329cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 
-mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define void @gather_const_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: gather_const_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index faa8ff5cd2e95..2b3138cc76e73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32 -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64 define void @add_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: add_v16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll index 632fe1929512d..1b8da18a3d140 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index 56b3e3d89cb72..6dffc1bd6fd4d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -1,19 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8 ; Test with ELEN limited -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN32 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN16 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN16 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN8 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN8 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN32 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN16 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN16 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN8 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN8 define <1 x i1> @buildvec_mask_nonconst_v1i1(i1 %x) { ; CHECK-LABEL: buildvec_mask_nonconst_v1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll index d7ff4203017c4..924d20e8aff80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 define void @load_store_v1i1(<1 x i1>* %x, <1 x i1>* %y) { ; CHECK-LABEL: load_store_v1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll index 73968f06a963a..c37e4f8d882b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) { ; CHECK-LABEL: and_v8i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 644a39aada178..21cb21cae7671 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; 
RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64 define void @splat_ones_v1i1(<1 x i1>* %x) { ; CHECK-LABEL: splat_ones_v1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 4af789ee9963d..c3f7772878d44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 declare <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*>, i32, <1 x i1>, <1 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index d75f1e3530b04..49569bbd23822 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @masked_load_v1f16(<1 x half>* %a, <1 x half>* %m_ptr, <1 x half>* %res_ptr) nounwind { ; CHECK-LABEL: masked_load_v1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index 55edce865c5fb..39bcc2dc83d41 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @masked_load_v1i8(<1 x i8>* %a, <1 x i8>* %m_ptr, <1 x i8>* %res_ptr) nounwind { ; CHECK-LABEL: masked_load_v1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 5ffb77f09e2b8..f357ae3575122 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1>) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index 33824b182ab91..79d9943422b6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @masked_store_v1f16(<1 x half>* %val_ptr, <1 x half>* %a, <1 x half>* %m_ptr) nounwind { ; CHECK-LABEL: masked_store_v1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index 035f9021fd856..01492be088f06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @masked_store_v1i8(<1 x i8>* %val_ptr, <1 x i8>* %a, <1 x i8>* %m_ptr) nounwind { ; CHECK-LABEL: masked_store_v1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 3410a1a836167..54fae0066599a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare half @llvm.vp.reduce.fadd.v2f16(half, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index e37227ef22261..a2727051affec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s 
| FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 405d7aa6b98f9..2ec06885496e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 8ee206dd8e164..4844db616b6fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc 
-mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index 59ebdcdbc0485..7577eaaba491d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s declare i1 @llvm.vp.reduce.and.v1i1(i1, <1 x i1>, <1 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll index 59c6197aadd97..994d2ace14b64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 
\ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll index d452e525c6788..c38023a8d6488 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll index 129cbd1060beb..bc9a583dbbbe7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 
-mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 declare <2 x i8> @llvm.experimental.stepvector.v2i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll index 3c3e70c6b49d3..b1cfc28e9ed3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 declare <2 x i8> @llvm.experimental.stepvector.v2i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll index 97a2e85f4f46e..b6b64d84bb3b0 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+experimental-v \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+v \ ; RUN: -riscv-v-vector-bits-min=128 | FileCheck %s ; This test loads to values and stores them in reversed order. This previously diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 6402c25d068cd..d6d1e20b7013e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 define <4 x i32> @load_v4i32_align1(<4 x i32>* %ptr) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index bd29f51a93919..6d451865124d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < 
%s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.add.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index 419a9d4b5b1bc..01a146d155b13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.and.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index b7dac4e289044..8605aa1f2114e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll index 246dd3100cb1f..443af68b3eee7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.udiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll index 215daf4f92f51..86aa970c865f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d 
-riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fadd.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll index 2135fdfe96dbc..ad5e4a6031125 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll index ea76152cc2b74..b7d76815e7d32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll index 67e07b5b9f137..dfdfd8b42010f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll index 06c0619a80544..45274f8568aaa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fmul.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll index 836ccd5581d14..c7a4364b8d497 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll index 60027d26be99a..bdf7c6601d8d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll index cfcf8c18f7a29..3d9a771958f20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll index 976d12eab3d72..787e0c2426457 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.mul.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll index 
ad00d1e4e0abe..84f03cc78e601 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) { ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll index b1807c1b29de6..00604c38a6bae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.or.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll 
index d41fd7bb068c5..7584a1507e706 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0i8(<2 x i8*>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index ad21c399c15f6..cf858875d8285 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>*, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index 1e5db6e8ae907..1d5cad5d40212 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1,7 +1,7 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index 9781843701362..08f659c2ffea2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.vp.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll index f519a81965aee..8bee712ce4782 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v 
-verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll index 9d58cb6bc49a5..380e0096b3f04 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.srem.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll index 30be5a51002e6..4585a8841008b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.urem.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll index 3bb9b61b949e3..f17feb93b84d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll index 293a94ff1e422..dac996a262594 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll index 057687fc41a52..f3a288c4b2aad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index f57d1ee56f059..a28bfa17144e5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 154629079ce7e..fc5a136633b4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | 
FileCheck %s +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s define void @vselect_vv_v8i32(<8 x i32>* %a, <8 x i32>* %b, <8 x i1>* %cc, <8 x i32>* %z) { ; CHECK-LABEL: vselect_vv_v8i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll index 2c288ecc9480c..ecbdaef193f96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.shl.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll index 84f7a2e349033..54a4de66668ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s 
--check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.ashr.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll index 265acfbf82c0e..75fc113854fbc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.lshr.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll index f439cefb128b8..e9ef8b6971acb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll index 6eaadbd4c79e1..1a7cba285437f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll index e4d100c44407f..3f039c61f6230 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.sub.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll index 3bcd29572a494..86dd1a216cc53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <2 x i16> @vwmacc_v2i16(<2 x i8>* %x, <2 x i8>* %y, <2 x i16> %z) { ; CHECK-LABEL: vwmacc_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll index fe97f7894cfd7..358f7eb7b992a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <2 x i16> @vwmaccu_v2i16(<2 x i8>* %x, <2 x i8>* %y, <2 x i16> %z) { ; CHECK-LABEL: vwmaccu_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index d2553b8accb20..e7180587f9779 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <2 x i16> @vwmul_v2i16(<2 x i8>* %x, <2 x i8>* %y) { ; CHECK-LABEL: vwmul_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll index 4251922284900..8e9a6598acd1c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define <2 x i16> @vwmulu_v2i16(<2 x i8>* %x, <2 x i8>* %y) { ; CHECK-LABEL: vwmulu_v2i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll index 207f12ff3822e..bd4a53f7b8d7e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.xor.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll index 31614ed25bbe1..dc2bbe8bfbb25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s ; This test makes sure we match FrameIndex into the base address. 
; Done as a MIR test because eliminateFrameIndex will likely turn it diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll index dfc698ded242c..cdfdd889ed01c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @trunc_nxv1f16( %x) { diff --git a/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir b/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir index 8da8785956668..04ec1f3a24f48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -mattr=+experimental-v -o - %s \ +# RUN: llc -march=riscv64 -mattr=+v -o - %s \ # RUN: -stop-after=prologepilog | FileCheck %s --- | diff --git a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll index ea0d18ebdf46e..6dbebb656b66b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v < %s \ ; RUN: --verify-machineinstrs | FileCheck %s define @test_1xi1( %in, %in2) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index 
5f495e7f4bab4..08c9fcdead77c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s define @insert_nxv8i32_nxv4i32_0( %vec, %subvec) { ; CHECK-LABEL: insert_nxv8i32_nxv4i32_0: diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll index 2fa9efbfbf51c..4c6ae4eac3b37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1f16_0( %v, half %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll index 402e566f573dd..baa9c1532071a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1f16_0( %v, half %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll 
b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll index cbe815d3b31bc..da0f5c06c63ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index 99dbe7e23c0da..e2045ce254a1f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1i8_0( %v, i8 signext %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 19253cf635ec8..5c147c60404ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @insertelt_nxv1i8_0( %v, i8 signext %elt) { diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll 
b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll index 5a0a91d3ef3c6..e4dd3b8a6c9ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=RV64-1024 -; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=2048 < %s | FileCheck %s --check-prefix=RV64-2048 +; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=RV64-1024 +; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v -riscv-v-vector-bits-min=2048 < %s | FileCheck %s --check-prefix=RV64-2048 define void @interleave256(<256 x i16>* %agg.result, <128 x i16>* %0, <128 x i16>* %1) { ; RV64-1024-LABEL: interleave256: diff --git a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir index 0a23c11b54a15..1ef8f19c8b267 100644 --- a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir +++ b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -start-before=prologepilog -o - \ +# RUN: llc -mtriple riscv64 -mattr=+m,+v -start-before=prologepilog -o - \ # RUN: -verify-machineinstrs %s | FileCheck %s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll index 5ce9ee9589983..29428288b551c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s ; Check that we are able to legalize scalable-vector loads that require widening. diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll index 46e1c0a4d8a5a..7c423a20a0629 100644 --- a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll +++ b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @trunc_nxv4i32_to_nxv4i5( %a) { ; CHECK-LABEL: trunc_nxv4i32_to_nxv4i5: diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll index dfb58e2f6ab78..213fe1095515d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc 
-mtriple=riscv64 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s ; Check that we are able to legalize scalable-vector stores that require widening. diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll index 4d0cf0fae6100..bfbceb4c04ca4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s define void @vadd_vint16m1( *%pc, *%pa, *%pb) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll index 73874e2782c16..22a59b3ade70e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s define void @vadd_vint32m1( *%pc, *%pa, *%pb) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll index 4edad9c1742e3..f091027e6bb7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple 
riscv32 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s define void @vadd_vint64m1( *%pc, *%pa, *%pb) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll index 793c2dc8cdcb5..25131a2909ca9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s define void @vadd_vint8m1( *%pc, *%pa, *%pb) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/load-mask.ll b/llvm/test/CodeGen/RISCV/rvv/load-mask.ll index 4026061158943..6d73a2d9615ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/load-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/load-mask.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \ +; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \ ; RUN: -verify-machineinstrs | FileCheck %s define void @test_load_mask_64(* %pa, * %pb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll index 48f9bed5b6b59..0f7fa0b9a44ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -1,5 +1,5 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v < %s \ ; RUN: | FileCheck %s -check-prefix=RV64IV define void @local_var_mf8() { diff --git a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll index afd62aebf0a48..5ac40ad8a9235 100644 --- a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll index 886f2c32b66ca..a123b2ee6be32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @sext_nxv1i1_nxv1i8( %v) { ; CHECK-LABEL: sext_nxv1i1_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll index c6801feaf5345..6a337320d04c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @sext_nxv1i1_nxv1i8( %v) { ; CHECK-LABEL: sext_nxv1i1_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir index 9cce462522d6b..60f02d0b139ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +# RUN: llc -march=riscv64 -mattr=+v -verify-machineinstrs \ # RUN: -start-after finalize-isel -stop-after prologepilog -o - %s | FileCheck %s --- | diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll index 39a9aad1b8931..3e79ae9dc3e75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s define @masked_load_nxv1f16(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll index 
3574e4312ed33..6e6d920a0723e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @masked_load_nxv1i8(* %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll index aad12b1c3d079..de21fcf167505 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s define void @masked_store_nxv1f16( %val, * %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll index 184b3af497525..8dde101486448 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v 
-verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define void @masked_store_nxv1i8( %val, * %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll index 97d1956ce1270..c8d55a2e65c86 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \ ; RUN: | FileCheck %s -check-prefix=RV64IV declare @llvm.riscv.vmacc.nxv64i8.nxv64i8( diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index b1e55a3f2390b..31731af18b6b3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 declare @llvm.masked.gather.nxv1i8.nxv1p0i8(, i32, , ) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index a4f7dbd8e748f..f82dcca4ce65b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(, , i32, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll index b017d9a3b68be..40ede841d9391 100644 --- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256 -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,RV64-BITS-512 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512 ; ; VECTOR_REVERSE - masks diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index 8ac3073199689..de2bc6353c820 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s define signext i32 @foo(i32 signext %aa) #0 { diff --git a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll index 5e1e94965f1fa..e2199a6ae9366 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 
-riscv-v-fixed-length-vector-lmul-max=4 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \ ; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \ ; RUN: -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) { diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll index 5f926efd9012c..bad1af3c0ac36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+f -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+f -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define void @foo(i32* nocapture noundef %p1) { diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir index 504dafc50d8e0..e7e0ec614172a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir +++ b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -mattr=+experimental-v -run-pass=simple-register-coalescing -o - | FileCheck %s +# RUN: llc %s -mtriple=riscv64 -mattr=+v -run-pass=simple-register-coalescing -o - | FileCheck %s --- # Make sure that SrcReg & DstReg of PseudoVRGATHER are not coalesced name: test_earlyclobber diff --git a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll 
b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll index 3c1c5d372eef8..41275917bfefb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh,+m \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+m \ ; RUN: -regalloc=fast -verify-machineinstrs < %s | FileCheck %s ; This test previously crashed with an error "ran out of registers during register allocation" diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index b4db000233f34..314c41296c72a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O0 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O2 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll index 96699bb541209..cb21513f702c5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O0 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O2 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s \ ; RUN: | FileCheck 
--check-prefix=SPILL-O2 %s define @spill_lmul_mf2( %va) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index 07ac76efd8693..2aa051522fa9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+m -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O2 < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+m -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s define @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll index 67c5fccd71b68..081743b31b701 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32) declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll index 46c263b6c4fad..c2e44e7e4936f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O0 < %s \ +; RUN: llc -mtriple=riscv64 
-mattr=+v,+d -mattr=+d -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll index a3254901bc6d9..74defdfe62b00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O0 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s define @spill_lmul_1( %va) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index 8388c4160bef4..a7e83ed72453d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+m -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+m -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s define @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll index a37cb4d33e708..2b745cb5eddaa 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64) declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll index af0e5a07862fe..35f7411a921ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s define @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %w, %x, %y, %z) { ; CHECK-LABEL: bar: diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll index 03d95ed99db99..858ce54610cf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s define void @rvv_vla(i64 %n, i64 %i) nounwind { ; CHECK-LABEL: rvv_vla: diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll index df561fe6c7fde..8b75cc3981a63 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, %v0, i32 signext %x2, i32 signext %x3, i32 signext %x4, i32 signext %x5, i32 signext %x6, i32 %x7, i32 %x8, i32 %x9) #0 { diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll index 32f05623149ef..838cd82156875 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \ +; RUN: llc -mtriple riscv32 -mattr=+m,+v < %s \ ; RUN: | FileCheck %s define i32 @vscale_zero() nounwind { diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll index d233e14ba7672..27d55aed5ad2e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \ +; RUN: llc -mtriple riscv64 -mattr=+m,+v < %s \ ; RUN: | FileCheck %s -check-prefix=RV64 -; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \ +; RUN: llc -mtriple riscv32 -mattr=+m,+v < %s \ ; RUN: | FileCheck %s -check-prefix=RV32 diff --git a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll index e645593810308..1455703483acb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare { , } @llvm.sadd.with.overflow.nxv2i32(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll index 79d638b46a269..0796a10de07db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @select_nxv1f16(i1 zeroext %c, %a, %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll index 9ad7d41483987..a0945e96ca5fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define @select_nxv1i1(i1 zeroext %c, %a, %b) { diff --git a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll index f1c4bddb26cd7..c7d5846f97034 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 ; This test checks a regression in the select-to-sra transform, which was ; asserting (without a precondition) when the vector constants implicitly diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll index daf5743c269cc..77e9baf86f37b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll index 6adc6db413344..348a7cd661c07 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and diff 
--git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll index e3ff3ed0e0437..374df4eceedd7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s define @icmp_eq_vv_nxv8i8( %va, %vb) { ; CHECK-LABEL: icmp_eq_vv_nxv8i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll index 803ecdda84a7e..36d96ecec950e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s define @icmp_eq_vv_nxv8i8( %va, %vb) { ; CHECK-LABEL: icmp_eq_vv_nxv8i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index f6a348418f5b2..e902e1cd577a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-v,+f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f \ ; RUN: -riscv-v-vector-bits-min=128 | FileCheck %s define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) { diff --git a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll index f79480992fc32..0ea491c788f76 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare { , } @llvm.smul.with.overflow.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index 9f4d075d63452..4dcf779e2e499 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64 declare @llvm.experimental.stepvector.nxv1i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir index afe2bc56c776a..4b13dfe2a78f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -mattr=experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s +# RUN: llc %s -mtriple=riscv64 -mattr=v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s # This test makes sure we peak through the COPY instruction between the # IMPLICIT_DEF and 
PseudoVLE64_V_M8_MASK in order to select the tail agnostic @@ -21,8 +21,8 @@ ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn declare @llvm.masked.load.nxv8i64.p0nxv8i64(*, i32 immarg, , ) #1 - attributes #0 = { nounwind "target-features"="+experimental-v" } - attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" } + attributes #0 = { nounwind "target-features"="+v" } + attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+v" } ... --- diff --git a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll index ddd9ffbd02de6..8344124edb02e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare { , } @llvm.umul.with.overflow.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll index 72fa5f41de282..a8059db69be8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+experimental-v < %s \ +; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+v < %s \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+experimental-v < %s \ +; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+v < %s \ ; RUN: -verify-machineinstrs | FileCheck %s define @unaligned_load_nxv1i32_a1(* %ptr) { diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll index 
1796cbcde8755..9dfd10b712769 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; Test that we can remove trivially-undef VP operations of various kinds. diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index 9defe1460df39..f6d5f71344565 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV64 define @test_urem_vec_even_divisor_eq0( %x) nounwind { ; RV32-LABEL: test_urem_vec_even_divisor_eq0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll index 516476877b712..13eb36dd1a251 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vaadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll index d1724cf22079e..4d692eb089c3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vaadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll index 210abf8809624..4e796ebd4a32b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vaaddu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll index 8b97aa55c09b3..3f9ed340dede9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vaaddu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll index d46ec9cda5ac8..94c1f4dd52cc9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vadc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll index 1206136e5575a..40c4fe51c1e9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vadc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll index 2a4bc62aaa3c3..57db5175df548 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: --riscv-no-aliases < %s | FileCheck %s declare @llvm.riscv.vadd.nxv8i8.nxv8i8( diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll index a10c0c7d38598..4e7b12753aeb2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vadd.nxv1i8.nxv1i8( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll index 35f8d0c44bd21..a56bf1bf97e6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll index 3ad55f6fbc0fa..e6a4da4552874 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vadd_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vadd_vx_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 4206edadadb85..1190f85852cd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; 
RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.add.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll index e3239bd8b2c31..89b9dbcb57c0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vand.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll index feb5bc829a1aa..39a1d9bc4a081 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vand.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll index bea39f9740ca9..3175d4765f771 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | 
FileCheck %s --check-prefixes=CHECK,RV64 define @vand_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vand_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll index b9782bb4b60db..49c64bb688509 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.and.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll index 24cbd43726c49..d1cfc9704c1e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vasub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll index 4668e2a096caf..6ad70daafb25d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vasub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll index aa5895ffff549..4707aff206d82 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vasubu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll index 552ecf1720614..74d4690a8c8db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vasubu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll index d8862c9c8e99d..5a55268171f23 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vcompress.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll index c03b3d1ee28b6..d77f0da267e55 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 
-mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vcompress.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll index d21064962c665..fe5bba14eb6d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare i32 @llvm.riscv.vcpop.i32.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll index b42d679b52b6e..8d583b8df634a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare i64 @llvm.riscv.vcpop.i64.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll index b8bbb3d6024d2..e062069311956 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vdiv.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll index 102bce0efb78b..01f1100045567 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vdiv.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll index 6adaa2476659e..16fe495b981cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vdiv_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vdiv_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index 97160b8e93457..3a1b0981b0d95 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare 
@llvm.vp.sdiv.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll index e80d871c2485f..b86b902962ea5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vdivu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll index 9af9d2ad6e419..7efea7cfe3fbf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vdivu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll index 5c3bcd40f4cf5..5bf80bbc34859 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vdivu_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vdivu_vv_nxv1i8: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll index 97e45f2f4e6ec..2f56348950f14 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.udiv.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll index 74e9553d0916f..e3d0654b60820 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vsext_nxv1i8_nxv1i16( %va) { ; CHECK-LABEL: vsext_nxv1i8_nxv1i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll index 0a607d3af630a..e5b1e8dcd2cb1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d 
\ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.fabs.nxv1f16() diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll index 3a34f6ba27fdf..cfdeba067718a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll index 03ff955623617..8e43295bf7ee3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll index 3fc160103aa39..002fcf15e1d76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v 
-target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfadd_vv_nxv1f16( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 019a72b02be72..f5ab4df3992e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fadd.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll index 35cb662fa6750..ae9df2aefa4d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfclass.nxv1i16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll index 7ccfcb8ca7e3b..c86e1f334712a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | 
FileCheck %s declare @llvm.riscv.vfclass.nxv1i16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll index 6e3802b6d3d9e..4c5fc2f32d7ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.copysign.nxv1f16(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll index f55c9908a4539..5549960bb7736 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll index 93977cf6c7f06..65270dc06336d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( , 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll index 22c09b22d71be..1c8c2a80c90db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll index 914926eb1a6bb..6fc87d15dac18 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll index d439b9d909fdc..75c0a7ff62a47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll index 2d181fbecc41e..9c3e2a03f1141 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll index 98f9e330d446d..966a5d6f85a0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll index e52f1548e5963..3a309eea35dbd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll index cdc40c9c45968..26632717dfa9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll index 50bfe8b7ecc06..f5984a512e0fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll index 4316de95a1ceb..e76b0db05446e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll index 801d7b0d03f19..8fb44d2d1ecf0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll index 3f5ee058c566d..2f2bc40a0e6fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll index 97bc6adfb8c14..dc6760900bafb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll index a0293b624eefc..92d84e7b6f070 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfdiv_vv_nxv1f16( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 4e6c840077401..f785903d06786 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fdiv.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll index dab1544783435..6d6d3a8c74616 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare i32 @llvm.riscv.vfirst.i32.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll index 005d023207bfe..0e20516a69c79 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare i64 @llvm.riscv.vfirst.i64.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll index b3a99448173d7..207c7ecf5a3db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll index f77030c1dac0b..f636fa36eb7c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll index de7d9ed40414b..dd12115fbb95b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll index 632fdf2f3f877..ae39ef8dd3260 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll index 46735fddc56e5..0c1a79804e167 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfmacc and vfmadd by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll index 197b564d058b2..d6a880c743dd2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll index c11b123fff6ff..aea557735e932 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll index 57f6f0f2d9381..ffa6c42ee795d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.maxnum.nxv1f16(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll index ffa4c3bceac7d..c1d91e208aaa1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll index 62187750e4525..cf4ad020e4cf1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll index 2b935c0b1744c..2b2d954a7cc1d 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll index e0af688748318..3f1ffd3e81a64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll index 41f4fe4fa7b37..86d5b76a7c6af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.minnum.nxv1f16(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll index 519ae7e204da9..ff51e6dab20a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll @@ -1,5 +1,5 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll index ce757f11d02d6..08666a5dd51c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll index f5534895fe6f6..8a10800c748ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll index 2b6e35cc92161..eb0733b6e5cc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( , diff 
--git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll index 9e3cfa87072f5..dd4fb4a321530 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfmsac and vfmsub by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll index e0757776acb22..9cab6de9e0b46 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmul.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll index a584ad412124b..f260499700e6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmul.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll index 3ad4bf8210069..4aad6cd2c7c2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfmul_vv_nxv1f16( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll index 2bff7a44c5213..6223c8d823b27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fmul.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll index 9f09ff5f8ca1f..56fe5c3891097 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v,+zfh -target-abi ilp32d 
-verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s declare half @llvm.riscv.vfmv.f.s.nxv1f16() diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll index 5c07fb600b662..4d47c000788fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s declare @llvm.riscv.vfmv.s.f.nxv1f16(, half, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll index db83f2df724ef..ee619309b9e3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s declare @llvm.riscv.vfmv.s.f.nxv1f16(, half, i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll index eee0699137ad3..b4acc57dcd81e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \ +; RUN: 
llc -mtriple=riscv32 -mattr=+v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmv.v.f.nxv1f16( half, diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll index 6172df66388a1..1c4d9f6e89dc4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -target-abi lp64d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -target-abi lp64d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfmv.v.f.nxv1f16( half, diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll index c79f6e0a9f8de..014ff81bada19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll index 9eaff968ae960..2e35f75eb89c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll index 1f74edd54038c..ab5f66bf692fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll index 5c8f94ac66ac0..d8d55df8e4589 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll index 1ac2a36048763..4835d4e5c5916 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll index b8928d0a568b2..32a23932b074b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll index 351fea0637eba..b464fdde6db25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll index af53b02a8a666..4020c1d5d1a34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll index b4fb1c2dd06e1..227210e6f2f01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 
-mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll index 6bdd5208ea831..ad695704aae0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll index 4a26e7d89b8b7..4bfe331db7e00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll index c875cbd31995f..f7f873dd05155 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll index c521dc30681e9..6c97455f1992e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll index 547f307332cba..d78d695f3df20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll index 0b960a127ae82..4981f8b16d741 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll index 7817455e14d68..c7bb913f3797f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll index 14318b8602f33..46014911449b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfneg_vv_nxv1f16( %va) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll index 704cd9ee01060..956ac22ede348 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll index 3a98ab5ace053..6d302b7f49d5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll index b4b0f17ba2f67..9d5d70498ab3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll index 1f2813400c554..13c9484ad907f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll index c15245db3a167..e83a13b4221af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfnmacc and vfnmadd by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll index 357a871d2af7f..385b3397ab28b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll index 3f881110a0d8b..2a7a29a742f92 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll index 559a243b3d5c6..35e687c4fcbc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll index 7ddf5f79d5b25..977163f1383f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll index 0c8ab026dd978..d8cbbf1518cc2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfnmsac and vfnmsub by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll index ce9636fd7272a..b98eadbde1e5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v 
-target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 define @vfpext_nxv1f16_nxv1f32( %va) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll index 8c78a870f1d3e..6884ebc92565c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfptosi_nxv1f16_nxv1i1( %va) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll index f308f1a292c8c..0fb253dfe1861 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 define @vfptrunc_nxv1f32_nxv1f16( %va) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll index 6bd205a022e74..b117a120b04d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrdiv.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll index 5fa7b453e583b..6cc0b53443a8f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrdiv.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll index 56e4c91313dcd..3006acf443206 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fdiv.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll index 9dfbde0d35f5a..30897b95deea2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrec7.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll index c0f6302b31e47..4e0fe7b9fc62f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrec7.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll index 10ea4493d5653..25ed3f1ab3676 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredmax.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll index 7c862c31193cf..bb173e91ecf31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredmax.nxv4f16.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll index 21b4d8c636e7f..9561be7b6fc09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredmin.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll index b31eeb2d99c93..d04ef7a6707de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredmin.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll index 252b6c13765c6..1f1e68e0dbc9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredosum.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll index 411c99111b474..8c42e43d9094d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredosum.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll index e16a55c6a6942..7b3691c74887c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll index 896dfbabffd85..9264397afa0e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll index c423723e44fc6..d0198a85b0c52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vfrsqrt7.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll index 190aa57cee66d..cf9d7a4c0af5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrsqrt7.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll index bd8e12488526f..8bbf0aac2b339 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrsub.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll index 77929c60d2614..85b7148c3c830 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfrsub.nxv1f16.f16( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll index 5a8f76f864077..4f6a1612fb4f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fsub.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll index e6a08cc076b7d..a1dcc1fa26bdc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll index 9943b5dcf3cca..8ff4b4ba03e2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnj.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll index 6e54a35a8ae2e..ac708fa2d9918 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll index 9272119d955ea..9ff02249165bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll index 29b38b6449cb2..8f4fcaa73c42c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll index 92b17a254f012..77ebd259c6896 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll index 3ca377023594a..a4833408acfff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfslide1down.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll index 9a2adbc6e0a79..84572f5dec0d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfslide1down.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll index 9517a4a255af8..0e5b566812011 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfslide1up.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll index 2c5ed6e5871df..37afacc6c7f27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfslide1up.nxv1f16.f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll index a9f7415c15c6c..3b86fd763f3c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsqrt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll index 2e95f82bf9eab..c810a516f3b35 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsqrt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll index 5acac37ac2c70..4b5a337414942 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v 
-target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.sqrt.nxv1f16() diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll index fd64fb3da6944..99e45214deea0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll index 15c008f28a424..c972ed358bf9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll index 7ac9df394bf2a..e127406aacf3a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < 
%s | FileCheck %s define @vfsub_vv_nxv1f16( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index ca52d476e247d..3f5dbc7384ece 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fsub.nxv1f16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll index 20aa40dda0ee5..02ce6a1c85f90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll index 4f829606d14d3..135b56dff1b47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll index 486d51182b65c..e7c93d765770a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfwadd_vv_nxv1f64( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll index 2c64eb26d074e..fde2d9f89d045 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll index bea142a2ba9a5..9235c3f251d41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll index 21d990ed987fc..460488388b5ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll index 4637218cbd3b8..e050090b7761c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll index e6dbecbe229c8..467b64c20fedd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll index 262a0f8788661..e294cbe085f73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll index f95130cd1f5e7..fc0af066c39ec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll index f0016e980c317..107813b7879bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll index c70fb1df935e9..d6cf1b3563105 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | 
FileCheck %s declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll index 317ac39ede925..4d551f62ec52f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll index 31cda39c700b9..c419e08471ca1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll index 971ed34a6895b..6b881aba8f652 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll index 1d2a542bb51d2..b8d88e1c64e55 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll index b433cd9b66a70..fd01c64df0d36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll index 54ace8228ee78..c2b0a222709e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll index 4bcc8a93ad976..dc461d60b0be9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll index d3485e58e3a76..ffcfd186c8e10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll index 6ec1b3fe99dc6..bd2dac9493022 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll index 9b66cfaf15258..8035ec77c03ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( , 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll index a4877537cbb06..01d2ce92c916c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll index ed3b540b2c69d..8e40ddc009af6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll index 6d36dcfb20caf..9b5a7b0641806 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll index ba29aacf595f3..bb36d96bd9668 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfwmul_vv_nxv1f64( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll index af468ffdc095a..8a5db70f5baf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll index fd0f6a2272444..0b21a7aa9b395 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll index 0bde93d0497c4..554234bb68a61 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll index c8e8f80302a82..44f2e66113939 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll index 73780360e9551..37240159bd908 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwredosum.nxv2f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll index 837b04fd29501..2282bd5fbc860 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwredosum.nxv2f32.nxv1f16( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll index 175d073fc9442..fe56d0c6bd0d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll index 77d901fa04dfe..52bde877eb7d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll index 01c65629b4f5c..0d7c2e7fe0867 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll index fac3479c106c6..cdc77ffc55f8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll index 8e2ef3f76256f..a8b81fe8feb23 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfwsub_vv_nxv1f64( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll index 762ee60fb6eca..3234dec9db144 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll index 3127e5c5af694..545bc4e4a53de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll index 4884ccebad49f..58ed5cfb7620d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vid.nxv1i8( i32); diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll index 28b5809d5851a..1413c0ec4b443 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vid.nxv1i8( i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll index bf6b235eeab40..6b3838cd5898c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.viota.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll index 8f795bc056007..0e37ff483fb4f 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.viota.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll index b0c77864f4fc2..7145cb55d8f84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vsitofp_nxv1i1_nxv1f16( %va) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll index 4b0843094e8bc..51521f31726da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vle.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll index ba2ebb676129d..78e005648d746 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vle.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll index f016f0e23bead..e5ae807e6ff22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare { , i32 } @llvm.riscv.vleff.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll index 381e4c5c59b42..d588205f18702 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare { , i64 } @llvm.riscv.vleff.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll index b7933dd4c9b93..72cac45129035 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vlm.nxv1i1(*, i32); diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll index 2a969a7ae445a..21b3f7b0b0ad2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vlm.nxv1i1(*, i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll index d987fe0f7437f..e3275457d2be5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vloxei.nxv1i8.nxv1i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll index 99132b8762347..138916c8bcb16 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vloxei.nxv1i8.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll index a2f1d1f190784..f702d9f5620bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16*, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll index ec06679f5ba2e..ce63a313cfec5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16*, , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll index b3ecb80d7a7ad..8b68a7f94cf94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vlse.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll index 32b5acb681818..62fa095e3a14b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vlse.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll index 
03ba5f763340d..21ae83e985d05 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vlseg2.nxv16i16(i16* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll index c385e412bc966..25a3d43b68061 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vlseg2.nxv16i16(i16* , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll index 1825c90057a53..28729e179ab40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i32} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll index 71ff17ff0fb84..86da8a6ab1652 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i32} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll index 1aa3aaf579ada..af7e678205a5e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll index 4b66af34e776c..db03fbed879f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll index 422e64e23622e..069113cd15d58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vlsseg2.nxv16i16(i16*, i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll index 69dd8a988602f..96f1f34c28c24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vlsseg2.nxv16i16(i16*, i64, i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll index 9637e0f327fd8..64d73a94e0042 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vluxei.nxv1i8.nxv1i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll index 168c7b2874cdf..efec1362cb9f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vluxei.nxv1i8.nxv1i64( *, diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll index 4232d1c2d11c8..54bf9d2a9ab56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16*, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll index 7094d305c8bd8..e14587c1d9b3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare {,} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16*, , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll index 0ecbc734de624..3aeaa11d2ff94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmacc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll index 9ce992e1b8487..cfb3a432b92f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmacc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll index b9d8ac8e3841c..e64a836313d3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll index 4df8881059e31..3739ad3adf406 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll index b178d910fa984..a924d04c902f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll index 0cafb81d85b48..9fdaf473312a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll index 8deb23a234760..fa46e6e8932a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll index b62e0ad82e7d6..fc401f3cba819 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll index b8b5c38605ffa..727cf1503b6c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ ; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; This tests a mix of vmacc and vmadd by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll index 16c1398f29115..4d0a65d0f892b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmand.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll index 835c30b50d36f..12107a960f87d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmand.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll index 8743551c89cd6..90d6cba3592d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmandn.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll index 3cbd68f5e39ba..5ad6aa2ee4d8f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmandn.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll index bc7699e5a902c..e12c10ae36daa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vmand_vv_nxv1i1( %va, %vb) { ; CHECK-LABEL: vmand_vv_nxv1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll index adcefbbc1088b..1d51eb6549cff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmax.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll index b05f95b256225..da40c60ff8a96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmax.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll index 3cdda837dacce..c12259cb3de44 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmax_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vmax_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll index 65287ee1ca82f..af080efa3358b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmaxu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll index a952e8b36bff8..bc63daea21f69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmaxu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll index 772ede13f50a0..26629320b90c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmax_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vmax_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll index 886bfebb80f06..49fb55c901b2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmclr.nxv1i1( i32); diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll index 2468d7b1c2c1f..50516db35a4fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -verify-machineinstrs \ +; RUN: llc 
-mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmclr.nxv1i1( i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll index 970045f09e781..25ca3b0631c08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmerge.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll index 23140c523eafc..2360985310ec8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmerge.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll index 232294d3496fa..553e1b6930f65 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfeq.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll index ea61a95f3d96f..6d4c472a5295d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfeq.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll index be8799de1f7f2..e3afd15c6df74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfge.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll index 2d6c6a8bc4fd4..ea8e2fcf19af9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfge.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll index 942de0218179f..a8246870ac786 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vmfgt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll index 18e1136778e09..457ea76560ddd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfgt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll index 128cdf6ce59f6..803eb1ceaab13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfle.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll index fc59c864bdcd7..d28d8d73b1960 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfle.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll index 09577ac427d13..9485c178df504 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmflt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll index a05b50120896e..b55f491944323 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmflt.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll index 7a15fa7ba66e1..30c8be8e6fe0c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfne.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll index ad572467abdb6..e0d9c231516b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmfne.nxv1f16( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll 
index 1a08e35eca440..ebc304b18f760 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmin.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll index 46726121b21e7..febe87403cdda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmin.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll index 15d2a2e0c0ae7..bb3a4b149a714 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmin_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vmin_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll index 8add74b9114e1..ec3cc011bd6e1 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vminu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll index 02040135fde8a..f3b9100c38a62 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vminu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll index e289491af5677..2ee5b370f07a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmin_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vmin_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll index 60144b7239719..958d38f0023f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmnand.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll index 8c95c93e3f008..b4397d512e34b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmnand.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll index c772ab9562f8a..ca5bcf5463fa5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmnor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll index a14d0e7ffd1b9..5beaeadcdcab2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmnor.nxv1i1( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll index 0f43644d2a241..117d152f7248c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll index 689c1400b07ec..81fa40663fc1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll index 15fc0c3c33706..8509603acd2f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmorn.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll index d0358ead0012f..89de2b78e94f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmorn.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll index 1ca8c1f0fe2bd..fb05c89fd11b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll index 2e3b6bd2d49a4..02e1a4541a30b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll index 142c0eeb303e1..f9b46796d6423 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll index 9d9ec65c9f4af..3b4e2619937ca 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll index d79b3d2d1c03e..b8f27529534da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbf.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll index 4d8246d187af9..bf773b7c45502 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsbf.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll index c93e1aa867e1e..6c9536a0d65fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vmseq.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll index ee35bfd4003fe..300deebbbbf3a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmseq.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll index 5b8a4f1102d6d..68a6719c76580 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmset.nxv1i1( i32); diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll index 22c0f7813dfad..f34157dfd3797 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmset.nxv1i1( i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll index 29a2aaf6f192d..2924ca51b4255 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsge.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll index 5062948a6c839..cf14df3631b9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsge.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll index fc9043d633dd2..02750e7e4fff6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgeu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll index 638140176d7f8..c6c2daf8ed9c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgeu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll index f11aabd622479..604f2775277f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgt.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll index 7299b3d2132f5..3082564243bb0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgt.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll index f7ccc30165644..209b714eb7948 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgtu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll index e75132618467e..510fc0664d6bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsgtu.nxv1i8( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll index abf13933c87d0..888b6ebbbc3f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsif.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll index 5a0b49f27a469..0f776c83e0129 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsif.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll index d072e50d12ee5..c5810dacd0e2e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsle.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll index 88e3ca1efb501..ec74926c8087b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; 
RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsle.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll index cca51cae50ffa..f85b2d16b7cf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsleu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll index b2b859448d59f..74a9516c0ea04 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsleu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll index c0b0faa8b4d9b..2b70fb0a5bff7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmslt.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll index 3afdc6193c11f..711ac89528aba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmslt.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll index 9652a76bdb821..d20313188376f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsltu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll index 82491fd181e19..456a37a3f71bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsltu.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll index 1865ea6e47850..534b895d00978 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsne.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll index 
cd705033921f5..06a51aa27f2a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsne.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll index a8f41ad67800f..b5db2a4b284ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsof.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll index 588c95315d2c8..8fba91102f814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmsof.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll index a0ec4a11c625d..cf0c1da69743c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vmul.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll index b24dcf139260e..adf746657527d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmul.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll index 8ecad6004b9bb..9ebda9dd30eb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmul_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vmul_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index ecf33e3c2ca20..488fc89203a3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 
-mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.mul.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll index 2a7789fd61f10..51e254f5fb352 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulh.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll index 4fd892986d871..9242b6220237d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulh.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll index 076f726e78359..a65642d05f6ec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc 
-mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; Test that the prepareSREMEqFold optimization doesn't crash on scalable ; vector types. diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll index fe7c0b23c4788..068a53f994fe1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulhsu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll index 09c8be1ee63f6..328c9098fb4d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulhsu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll index 67327a9f8914a..8c04d78a90602 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulhu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll index ea7091333b387..48ce2b21aacb6 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmulhu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll index 54cb5bfaeef5f..08b0d35a9a7b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vmulhu_vv_nxv1i32( %va, %vb) { ; CHECK-LABEL: vmulhu_vv_nxv1i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll index fa001c38ef796..44dbe3c095c3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare @llvm.riscv.vmv.s.x.nxv1i8(, i8, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll index 45d607f2a15dd..f324d4a45e474 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare @llvm.riscv.vmv.s.x.nxv1i8(, i8, i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll index af3dfa4cb4987..d4cebd0ab615e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmv.v.v.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll index aa4ab67be8285..7038f655e5bfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmv.v.v.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll index dab05270b0039..320fa626e7aad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v 
-verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmv.v.x.nxv1i8( i8, diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll index 399e8b53a932c..6bfc5ace93717 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmv.v.x.nxv1i8( i8, diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll index 190cb0ba1c4de..86a687f65c808 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i8 @llvm.riscv.vmv.x.s.nxv1i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll index b6aff9d0ebb97..5ac41b3558f9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i8 @llvm.riscv.vmv.x.s.nxv1i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll index d923de1200851..82759bc9ea0b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll @@ -1,5 +1,5 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmxnor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll index 48425887bf69b..eb31e34b9c4be 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmxnor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll index 2eb3bc6d3de47..15cb88f17599a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmxor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll index 2058bdab69b3d..0e8d66b558315 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vmxor.nxv1i1( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll index 5301ad154fe3c..3dfadcc3f6c46 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll index 9c47b5f66f4f3..dc0f3f879e370 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll index ce705c2d686f6..1fb5c651ebe0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll index 00ef63edd25d2..0fd55a75c5cf5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll index 5ba99c50dc74f..7c1c3e6fcc4f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnmsac.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll index 79862351375b7..f9ba1e3016eab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnmsac.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll index 372ba28bd459b..3cf95ae313257 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnmsub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll index b5383a9f7325e..6519e590aed38 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnmsub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll index 075c0a35f05b7..3e2ff603401a6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; This tests a mix of vmacc and vmsub by using different operand orders to diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll index 2cd8e67c7cf22..123dfbe13d77a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll index 51849e008e94c..b6ad65065adbd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll index 88d790c99b778..9336bb1c6dc1f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define @vnsra_wv_nxv1i32_sext( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll index 20cfb1ee1e552..065203a3fa0d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll index e11eec3c83f88..8f8a89a85e821 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll index fe243983f4ef2..26f32edfc8b7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define @vnsrl_wv_nxv1i32_sext( %va, %vb) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll index 9514f246eb7eb..ab10d1ce9c3ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vor.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll index a7b031a79ba0d..d30bf628ad030 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc 
-mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vor.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll index 4a3768c0fd5dc..bef35df66dc62 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vor_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vor_vx_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll index 469d0489c5d83..50023f8ccefad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.or.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 2291a9acd4bf4..dc38a17e10a25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll 
@@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0i8(, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index e909ffe6a025a..ade29d29364a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.load.nxv1i8.p0nxv1i8(*, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 92706f7c162f5..62a7244de1b3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(, , , i32) diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 60d9d6c4faedc..2e5cb42e80ba2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.vp.store.nxv1i8.p0nxv1i8(, *, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll index e07c51aa26883..322cce33df0a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredand.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll index bd6fd729dda6d..0f92380beb5c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredand.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll index fb64bd0df1243..8bb16465d9fa0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredmax.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll index 8b3c8374a0da0..f02fdcf277b38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredmax.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll index 4d2aeae714e75..833ba0bb5723d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredmaxu.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll index 2415dbb83ad43..df869a03f1a04 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vredmaxu.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll index 129f80531fe1f..e7e147bf8f39f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredmin.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll index fa0976a5353ee..cdaa99581731d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredmin.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll index bbfcd210e7986..8231eaa631fe7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredminu.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll index 7a97c6550b4c0..3082c309719e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredminu.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll index f453673238152..61663a336d818 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredor.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll index d8e6be8c8e4e1..355e4ee240bbb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredor.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll index 50c730f16b3d7..73de97e1a1f02 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredsum.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll index 37aa891f4d6a2..691a521757664 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredsum.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index ee6eeb9275760..bcd803fe6902f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare half @llvm.vector.reduce.fadd.nxv1f16(half, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 6cd3478cae769..916863d14e6f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v 
-target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare half @llvm.vp.reduce.fadd.nxv1f16(half, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll index 6ef7129bea8e9..b9e60a32cb926 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i8 @llvm.vector.reduce.add.nxv1i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll index f1bc225da2fec..04e8ddf72cc40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i8 @llvm.vector.reduce.add.nxv1i8() diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll index aa725dbca9533..9f57f9770753d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare i8 @llvm.vp.reduce.add.nxv1i8(i8, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll index bd4a0b038724b..e204b1be0e359 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s declare i1 @llvm.vp.reduce.and.nxv1i1(i1, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll index f499922b24a33..9b51b2534dfe2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s declare i1 @llvm.vector.reduce.or.nxv1i1() diff --git a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll index 4acd94eb2d872..4959f922ddeb4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredxor.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll index d96536c30cafd..717dd8e51e62a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vredxor.nxv8i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll index 2a17d9f5fbc80..e9a24bcee7d69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrem.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll index 7e4400f7623cd..100b79bde6258 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrem.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index c58c3025050ee..f6e47b8272b41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vrem_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vrem_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index 5b299da083174..5cb0cd623caf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.srem.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll index dbc2c55e3eb81..6b3cf9df77ca6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc 
-mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vremu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll index 4f88f35b269fb..9b5c03b51f0b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vremu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll index 792ed45d6ab66..e67f25b6b5c7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vremu_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vremu_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll index dededca7bb720..ccc9af287965d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < 
%s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.urem.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll index 239e70e49cefb..75e05a90a06cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgather.vv.nxv1i8.i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll index dc3f02c2501a8..a15e7ba69e385 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgather.vv.nxv1i8.i64( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll index 3c8e8035e6cb3..720d6d8585fdd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vrgatherei16.vv.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll index 4a656ec40fe22..9592630c19781 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgatherei16.vv.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll index 54ac860a6cc41..bd00557309afd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrsub.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll index cead8fe1784fe..fe930ea9dcce7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrsub.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll index 352b8b4d27adb..824c34a632a39 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vrsub_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vrsub_vx_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll index 99bf38d64c92b..f3b360261a226 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.sub.nxv1i8(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll index ee57455925000..968fbf120aa3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll index 
c360fc5a4d9b4..b1cf6b22b0f97 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsadd.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll index ab07fd7d20bf1..14728e92d7b76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.sadd.sat.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll index c743ac9bbceec..c57a590eb3888 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll index 9b683baf1590f..991d230237757 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll @@ -1,5 
+1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll index 8eec3839a37c6..2ca5a68b41d77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.uadd.sat.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll index c15ad9f2f3a1b..2c5ef91cf9c20 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsbc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll index 0140009a5a5b4..555c74c918551 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsbc.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll index c42261a048558..c01318e733036 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vse.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll index 6d5ef4155c6c3..7be33d5a8050d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vse.nxv1i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll index 4f766ebfb441b..16ede807a4143 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfmerge_vv_nxv1f16( %va, %vb, %cond) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll index cbc714e15a4b1..3d1106a5ec94c 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfmerge_vv_nxv1f16( %va, %vb, %cond) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll index 7df7112bb43be..3d8a951ee52d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vmerge_vv_nxv1i8( %va, %vb, %cond) { ; CHECK-LABEL: vmerge_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll index 5c79b20c439b0..0a3467c7a05d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vmerge_vv_nxv1i8( %va, %vb, %cond) { ; CHECK-LABEL: vmerge_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll index bc8e96ec31258..f8227fb538254 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vselect_nxv1i1( %a, %b, %cc) { ; CHECK-LABEL: vselect_nxv1i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index ddb97fc7d40c8..bdd180c0e3a9e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+experimental-v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+experimental-v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.select.nxv1i1(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll index 632ca9a968ea4..64b73fc19c549 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-v | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s declare i64 @llvm.riscv.vsetvli( i64, i64, i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index e506a623b6fe5..d6ea430c36ca6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \ ; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s ; The following tests check whether inserting VSETVLI avoids inserting diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir index f3f5e5034a824..4429e0db4ecd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \ +# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \ # RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s --- | @@ -116,7 +116,7 @@ ; Function Attrs: nounwind readnone declare @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(, i64) #1 - attributes #0 = { "target-features"="+experimental-v" } + attributes #0 = { "target-features"="+v" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } attributes #3 = { nounwind readonly } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 39a533ceedc6b..66878967ccb95 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \ ; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s declare i64 @llvm.riscv.vsetvli(i64, i64, i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir index 719a4844152a7..e5c85d1908c1d 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \ +# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \ # RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s --- | @@ -84,7 +84,7 @@ ; Function Attrs: nounwind readnone declare @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(, i64) #1 - attributes #0 = { "target-features"="+experimental-v" } + attributes #0 = { "target-features"="+v" } attributes #1 = { nounwind readnone } attributes #2 = { nofree nosync nounwind readnone willreturn } attributes #3 = { nounwind } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll index 3c8e5d9c0d5b0..c49c69b180409 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-v | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s ; This test checks a regression in the vsetvli insertion pass. 
We used to ; prserve the VL on the second vsetvli with ratio e32/m1, when the the last diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll index fae6c4a486e3a..72e3958f7e30e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsext.nxv1i64.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll index 74bca169184e3..e63fff73f538f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsext.nxv1i64.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll index 096944c6a219b..f4c63a2b83c08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vshl_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vshl_vx_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll index cdbeab6a1f707..c7d0732df226f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.shl.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll index 9674d9d711c9a..a9a9ea8019b0e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslide1down.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll index 677c0925c5079..2110f21bf13d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslide1down.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll index b48f1814a5411..c5b0bb3c07a29 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslide1up.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll index 8be9b2e7657a6..81c16b9c315cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslide1up.nxv1i8.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll index b78473e5e8b1c..4cd2fd28752ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslidedown.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll index 694dea11dd8c6..863c955753152 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc 
-mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslidedown.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll index 9d0f5de309977..3cdb4afac9692 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslideup.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll index 3e3bc67f4d303..a9a166c1056e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslideup.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll index 437dcf6e3b322..3f555dba39c5f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsll.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll index b63d8e31db694..8aa798d3b9e67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsll.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll index 2736ac8d2cc6e..3285cdbfe2741 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsm.nxv1i1(, *, i32); diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll index 2547cf3386f18..7fd84ccf35efa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsm.nxv1i1(, *, i64); diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll index d5ed31dbf536e..be3a442cc61b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsmul.nxv1i8.nxv1i8( , diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll index 4018735e756b0..3a5eb3c18f2b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsmul.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll index 3c589746ae044..2100af514fa3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll index c352ab4c96df5..a3f6637cfde47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i64( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll index a1d1170caf31c..9556358394554 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(,, i16*, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll index 053a3b011e9a6..289825be3546d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(,, i16*, , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll index 07dcb97573d12..d6a9d7dcb213d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-v -target-abi ilp32d -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+v -target-abi ilp32d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV32V -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-v -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+v -target-abi lp64d -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64V define @vsplat_nxv8f16(half %f) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll index 29073eb2c8fd5..c98500a501e54 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vsplat_nxv1i1_0() { ; CHECK-LABEL: vsplat_nxv1i1_0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll index 724ffb2f65f29..14947011213f6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV32V -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64V define @vsplat_nxv8i64_1() { diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll index 72803a5218646..78eac5fcc9ec7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsra.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll index a0c463ea0fe93..4841ca02e382b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll @@ -1,5 +1,5 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsra.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll index 3c6cf9e089c7b..312f1fde4936d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vsra_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vsra_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll index 676c1631c2ad9..323368986a1a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.ashr.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll index 25932f4e185fd..8ae5ec19ccda0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsrl.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll index e23dec5652f1d..054d81a54db17 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsrl.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll index 2bf34ea07b987..d38da572389ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vsrl_vx_nxv1i8( %va, i8 signext %b) { ; CHECK-LABEL: vsrl_vx_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll index 663a1aaef6f11..1361891667d32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.lshr.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll index d26ab423eab4c..d8441bcd89cbc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsse.nxv1i64( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll index f3b6bb8095067..31aebfadf3daa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsse.nxv1i64( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll index 9e556fc69e874..130e84e003ad8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < 
%s | FileCheck %s declare void @llvm.riscv.vsseg2.nxv16i16(,, i16* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll index a58979e050471..779848c14865e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vsseg2.nxv16i16(,, i16* , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll index 8a22fd3f01806..9cb0c4803370c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssra.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll index 04cd8f2bddfad..b39bdb4bdda64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssra.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll index 6d0f202499b10..173f2f863d55b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssrl.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll index f8fd647088592..3bc2f68ebb94a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssrl.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll index 5749bea3ae4b8..34d4482e382da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vssseg2.nxv16i16(,, i16*, i32, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll index dd35ac4c4c7d0..c90d4140221cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vssseg2.nxv16i16(,, i16*, i64, i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll index 7360929828013..c695923f56a06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll index 02fc13ce15e81..bff302a14b36b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll index bf8c0fd77304f..0ac720d1ca43f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.ssub.sat.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll index 7f2debfec993b..2b0fcf54963c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll index 923a1d32e74af..0169d78b4a798 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll index 20be3f2b29c94..341ec7ae580f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.usub.sat.nxv1i8(, ) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll index a49968a40720d..a9896a305516f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; 
RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll index ad0280ddb48b9..4840fa6030363 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsub.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll index f9fe816659300..4744b8e2c33d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vsub_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vsub_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll index d4818c7514049..9af879ef4fde1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < 
%s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.sub.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll index e6c209074e923..bcfa09f17f8d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i32( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll index 8f5798e624865..614471ef46429 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i64( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll index 13e4d9781b77a..f2cfbe5837b05 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: 
-verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(,, i16*, , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll index 6f3c5d384a979..fbe64219a8878 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(,, i16*, , i64) diff --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll index 628fd4554885a..9a4b78a009e34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vtrunc_nxv1i16_nxv1i8( %va) { ; CHECK-LABEL: vtrunc_nxv1i16_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll index f57bf32b789cd..72f559952e294 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll index 7ec1fa4e4a950..348d2b32b65c7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index 54dbaf8a52d35..a0e609c832f75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vwadd_vv_nxv1i64( %va, %vb) { ; CHECK-LABEL: vwadd_vv_nxv1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll index 6f711bc8b348d..6fab280f8f9b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc 
-mtriple=riscv32 -mattr=+v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s declare @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( , , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll index 441432c5e74d9..65481d304ddf4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s declare @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( , , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll index e42ccf233604b..f37d6ed898679 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll index f9ad3eb2a24ab..6c546a504b017 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll index 0ffeabaa38290..1128135e33ab4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll index 044737e1e3007..e3840e33f81a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll index 6f5aafc378350..a68c26ba9180c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmacc.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll index 261386ba8a87c..b3922ccb1ce43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmacc.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll index fdc1bd7364120..5cb8ce11c317b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK define @vwmacc_vv_nxv1i32( %va, %vb, %vc) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll index 5014a750305b8..3b3ed3ce683ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll index 250df8d77149c..07f11b1ef1615 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 
-mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll index a8dc1c6087b8e..4d7057a8f81a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll index 66f2d7fc9fc65..623e00c03aadb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccu.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll index 171e0d17fccc2..6a3aabba1be27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccus.nxv1i16.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll index 
40e78df76bf19..6786754645fdc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmaccus.nxv1i16.i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll index 47ecba522f2fe..338e4b352ddf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll index 05bdcf70f5480..da5accc0f1bd4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll index 0d91f668b792d..4f480ebea8726 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v 
-verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll index 4ac17a317522a..da7d9b97bc0ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll index 9f46f19b99386..11ac17450a4cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll index 99a0d2183133a..e9adaec0bdc8c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll index 3b849d9cbbe9f..13e130b7821af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwredsum.nxv4i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll index 09dc8c97aac88..1ea1418cbad06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwredsum.nxv4i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll index c3ad4998d6492..f6cde08be1175 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwredsumu.nxv4i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll index 62232b11f8c4c..c1776201926cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare 
@llvm.riscv.vwredsumu.nxv4i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll index 82b59dc788f5a..ef27184103d60 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll index 9aac53ae44b5c..2de365ecb1218 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll index 36f9305504aa8..a2fef3b541d56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s define @vwsub_vv_nxv1i64( %va, %vb) { ; CHECK-LABEL: vwsub_vv_nxv1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll 
index 42c6411fa4d65..213cb69a9a6a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll index 1257f5a546307..6e8aff5e81593 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll index acee77f9bbd01..603f685d4c7e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll index 65a7c23e6225c..3c4cc0283cc0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 
-mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll index 7a4ac1c97e4be..1406f14de4422 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll index 4de4be003f58a..d504b680f2a6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll index 1af276b68441b..8da7f7acb2e62 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vxor.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll index f7f5819bad2d9..08c47cd6c17fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll 
@@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vxor.nxv1i8.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll index 5afb940d9d075..be0f5d96ce3a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define @vxor_vv_nxv1i8( %va, %vb) { ; CHECK-LABEL: vxor_vv_nxv1i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll index 975c4e926fbd1..800da27d71f0e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.vp.xor.nxv8i7(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll index 4e826daa382be..d84527c161c97 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vzext.nxv1i64.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll index f48d607d984e0..dbd58c5650a2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vzext.nxv1i64.nxv1i8( , diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir index 08a3df1f0b44b..21fef81e60d9e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -# RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -o - %s \ +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -o - %s \ # RUN: -start-before=prologepilog | FileCheck %s # # This test checks that we are assigning the right stack slot to GPRs and to diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir index ce0497adaa39f..994857ba81cb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -# RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -o - %s \ +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -o - %s \ # RUN: -start-before=prologepilog | FileCheck %s # # This test checks that we are assigning the right stack slot to GPRs and to diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir index 2bc83469896cc..66b320a42b02e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs -run-pass=postrapseudos %s -o - | FileCheck %s +# RUN: llc -mtriple riscv64 -mattr=+v -verify-machineinstrs -run-pass=postrapseudos %s -o - | FileCheck %s ... --- diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir index 09b8eaccdab0c..8ed9afd836c9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll index a96de294b270d..0420f8bf77240 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve32x \ +; RUN: llc -mtriple=riscv64 -mattr=+zve32x \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; Make sure we don't select a 
0 vl to X0 in the custom isel handlers we use diff --git a/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll b/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll index ca71df0713cb1..030555ca76c1f 100644 --- a/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll +++ b/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s ; This demonstrates that we can pass a struct containing scalable vectors across ; a basic block. diff --git a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll index 4c3f58432c134..fcf6a8c2b0e0a 100644 --- a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll +++ b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 2b7f579c9b2ce..d103ca03cbf45 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -3,8 +3,8 @@ ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64 ; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M ; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV +; RUN: llc -mtriple=riscv32 
-mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV define i1 @test_srem_odd(i29 %X) nounwind { ; RV32-LABEL: test_srem_odd: diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index e0de325fd5359..c7e93bea08b9b 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -3,8 +3,8 @@ ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64 ; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M ; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M -; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV -; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV define i1 @test_urem_odd(i13 %X) nounwind { ; RV32-LABEL: test_urem_odd: diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll index 1b81beb113ecd..e161e5bb92e1a 100644 --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 
-mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=RV64 ; This test would lead one of the DAGCombiner's visitVSELECT optimizations to diff --git a/llvm/test/MC/RISCV/attribute-arch-invalid.s b/llvm/test/MC/RISCV/attribute-arch-invalid.s index 1dd5621d128ba..d1dc71d4982f3 100644 --- a/llvm/test/MC/RISCV/attribute-arch-invalid.s +++ b/llvm/test/MC/RISCV/attribute-arch-invalid.s @@ -5,9 +5,6 @@ ## Version strings are required for experimental extensions -.attribute arch, "rv32iv" -# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v` - .attribute arch, "rv32izbe" # CHECK: error: invalid arch name 'rv32izbe', experimental extension requires explicit version number `zbe` @@ -25,6 +22,3 @@ .attribute arch, "rv32izbt" # CHECK: error: invalid arch name 'rv32izbt', experimental extension requires explicit version number `zbt` - -.attribute arch, "rv32iv" -# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v` diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 65d4008a5869e..15ee933dcdd1d 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -33,11 +33,62 @@ .attribute arch, "rv32ima2p0_fdc" # CHECK: attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0" -## Experimental extensions require version string to be explicitly specified +.attribute arch, "rv32iv" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -.attribute arch, "rv32iv1p0" +.attribute arch, "rv32ivzvl32b" # CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +.attribute arch, "rv32ivzvl64b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl128b" +# CHECK: 
attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl256b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl512b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl1024b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl2048b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl4096b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0" + +.attribute arch, "rv32ivzvl8192b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" + +.attribute arch, "rv32ivzvl16384b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" + +.attribute arch, "rv32ivzvl32768b" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" + +.attribute arch, "rv32ivzvl65536b" +# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0" + +.attribute arch, "rv32izve32x" +# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zvl32b1p0" + +.attribute arch, "rv32ifzve32f" +# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0" + +.attribute arch, "rv32izve64x" +# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" + +.attribute arch, "rv32ifzve64f" +# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" + +.attribute arch, "rv32ifdzve64d" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" + +## Experimental extensions require version string to be explicitly specified + .attribute arch, "rv32izba1p0" # CHECK: attribute 5, "rv32i2p0_zba1p0" @@ -74,60 +125,6 @@ .attribute arch, "rv32ifzfh1p0" # CHECK: attribute 5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0" -.attribute arch, "rv32iv1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl32b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl64b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl128b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl256b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl512b1p0" -# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl1024b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl2048b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl4096b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0" - -.attribute arch, "rv32iv1p0zvl8192b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" - -.attribute arch, "rv32iv1p0zvl16384b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" - -.attribute arch, "rv32iv1p0zvl32768b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" - -.attribute arch, "rv32iv1p0zvl65536b1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0" - -.attribute arch, "rv32i_zve32x1p0" -# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zvl32b1p0" - -.attribute arch, "rv32if_zve32f1p0" -# CHECK: attribute 5, 
"rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0" - -.attribute arch, "rv32i_zve64x1p0" -# CHECK: attribute 5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32if_zve64f1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" - -.attribute arch, "rv32ifd_zve64d1p0" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0" - .attribute arch, "rv32i_zbkb1p0" # CHECK: attribute 5, "rv32i2p0_zbkb1p0" diff --git a/llvm/test/MC/RISCV/rvv/add.s b/llvm/test/MC/RISCV/rvv/add.s index 18143ff0d738c..17e84b00cce3d 100644 --- a/llvm/test/MC/RISCV/rvv/add.s +++ b/llvm/test/MC/RISCV/rvv/add.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vadd.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s index 3bf55fa405fe3..0dadeb1b1db19 100644 --- a/llvm/test/MC/RISCV/rvv/aliases.s +++ b/llvm/test/MC/RISCV/rvv/aliases.s @@ -1,6 +1,6 @@ -# RUN: llvm-mc --triple=riscv64 -mattr +experimental-v < %s --show-encoding 2>&1 \ +# RUN: llvm-mc --triple=riscv64 -mattr +v < %s --show-encoding 2>&1 \ # RUN: -mattr +d | FileCheck --check-prefix=ALIAS %s -# RUN: llvm-mc 
--triple=riscv64 -mattr=+experimental-v --riscv-no-aliases < %s \ +# RUN: llvm-mc --triple=riscv64 -mattr=+v --riscv-no-aliases < %s \ # RUN: -mattr +d --show-encoding 2>&1 | FileCheck --check-prefix=NO-ALIAS %s # ALIAS: vwcvt.x.x.v v2, v1, v0.t # encoding: [0x57,0x61,0x10,0xc4] diff --git a/llvm/test/MC/RISCV/rvv/and.s b/llvm/test/MC/RISCV/rvv/and.s index e098ff1f2f8f3..c6f3565042662 100644 --- a/llvm/test/MC/RISCV/rvv/and.s +++ b/llvm/test/MC/RISCV/rvv/and.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vand.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/clip.s b/llvm/test/MC/RISCV/rvv/clip.s index 04cbe5eab49a2..5ec62dafae08c 100644 --- a/llvm/test/MC/RISCV/rvv/clip.s +++ b/llvm/test/MC/RISCV/rvv/clip.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 
-filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vnclipu.wv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/compare.s b/llvm/test/MC/RISCV/rvv/compare.s index 6258c46d05ea1..89f19c5af2550 100644 --- a/llvm/test/MC/RISCV/rvv/compare.s +++ b/llvm/test/MC/RISCV/rvv/compare.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmslt.vv v0, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/convert.s b/llvm/test/MC/RISCV/rvv/convert.s index 3c45e79acf155..0a2d113b238fd 100644 --- a/llvm/test/MC/RISCV/rvv/convert.s +++ b/llvm/test/MC/RISCV/rvv/convert.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj 
--mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/div.s b/llvm/test/MC/RISCV/rvv/div.s index 47d88daace2b7..6aed20f03d24c 100644 --- a/llvm/test/MC/RISCV/rvv/div.s +++ b/llvm/test/MC/RISCV/rvv/div.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vdivu.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/ext.s b/llvm/test/MC/RISCV/rvv/ext.s index e095a1c3edb2c..c0cd7341fef2d 100644 --- a/llvm/test/MC/RISCV/rvv/ext.s +++ b/llvm/test/MC/RISCV/rvv/ext.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 
\ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vzext.vf2 v8, v4, v0.t diff --git a/llvm/test/MC/RISCV/rvv/fadd.s b/llvm/test/MC/RISCV/rvv/fadd.s index fbb8850768ca3..acab959b3ade3 100644 --- a/llvm/test/MC/RISCV/rvv/fadd.s +++ b/llvm/test/MC/RISCV/rvv/fadd.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fcompare.s b/llvm/test/MC/RISCV/rvv/fcompare.s index e1c89969bfc56..5716736041c78 100644 --- a/llvm/test/MC/RISCV/rvv/fcompare.s +++ b/llvm/test/MC/RISCV/rvv/fcompare.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding 
--mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fdiv.s b/llvm/test/MC/RISCV/rvv/fdiv.s index ea14ef03e054b..a23e4820179f9 100644 --- a/llvm/test/MC/RISCV/rvv/fdiv.s +++ b/llvm/test/MC/RISCV/rvv/fdiv.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fmacc.s b/llvm/test/MC/RISCV/rvv/fmacc.s index 032b79ee6ce5c..b59da7a72525f 100644 --- 
a/llvm/test/MC/RISCV/rvv/fmacc.s +++ b/llvm/test/MC/RISCV/rvv/fmacc.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fminmax.s b/llvm/test/MC/RISCV/rvv/fminmax.s index 6734b2a14a7a7..c7fe5c4926db8 100644 --- a/llvm/test/MC/RISCV/rvv/fminmax.s +++ b/llvm/test/MC/RISCV/rvv/fminmax.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f 
\ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fmul.s b/llvm/test/MC/RISCV/rvv/fmul.s index 68d9610f67f39..4907fa0b25d52 100644 --- a/llvm/test/MC/RISCV/rvv/fmul.s +++ b/llvm/test/MC/RISCV/rvv/fmul.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fmv.s b/llvm/test/MC/RISCV/rvv/fmv.s index a2d915a71a365..f8a1e3e7111b6 100644 --- a/llvm/test/MC/RISCV/rvv/fmv.s +++ b/llvm/test/MC/RISCV/rvv/fmv.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s 
--check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fothers.s b/llvm/test/MC/RISCV/rvv/fothers.s index abed79070f867..214d03030cca2 100644 --- a/llvm/test/MC/RISCV/rvv/fothers.s +++ b/llvm/test/MC/RISCV/rvv/fothers.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f --riscv-no-aliases \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: --mattr=+f | llvm-objdump -d --mattr=+experimental-v --mattr=+f -M no-aliases - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: --mattr=+f | llvm-objdump -d --mattr=+v --mattr=+f -M no-aliases - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vfsqrt.v v8, v4, v0.t diff --git a/llvm/test/MC/RISCV/rvv/freduction.s b/llvm/test/MC/RISCV/rvv/freduction.s index f093fef0a2e3f..09e074743df26 100644 --- a/llvm/test/MC/RISCV/rvv/freduction.s +++ b/llvm/test/MC/RISCV/rvv/freduction.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f --riscv-no-aliases \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f \ # RUN: -M no-aliases - | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/fsub.s b/llvm/test/MC/RISCV/rvv/fsub.s index 82f4babb4d7e9..dca6d8d7d65ba 100644 --- a/llvm/test/MC/RISCV/rvv/fsub.s +++ b/llvm/test/MC/RISCV/rvv/fsub.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/invalid-eew.s b/llvm/test/MC/RISCV/rvv/invalid-eew.s index 65894831006e8..df336ca3b3e23 100644 --- a/llvm/test/MC/RISCV/rvv/invalid-eew.s +++ b/llvm/test/MC/RISCV/rvv/invalid-eew.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple=riscv32 --mattr=+experimental-zve32x %s 2>&1 \ +# RUN: not 
llvm-mc -triple=riscv32 --mattr=+zve32x %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR vluxei64.v v8, (a0), v4, v0.t diff --git a/llvm/test/MC/RISCV/rvv/invalid.s b/llvm/test/MC/RISCV/rvv/invalid.s index 29da69fabaa92..41f870963ef8d 100644 --- a/llvm/test/MC/RISCV/rvv/invalid.s +++ b/llvm/test/MC/RISCV/rvv/invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple=riscv64 --mattr=+experimental-v --mattr=+f %s 2>&1 \ +# RUN: not llvm-mc -triple=riscv64 --mattr=+v --mattr=+f %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR vsetivli a2, 32, e8,m1 diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s index e0991eec87874..689396b55c8a2 100644 --- a/llvm/test/MC/RISCV/rvv/load.s +++ b/llvm/test/MC/RISCV/rvv/load.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v -M no-aliases - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vlm.v v0, (a0) diff --git a/llvm/test/MC/RISCV/rvv/macc.s b/llvm/test/MC/RISCV/rvv/macc.s index e7cd4e6502aad..6cc2219220902 100644 --- a/llvm/test/MC/RISCV/rvv/macc.s +++ b/llvm/test/MC/RISCV/rvv/macc.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmacc.vv v8, v20, v4, v0.t diff --git a/llvm/test/MC/RISCV/rvv/mask.s b/llvm/test/MC/RISCV/rvv/mask.s index 797679154bb37..8f4b4ef64b2e9 100644 --- a/llvm/test/MC/RISCV/rvv/mask.s +++ b/llvm/test/MC/RISCV/rvv/mask.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmand.mm v8, v4, v20 diff --git a/llvm/test/MC/RISCV/rvv/minmax.s b/llvm/test/MC/RISCV/rvv/minmax.s index 9829fce067e1f..dce30d189c6c7 100644 --- a/llvm/test/MC/RISCV/rvv/minmax.s +++ b/llvm/test/MC/RISCV/rvv/minmax.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ 
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vminu.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/mul.s b/llvm/test/MC/RISCV/rvv/mul.s index 9ee35101d2dad..b5294867de248 100644 --- a/llvm/test/MC/RISCV/rvv/mul.s +++ b/llvm/test/MC/RISCV/rvv/mul.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmul.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/mv.s b/llvm/test/MC/RISCV/rvv/mv.s index 05f7e1df7ae2a..62e1c7119033e 100644 --- a/llvm/test/MC/RISCV/rvv/mv.s +++ b/llvm/test/MC/RISCV/rvv/mv.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc 
-triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmv.v.v v8, v20 diff --git a/llvm/test/MC/RISCV/rvv/or.s b/llvm/test/MC/RISCV/rvv/or.s index ffcf0c0ad39fe..891c313bd90d0 100644 --- a/llvm/test/MC/RISCV/rvv/or.s +++ b/llvm/test/MC/RISCV/rvv/or.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vor.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/others.s b/llvm/test/MC/RISCV/rvv/others.s index bbd797299f366..06c62a700bc2d 100644 --- a/llvm/test/MC/RISCV/rvv/others.s +++ 
b/llvm/test/MC/RISCV/rvv/others.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v -M no-aliases - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vmerge.vvm v8, v4, v20, v0 diff --git a/llvm/test/MC/RISCV/rvv/reduction.s b/llvm/test/MC/RISCV/rvv/reduction.s index 1c4ccac75a026..bacb7e6274504 100644 --- a/llvm/test/MC/RISCV/rvv/reduction.s +++ b/llvm/test/MC/RISCV/rvv/reduction.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vredsum.vs v8, v4, v20, v0.t diff --git 
a/llvm/test/MC/RISCV/rvv/shift.s b/llvm/test/MC/RISCV/rvv/shift.s index 64126367f469b..9b5b77e99ed1a 100644 --- a/llvm/test/MC/RISCV/rvv/shift.s +++ b/llvm/test/MC/RISCV/rvv/shift.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vsll.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/sign-injection.s b/llvm/test/MC/RISCV/rvv/sign-injection.s index 0b3df8c21d06d..9607d5936d42b 100644 --- a/llvm/test/MC/RISCV/rvv/sign-injection.s +++ b/llvm/test/MC/RISCV/rvv/sign-injection.s @@ -1,13 +1,13 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ -# RUN: | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \ +# RUN: | llvm-objdump -d --mattr=+v --mattr=+f - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v 
%s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: --mattr=+f \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN diff --git a/llvm/test/MC/RISCV/rvv/snippet.s b/llvm/test/MC/RISCV/rvv/snippet.s index a5c19b0f18b5a..0f43f8b6e8897 100644 --- a/llvm/test/MC/RISCV/rvv/snippet.s +++ b/llvm/test/MC/RISCV/rvv/snippet.s @@ -1,7 +1,7 @@ ## A snippet from https://github.com/riscv/riscv-v-spec. -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v < %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v < %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST loop: diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s index 2e502d3fbec88..aa8c9e2e38d41 100644 --- a/llvm/test/MC/RISCV/rvv/store.s +++ b/llvm/test/MC/RISCV/rvv/store.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v -M no-aliases - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vsm.v v24, (a0) diff --git a/llvm/test/MC/RISCV/rvv/sub.s b/llvm/test/MC/RISCV/rvv/sub.s index 67f10b2f16298..a1d4a5563fded 100644 --- a/llvm/test/MC/RISCV/rvv/sub.s +++ b/llvm/test/MC/RISCV/rvv/sub.s @@ -1,11 +1,11 @@ 
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vsub.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s index d0b401da98b37..50ed09ba9629d 100644 --- a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s +++ b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple=riscv32 %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - | FileCheck %s +# RUN: | llvm-objdump -d --mattr=+v - | FileCheck %s # RUN: llvm-mc -filetype=obj -triple=riscv64 %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - | FileCheck %s +# RUN: | llvm-objdump -d --mattr=+v - | FileCheck %s # CHECK: vsetvli a1, a0, e64, m1, tu, mu .word 0x018575d7 diff --git a/llvm/test/MC/RISCV/rvv/vsetvl.s b/llvm/test/MC/RISCV/rvv/vsetvl.s index 853b12285e725..0d0233f148ba2 100644 --- a/llvm/test/MC/RISCV/rvv/vsetvl.s +++ b/llvm/test/MC/RISCV/rvv/vsetvl.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN # reserved filed: vlmul[2:0]=4, vsew[2:0]=0b1xx, non-zero bits 8/9/10. diff --git a/llvm/test/MC/RISCV/rvv/xor.s b/llvm/test/MC/RISCV/rvv/xor.s index b08fb1b662335..436d4505d7e99 100644 --- a/llvm/test/MC/RISCV/rvv/xor.s +++ b/llvm/test/MC/RISCV/rvv/xor.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vxor.vv v8, v4, v20, v0.t diff --git a/llvm/test/MC/RISCV/rvv/zvlsseg.s b/llvm/test/MC/RISCV/rvv/zvlsseg.s index 6845839fcaa33..608646164a2c1 100644 --- a/llvm/test/MC/RISCV/rvv/zvlsseg.s +++ b/llvm/test/MC/RISCV/rvv/zvlsseg.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \ # RUN: --riscv-no-aliases \ # RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ -# RUN: | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ +# RUN: | llvm-objdump -d --mattr=+v -M no-aliases - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST -# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN vlseg2e8.v v8, (a0), v0.t diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 802b251d59ed0..05c270c5f7744 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-vectorize -mtriple=riscv32 -mattr=+experimental-v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV32 -; RUN: opt < %s -loop-vectorize -mtriple=riscv64 -mattr=+experimental-v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV64 +; RUN: opt < %s -loop-vectorize -mtriple=riscv32 -mattr=+v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV32 +; RUN: opt < %s -loop-vectorize -mtriple=riscv64 -mattr=+v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV64 ; The source code: ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll index 4d6508b683770..fec0b66127039 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll @@ -1,18 +1,18 @@ ; 
REQUIRES: asserts ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v,+d -debug-only=loop-vectorize \ +; RUN: -mattr=+v,+d -debug-only=loop-vectorize \ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \ ; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 0eaa879fb8ff4..325983fb6036a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: opt -loop-vectorize -dce -instcombine -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v -debug-only=loop-vectorize \ +; RUN: -mattr=+v -debug-only=loop-vectorize \ ; RUN: -riscv-v-vector-bits-min=128 -S < %s 2>&1 | FileCheck %s ; CHECK-LABEL: foo diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll index 
49007beb5714a..e26638adc3ff6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1 -; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1 -; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2 -; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2 +; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1 +; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1 +; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2 +; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2 ; Function Attrs: nounwind define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll index 75ebccc46a74b..2a529cc653d6f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll @@ -2,7 +2,7 @@ ; RUN: -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 \ ; RUN: -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \ ; RUN: -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -mattr=+experimental-v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK +; RUN: -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK ; Reduction can be vectorized diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll index 57c612e813897..0c32391a93ad3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \ +; RUN: opt -mtriple=riscv64 -mattr=+m,+v -loop-vectorize \ ; RUN: -riscv-v-vector-bits-max=512 -S -scalable-vectorization=on < %s 2>&1 \ ; RUN: | FileCheck %s diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll index 3349d4554491f..6bd40976e71cd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=riscv64 -mattr=+experimental-v -loop-vectorize < %s | FileCheck %s +; RUN: opt -S -mtriple=riscv64 -mattr=+v -loop-vectorize < %s | FileCheck %s ; Make sure we don't unroll scalar loops in the loop vectorizer. 
; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll index b40562f8f5768..4711e590a0004 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \ +; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \ ; RUN: -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefixes=CHECK,CHECK-128 -; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \ +; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \ ; RUN: -riscv-v-vector-bits-min=256 -S | FileCheck %s --check-prefixes=CHECK,CHECK-256 -; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \ +; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \ ; RUN: -riscv-v-vector-bits-min=512 -S | FileCheck %s --check-prefixes=CHECK,CHECK-512 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" From 85e42db1b6db16a2dab4405604971d84899612bf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Jan 2022 12:10:57 -0800 Subject: [PATCH 269/946] [RISCV] Merge some rvv intrinsic test cases that only differ by XLen type. Instead of having a test for i32 XLen and i64 XLen, use sed to replace iXLen with i32/i64 before running llc. This change updates tests for intrinsics that operate exclusively on mask values. It removes over 4000 lines worth of test content. More merging will come in future changes. 
Differential Revision: https://reviews.llvm.org/D117968 --- llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll | 282 ------ llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll | 282 ------ llvm/test/CodeGen/RISCV/rvv/vcpop.ll | 284 ++++++ llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll | 282 ------ llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll | 282 ------ llvm/test/CodeGen/RISCV/rvv/vfirst.ll | 284 ++++++ llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll | 758 --------------- .../CodeGen/RISCV/rvv/{vid-rv32.ll => vid.ll} | 258 ++--- llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll | 882 ------------------ .../RISCV/rvv/{viota-rv32.ll => viota.ll} | 270 +++--- llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll | 94 -- .../CodeGen/RISCV/rvv/{vlm-rv32.ll => vlm.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll | 142 --- .../RISCV/rvv/{vmand-rv32.ll => vmand.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll | 142 --- .../RISCV/rvv/{vmandn-rv32.ll => vmandn.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll | 114 --- .../RISCV/rvv/{vmclr-rv32.ll => vmclr.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll | 142 --- .../RISCV/rvv/{vmnand-rv32.ll => vmnand.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll | 142 --- .../RISCV/rvv/{vmnor-rv32.ll => vmnor.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll | 142 --- .../RISCV/rvv/{vmor-rv32.ll => vmor.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll | 142 --- .../RISCV/rvv/{vmorn-rv32.ll => vmorn.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll | 114 --- .../RISCV/rvv/{vmset-rv32.ll => vmset.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll | 296 ------ .../RISCV/rvv/{vmsif-rv32.ll => vmsif.ll} | 90 +- llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll | 296 ------ .../RISCV/rvv/{vmsof-rv32.ll => vmsof.ll} | 90 +- llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll | 142 --- .../RISCV/rvv/{vmxnor-rv32.ll => vmxnor.ll} | 48 +- llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll | 142 --- .../RISCV/rvv/{vmxor-rv32.ll => vmxor.ll} | 48 +- 
llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll | 137 --- .../CodeGen/RISCV/rvv/{vsm-rv32.ll => vsm.ll} | 64 +- 38 files changed, 1234 insertions(+), 5589 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst.ll delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vid-rv32.ll => vid.ll} (81%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{viota-rv32.ll => viota.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vlm-rv32.ll => vlm.ll} (80%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmand-rv32.ll => vmand.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmandn-rv32.ll => vmandn.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmclr-rv32.ll => vmclr.ll} (72%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmnand-rv32.ll => vmnand.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmnor-rv32.ll => vmnor.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmor-rv32.ll => vmor.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmorn-rv32.ll => vmorn.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmset-rv32.ll => vmset.ll} (72%) delete mode 100644 
llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmsif-rv32.ll => vmsif.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmsof-rv32.ll => vmsof.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmxnor-rv32.ll => vmxnor.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vmxor-rv32.ll => vmxor.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vsm-rv32.ll => vsm.ll} (79%) diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll deleted file mode 100644 index fe5bba14eb6d2..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll +++ /dev/null @@ -1,282 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare i32 @llvm.riscv.vcpop.i32.nxv1i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv1i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv1i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv1i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv1i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv1i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv2i1( - , - i32); - -define i32 
@intrinsic_vcpop_m_i32_nxv2i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv2i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv2i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv2i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv2i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv4i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv4i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv4i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv4i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv4i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv4i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv8i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv8i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 
@llvm.riscv.vcpop.i32.nxv8i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv8i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv8i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv8i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv16i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv16i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv16i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv16i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv16i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv16i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv32i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv32i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv32i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv32i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv32i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vcpop_mask_m_i32_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv32i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.i32.nxv64i1( - , - i32); - -define i32 @intrinsic_vcpop_m_i32_nxv64i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.i32.nxv64i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vcpop.mask.i32.nxv64i1( - , - , - i32); - -define i32 @intrinsic_vcpop_mask_m_i32_nxv64i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv64i1( - %0, - %1, - i32 %2) - - ret i32 %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll deleted file mode 100644 index 8d583b8df634a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll +++ /dev/null @@ -1,282 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare i64 @llvm.riscv.vcpop.i64.nxv1i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv1i1( - %0, - i64 %1) - - ret i64 %a -} - 
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv1i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv1i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.i64.nxv2i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv2i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv2i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv2i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.i64.nxv4i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv4i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv4i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; 
CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv4i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.i64.nxv8i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv8i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv8i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv8i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.i64.nxv16i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv16i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv16i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv16i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 
@llvm.riscv.vcpop.i64.nxv32i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv32i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv32i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv32i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.i64.nxv64i1( - , - i64); - -define i64 @intrinsic_vcpop_m_i64_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.i64.nxv64i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vcpop.mask.i64.nxv64i1( - , - , - i64); - -define i64 @intrinsic_vcpop_mask_m_i64_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vcpop.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv64i1( - %0, - %1, - i64 %2) - - ret i64 %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll new file mode 100644 index 0000000000000..1b77ec8dd82e0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll @@ -0,0 +1,284 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +declare iXLen @llvm.riscv.vcpop.iXLen.nxv1i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv1i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv1i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv1i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv1i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv1i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv2i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv2i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv2i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv2i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv2i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen 
@llvm.riscv.vcpop.mask.iXLen.nxv2i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv4i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv4i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv4i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv4i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv4i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv4i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv8i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv8i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv8i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv8i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv8i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv8i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv16i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv16i1( %0, iXLen %1) nounwind { +; 
CHECK-LABEL: intrinsic_vcpop_m_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv16i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv16i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv16i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv16i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv32i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv32i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv32i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv32i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv32i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv32i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.iXLen.nxv64i1( + , + iXLen); + +define iXLen @intrinsic_vcpop_m_nxv64i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen 
@llvm.riscv.vcpop.iXLen.nxv64i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv64i1( + , + , + iXLen); + +define iXLen @intrinsic_vcpop_mask_m_nxv64i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vcpop.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv64i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll deleted file mode 100644 index 6d6d3a8c74616..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll +++ /dev/null @@ -1,282 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare i32 @llvm.riscv.vfirst.i32.nxv1i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv1i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv1i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv1i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv1i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv1i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv2i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv2i1( %0, i32 %1) nounwind { -; 
CHECK-LABEL: intrinsic_vfirst_m_i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv2i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv2i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv2i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv2i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv4i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv4i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv4i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv4i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv4i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv4i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv8i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv8i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv8i1( - %0, - i32 
%1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv8i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv8i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv8i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv16i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv16i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv16i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv16i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv16i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv16i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv32i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv32i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv32i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv32i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv32i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv32i1: -; CHECK: 
# %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv32i1( - %0, - %1, - i32 %2) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.i32.nxv64i1( - , - i32); - -define i32 @intrinsic_vfirst_m_i32_nxv64i1( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.i32.nxv64i1( - %0, - i32 %1) - - ret i32 %a -} - -declare i32 @llvm.riscv.vfirst.mask.i32.nxv64i1( - , - , - i32); - -define i32 @intrinsic_vfirst_mask_m_i32_nxv64i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv64i1( - %0, - %1, - i32 %2) - - ret i32 %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll deleted file mode 100644 index 0e20516a69c79..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll +++ /dev/null @@ -1,282 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare i64 @llvm.riscv.vfirst.i64.nxv1i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv1i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 
@llvm.riscv.vfirst.mask.i64.nxv1i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv1i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.i64.nxv2i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv2i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv2i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv2i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.i64.nxv4i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv4i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv4i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv4i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.i64.nxv8i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv8i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv8i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv8i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.i64.nxv16i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv16i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv16i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv16i1( - %0, - %1, - i64 %2) - - ret i64 %a -} 
- -declare i64 @llvm.riscv.vfirst.i64.nxv32i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv32i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv32i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv32i1( - %0, - %1, - i64 %2) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.i64.nxv64i1( - , - i64); - -define i64 @intrinsic_vfirst_m_i64_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vfirst.m a0, v0 -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.i64.nxv64i1( - %0, - i64 %1) - - ret i64 %a -} - -declare i64 @llvm.riscv.vfirst.mask.i64.nxv64i1( - , - , - i64); - -define i64 @intrinsic_vfirst_mask_m_i64_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfirst.m a0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv64i1( - %0, - %1, - i64 %2) - - ret i64 %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll new file mode 100644 index 0000000000000..5d71c17db6c29 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll @@ -0,0 +1,284 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +declare iXLen @llvm.riscv.vfirst.iXLen.nxv1i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv1i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv1i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv1i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv1i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv1i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv2i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv2i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv2i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv2i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv2i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + 
%a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv2i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv4i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv4i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv4i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv4i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv4i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv4i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv8i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv8i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv8i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv8i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv8i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv8i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv16i1( + , + iXLen); + +define iXLen 
@intrinsic_vfirst_m_nxv16i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv16i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv16i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv16i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv16i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv32i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv32i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv32i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv32i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv32i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv32i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.iXLen.nxv64i1( + , + iXLen); + +define iXLen @intrinsic_vfirst_m_nxv64i1( %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfirst_m_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; 
CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv64i1( + %0, + iXLen %1) + + ret iXLen %a +} + +declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv64i1( + , + , + iXLen); + +define iXLen @intrinsic_vfirst_mask_m_nxv64i1( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfirst.m a0, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv64i1( + %0, + %1, + iXLen %2) + + ret iXLen %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll deleted file mode 100644 index 1413c0ec4b443..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll +++ /dev/null @@ -1,758 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vid.nxv1i8( - i64); - -define @intrinsic_vid_v_nxv1i8(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv1i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv1i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv1i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv1i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv2i8( - i64); - -define @intrinsic_vid_v_nxv2i8(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vid.v v8 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv2i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv2i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv2i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv2i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv4i8( - i64); - -define @intrinsic_vid_v_nxv4i8(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv4i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv4i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv4i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv4i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv8i8( - i64); - -define @intrinsic_vid_v_nxv8i8(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv8i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv8i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv8i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv8i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv16i8( - i64); - -define @intrinsic_vid_v_nxv16i8(i64 %0) nounwind { -; CHECK-LABEL: 
intrinsic_vid_v_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv16i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv16i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv16i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv16i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv32i8( - i64); - -define @intrinsic_vid_v_nxv32i8(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv32i8( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv32i8( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv32i8( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv32i8( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv1i16( - i64); - -define @intrinsic_vid_v_nxv1i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv1i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv1i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv1i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv1i16( - %0, - %1, - i64 
%2) - - ret %a -} - -declare @llvm.riscv.vid.nxv2i16( - i64); - -define @intrinsic_vid_v_nxv2i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv2i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv2i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv2i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv2i16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv4i16( - i64); - -define @intrinsic_vid_v_nxv4i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv4i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv4i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv4i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv4i16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv8i16( - i64); - -define @intrinsic_vid_v_nxv8i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv8i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv8i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv8i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e16, m2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv8i16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv16i16( - i64); - -define @intrinsic_vid_v_nxv16i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv16i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv16i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv16i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv16i16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv32i16( - i64); - -define @intrinsic_vid_v_nxv32i16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv32i16( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv32i16( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv32i16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv32i16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv1i32( - i64); - -define @intrinsic_vid_v_nxv1i32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv1i32( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv1i32( - , - , - i64); - -define 
@intrinsic_vid_mask_v_nxv1i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv1i32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv2i32( - i64); - -define @intrinsic_vid_v_nxv2i32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv2i32( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv2i32( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv2i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv2i32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv4i32( - i64); - -define @intrinsic_vid_v_nxv4i32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv4i32( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv4i32( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv4i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv4i32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv8i32( - i64); - -define @intrinsic_vid_v_nxv8i32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vid.nxv8i32( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv8i32( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv8i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv8i32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv16i32( - i64); - -define @intrinsic_vid_v_nxv16i32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv16i32( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv16i32( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv16i32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv16i32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv1i64( - i64); - -define @intrinsic_vid_v_nxv1i64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv1i64( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv1i64( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv1i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv1i64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv2i64( - i64); - -define @intrinsic_vid_v_nxv2i64(i64 %0) nounwind { -; CHECK-LABEL: 
intrinsic_vid_v_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv2i64( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv2i64( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv2i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv2i64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv4i64( - i64); - -define @intrinsic_vid_v_nxv4i64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv4i64( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv4i64( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv4i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv4i64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vid.nxv8i64( - i64); - -define @intrinsic_vid_v_nxv8i64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vid_v_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.nxv8i64( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vid.mask.nxv8i64( - , - , - i64); - -define @intrinsic_vid_mask_v_nxv8i64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vid.v v8, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vid.mask.nxv8i64( - %0, - %1, - 
i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vid.ll similarity index 81% rename from llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vid.ll index 58ed5cfb7620d..1888074767cf8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vid.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vid.nxv1i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv1i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv1i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -12,7 +14,7 @@ define @intrinsic_vid_v_nxv1i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv1i8( - i32 %0) + iXLen %0) ret %a } @@ -20,9 +22,9 @@ entry: declare @llvm.riscv.vid.mask.nxv1i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv1i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv1i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu @@ -32,15 +34,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv1i8( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv2i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv2i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv2i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -48,7 +50,7 @@ define @intrinsic_vid_v_nxv2i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vid.nxv2i8( - i32 %0) + iXLen %0) ret %a } @@ -56,9 +58,9 @@ entry: declare @llvm.riscv.vid.mask.nxv2i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv2i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv2i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu @@ -68,15 +70,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv2i8( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv4i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv4i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv4i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -84,7 +86,7 @@ define @intrinsic_vid_v_nxv4i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv4i8( - i32 %0) + iXLen %0) ret %a } @@ -92,9 +94,9 @@ entry: declare @llvm.riscv.vid.mask.nxv4i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv4i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv4i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu @@ -104,15 +106,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv4i8( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv8i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv8i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv8i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -120,7 +122,7 @@ define @intrinsic_vid_v_nxv8i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv8i8( - i32 %0) + iXLen %0) ret %a } @@ -128,9 +130,9 @@ entry: declare @llvm.riscv.vid.mask.nxv8i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv8i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv8i8( %0, %1, iXLen 
%2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu @@ -140,15 +142,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv8i8( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv16i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv16i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv16i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -156,7 +158,7 @@ define @intrinsic_vid_v_nxv16i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv16i8( - i32 %0) + iXLen %0) ret %a } @@ -164,9 +166,9 @@ entry: declare @llvm.riscv.vid.mask.nxv16i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv16i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv16i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu @@ -176,15 +178,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv16i8( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv32i8( - i32); + iXLen); -define @intrinsic_vid_v_nxv32i8(i32 %0) nounwind { +define @intrinsic_vid_v_nxv32i8(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -192,7 +194,7 @@ define @intrinsic_vid_v_nxv32i8(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv32i8( - i32 %0) + iXLen %0) ret %a } @@ -200,9 +202,9 @@ entry: declare @llvm.riscv.vid.mask.nxv32i8( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv32i8( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu @@ -212,15 +214,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv32i8( %0, %1, - i32 %2) + iXLen %2) ret %a 
} declare @llvm.riscv.vid.nxv1i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv1i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv1i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -228,7 +230,7 @@ define @intrinsic_vid_v_nxv1i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv1i16( - i32 %0) + iXLen %0) ret %a } @@ -236,9 +238,9 @@ entry: declare @llvm.riscv.vid.mask.nxv1i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv1i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv1i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -248,15 +250,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv1i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv2i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv2i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv2i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -264,7 +266,7 @@ define @intrinsic_vid_v_nxv2i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv2i16( - i32 %0) + iXLen %0) ret %a } @@ -272,9 +274,9 @@ entry: declare @llvm.riscv.vid.mask.nxv2i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv2i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv2i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -284,15 +286,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv2i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv4i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv4i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv4i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv4i16: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -300,7 +302,7 @@ define @intrinsic_vid_v_nxv4i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv4i16( - i32 %0) + iXLen %0) ret %a } @@ -308,9 +310,9 @@ entry: declare @llvm.riscv.vid.mask.nxv4i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv4i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv4i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -320,15 +322,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv4i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv8i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv8i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv8i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -336,7 +338,7 @@ define @intrinsic_vid_v_nxv8i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv8i16( - i32 %0) + iXLen %0) ret %a } @@ -344,9 +346,9 @@ entry: declare @llvm.riscv.vid.mask.nxv8i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv8i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv8i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -356,15 +358,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv8i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv16i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv16i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv16i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -372,7 +374,7 @@ define @intrinsic_vid_v_nxv16i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv16i16( - i32 %0) + iXLen %0) ret %a } @@ -380,9 +382,9 @@ 
entry: declare @llvm.riscv.vid.mask.nxv16i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv16i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv16i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -392,15 +394,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv16i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv32i16( - i32); + iXLen); -define @intrinsic_vid_v_nxv32i16(i32 %0) nounwind { +define @intrinsic_vid_v_nxv32i16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -408,7 +410,7 @@ define @intrinsic_vid_v_nxv32i16(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv32i16( - i32 %0) + iXLen %0) ret %a } @@ -416,9 +418,9 @@ entry: declare @llvm.riscv.vid.mask.nxv32i16( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv32i16( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv32i16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -428,15 +430,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv32i16( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv1i32( - i32); + iXLen); -define @intrinsic_vid_v_nxv1i32(i32 %0) nounwind { +define @intrinsic_vid_v_nxv1i32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -444,7 +446,7 @@ define @intrinsic_vid_v_nxv1i32(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv1i32( - i32 %0) + iXLen %0) ret %a } @@ -452,9 +454,9 @@ entry: declare @llvm.riscv.vid.mask.nxv1i32( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv1i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv1i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vid_mask_v_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -464,15 +466,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv1i32( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv2i32( - i32); + iXLen); -define @intrinsic_vid_v_nxv2i32(i32 %0) nounwind { +define @intrinsic_vid_v_nxv2i32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -480,7 +482,7 @@ define @intrinsic_vid_v_nxv2i32(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv2i32( - i32 %0) + iXLen %0) ret %a } @@ -488,9 +490,9 @@ entry: declare @llvm.riscv.vid.mask.nxv2i32( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv2i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv2i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -500,15 +502,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv2i32( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv4i32( - i32); + iXLen); -define @intrinsic_vid_v_nxv4i32(i32 %0) nounwind { +define @intrinsic_vid_v_nxv4i32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -516,7 +518,7 @@ define @intrinsic_vid_v_nxv4i32(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv4i32( - i32 %0) + iXLen %0) ret %a } @@ -524,9 +526,9 @@ entry: declare @llvm.riscv.vid.mask.nxv4i32( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv4i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv4i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -536,15 +538,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv4i32( %0, %1, - i32 %2) + iXLen %2) ret %a } declare 
@llvm.riscv.vid.nxv8i32( - i32); + iXLen); -define @intrinsic_vid_v_nxv8i32(i32 %0) nounwind { +define @intrinsic_vid_v_nxv8i32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -552,7 +554,7 @@ define @intrinsic_vid_v_nxv8i32(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv8i32( - i32 %0) + iXLen %0) ret %a } @@ -560,9 +562,9 @@ entry: declare @llvm.riscv.vid.mask.nxv8i32( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv8i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -572,15 +574,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv8i32( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv16i32( - i32); + iXLen); -define @intrinsic_vid_v_nxv16i32(i32 %0) nounwind { +define @intrinsic_vid_v_nxv16i32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vid_v_nxv16i32(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv16i32( - i32 %0) + iXLen %0) ret %a } @@ -596,9 +598,9 @@ entry: declare @llvm.riscv.vid.mask.nxv16i32( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv16i32( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv16i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -608,15 +610,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv16i32( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv1i64( - i32); + iXLen); -define @intrinsic_vid_v_nxv1i64(i32 %0) nounwind { +define @intrinsic_vid_v_nxv1i64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv1i64: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -624,7 +626,7 @@ define @intrinsic_vid_v_nxv1i64(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv1i64( - i32 %0) + iXLen %0) ret %a } @@ -632,9 +634,9 @@ entry: declare @llvm.riscv.vid.mask.nxv1i64( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv1i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv1i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -644,15 +646,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv1i64( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv2i64( - i32); + iXLen); -define @intrinsic_vid_v_nxv2i64(i32 %0) nounwind { +define @intrinsic_vid_v_nxv2i64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -660,7 +662,7 @@ define @intrinsic_vid_v_nxv2i64(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv2i64( - i32 %0) + iXLen %0) ret %a } @@ -668,9 +670,9 @@ entry: declare @llvm.riscv.vid.mask.nxv2i64( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv2i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv2i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -680,15 +682,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv2i64( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv4i64( - i32); + iXLen); -define @intrinsic_vid_v_nxv4i64(i32 %0) nounwind { +define @intrinsic_vid_v_nxv4i64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -696,7 +698,7 @@ define @intrinsic_vid_v_nxv4i64(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv4i64( - i32 %0) + iXLen %0) ret %a } @@ -704,9 +706,9 @@ 
entry: declare @llvm.riscv.vid.mask.nxv4i64( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv4i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv4i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -716,15 +718,15 @@ entry: %a = call @llvm.riscv.vid.mask.nxv4i64( %0, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.vid.nxv8i64( - i32); + iXLen); -define @intrinsic_vid_v_nxv8i64(i32 %0) nounwind { +define @intrinsic_vid_v_nxv8i64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vid_v_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -732,7 +734,7 @@ define @intrinsic_vid_v_nxv8i64(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vid.nxv8i64( - i32 %0) + iXLen %0) ret %a } @@ -740,9 +742,9 @@ entry: declare @llvm.riscv.vid.mask.nxv8i64( , , - i32); + iXLen); -define @intrinsic_vid_mask_v_nxv8i64( %0, %1, i32 %2) nounwind { +define @intrinsic_vid_mask_v_nxv8i64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -752,7 +754,7 @@ entry: %a = call @llvm.riscv.vid.mask.nxv8i64( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll deleted file mode 100644 index 0e37ff483fb4f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll +++ /dev/null @@ -1,882 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.viota.nxv1i8( - , - i64); - -define @intrinsic_viota_m_nxv1i8_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv1i8_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv1i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv1i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv1i8_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv1i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv2i8( - , - i64); - -define @intrinsic_viota_m_nxv2i8_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv2i8_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv2i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv2i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv2i8_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i8_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv2i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv4i8( - , - i64); - -define @intrinsic_viota_m_nxv4i8_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv4i8_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv4i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv4i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv4i8_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i8_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.viota.mask.nxv4i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv8i8( - , - i64); - -define @intrinsic_viota_m_nxv8i8_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv8i8_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv8i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv8i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv8i8_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i8_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv8i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv16i8( - , - i64); - -define @intrinsic_viota_m_nxv16i8_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv16i8_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv16i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv16i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv16i8_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i8_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv16i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv32i8( - , - i64); - -define @intrinsic_viota_m_nxv32i8_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv32i8_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv32i8( - 
%0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv32i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv32i8_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i8_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv32i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv64i8( - , - i64); - -define @intrinsic_viota_m_nxv64i8_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv64i8_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv64i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv64i8( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv64i8_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv64i8_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv64i8( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv1i16( - , - i64); - -define @intrinsic_viota_m_nxv1i16_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv1i16_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv1i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv1i16_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i16_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv1i16( - 
%0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv2i16( - , - i64); - -define @intrinsic_viota_m_nxv2i16_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv2i16_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv2i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv2i16_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i16_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv2i16( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv4i16( - , - i64); - -define @intrinsic_viota_m_nxv4i16_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv4i16_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv4i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv4i16_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i16_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv4i16( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv8i16( - , - i64); - -define @intrinsic_viota_m_nxv8i16_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv8i16_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv8i16( - %0, - i64 %1) - - ret %a -} - 
-declare @llvm.riscv.viota.mask.nxv8i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv8i16_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i16_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv8i16( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv16i16( - , - i64); - -define @intrinsic_viota_m_nxv16i16_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv16i16_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv16i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv16i16_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i16_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv16i16( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv32i16( - , - i64); - -define @intrinsic_viota_m_nxv32i16_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv32i16_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv32i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv32i16( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv32i16_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i16_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv32i16( - %0, - %1, - 
%1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv1i32( - , - i64); - -define @intrinsic_viota_m_nxv1i32_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv1i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv1i32( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv1i32_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i32_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv1i32( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv2i32( - , - i64); - -define @intrinsic_viota_m_nxv2i32_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv2i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv2i32( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv2i32_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i32_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv2i32( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv4i32( - , - i64); - -define @intrinsic_viota_m_nxv4i32_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv4i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.viota.mask.nxv4i32( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv4i32_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i32_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv4i32( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv8i32( - , - i64); - -define @intrinsic_viota_m_nxv8i32_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv8i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv8i32( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv8i32_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i32_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv8i32( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv16i32( - , - i64); - -define @intrinsic_viota_m_nxv16i32_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv16i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv16i32( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv16i32_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i32_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv16i32( - %0, - %1, - %1, - i64 %2) - - ret 
%a -} - -declare @llvm.riscv.viota.nxv1i64( - , - i64); - -define @intrinsic_viota_m_nxv1i64_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv1i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv1i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv1i64( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv1i64_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i64_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv1i64( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv2i64( - , - i64); - -define @intrinsic_viota_m_nxv2i64_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv2i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv2i64( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv2i64_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i64_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv2i64( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv4i64( - , - i64); - -define @intrinsic_viota_m_nxv4i64_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv4i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.viota.mask.nxv4i64( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv4i64_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i64_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv4i64( - %0, - %1, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.viota.nxv8i64( - , - i64); - -define @intrinsic_viota_m_nxv8i64_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_viota_m_nxv8i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: viota.m v8, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.viota.mask.nxv8i64( - , - , - , - i64); - -define @intrinsic_viota_mask_m_nxv8i64_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i64_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: viota.m v8, v0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.viota.mask.nxv8i64( - %0, - %1, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/viota.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/viota.ll index 6b3838cd5898c..acec6f7c8a08f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/viota.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.viota.nxv1i8( , - 
i32); + iXLen); -define @intrinsic_viota_m_nxv1i8_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv1i8_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv1i8_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_viota_m_nxv1i8_nxv1i1( %0, entry: %a = call @llvm.riscv.viota.nxv1i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,9 +25,9 @@ declare @llvm.riscv.viota.mask.nxv1i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv1i8_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv1i8_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu @@ -36,16 +38,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv2i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv2i8_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv2i8_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv2i8_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -54,7 +56,7 @@ define @intrinsic_viota_m_nxv2i8_nxv2i1( %0, entry: %a = call @llvm.riscv.viota.nxv2i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -63,9 +65,9 @@ declare @llvm.riscv.viota.mask.nxv2i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv2i8_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv2i8_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i8_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu @@ -76,16 +78,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv4i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv4i8_nxv4i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv4i8_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv4i8_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e8, mf2, ta, mu @@ -94,7 +96,7 @@ define @intrinsic_viota_m_nxv4i8_nxv4i1( %0, entry: %a = call @llvm.riscv.viota.nxv4i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -103,9 +105,9 @@ declare @llvm.riscv.viota.mask.nxv4i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv4i8_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv4i8_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i8_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu @@ -116,16 +118,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv8i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv8i8_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv8i8_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv8i8_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -134,7 +136,7 @@ define @intrinsic_viota_m_nxv8i8_nxv8i1( %0, entry: %a = call @llvm.riscv.viota.nxv8i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.viota.mask.nxv8i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv8i8_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv8i8_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i8_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu @@ -156,16 +158,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv16i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv16i8_nxv16i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv16i8_nxv16i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv16i8_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -174,7 +176,7 @@ define @intrinsic_viota_m_nxv16i8_nxv16i1( entry: %a = call @llvm.riscv.viota.nxv16i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -183,9 +185,9 @@ declare 
@llvm.riscv.viota.mask.nxv16i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv16i8_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv16i8_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i8_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu @@ -196,16 +198,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv32i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv32i8_nxv32i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv32i8_nxv32i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv32i8_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -214,7 +216,7 @@ define @intrinsic_viota_m_nxv32i8_nxv32i1( entry: %a = call @llvm.riscv.viota.nxv32i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -223,9 +225,9 @@ declare @llvm.riscv.viota.mask.nxv32i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv32i8_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv32i8_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i8_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu @@ -236,16 +238,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv64i8( , - i32); + iXLen); -define @intrinsic_viota_m_nxv64i8_nxv64i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv64i8_nxv64i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv64i8_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -254,7 +256,7 @@ define @intrinsic_viota_m_nxv64i8_nxv64i1( entry: %a = call @llvm.riscv.viota.nxv64i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -263,9 +265,9 @@ declare @llvm.riscv.viota.mask.nxv64i8( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv64i8_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv64i8_nxv64i1( %0, %1, iXLen %2) nounwind 
{ ; CHECK-LABEL: intrinsic_viota_mask_m_nxv64i8_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu @@ -276,16 +278,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv1i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv1i16_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv1i16_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv1i16_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -294,7 +296,7 @@ define @intrinsic_viota_m_nxv1i16_nxv1i1( %0 entry: %a = call @llvm.riscv.viota.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -303,9 +305,9 @@ declare @llvm.riscv.viota.mask.nxv1i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv1i16_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv1i16_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i16_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -316,16 +318,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv2i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv2i16_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv2i16_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv2i16_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -334,7 +336,7 @@ define @intrinsic_viota_m_nxv2i16_nxv2i1( %0 entry: %a = call @llvm.riscv.viota.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -343,9 +345,9 @@ declare @llvm.riscv.viota.mask.nxv2i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv2i16_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv2i16_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i16_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -356,16 +358,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } 
declare @llvm.riscv.viota.nxv4i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv4i16_nxv4i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv4i16_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv4i16_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,7 +376,7 @@ define @intrinsic_viota_m_nxv4i16_nxv4i1( %0 entry: %a = call @llvm.riscv.viota.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -383,9 +385,9 @@ declare @llvm.riscv.viota.mask.nxv4i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv4i16_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv4i16_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i16_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -396,16 +398,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv8i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv8i16_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv8i16_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv8i16_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -414,7 +416,7 @@ define @intrinsic_viota_m_nxv8i16_nxv8i1( %0 entry: %a = call @llvm.riscv.viota.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -423,9 +425,9 @@ declare @llvm.riscv.viota.mask.nxv8i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv8i16_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv8i16_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i16_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -436,16 +438,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv16i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv16i16_nxv16i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv16i16_nxv16i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_viota_m_nxv16i16_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -454,7 +456,7 @@ define @intrinsic_viota_m_nxv16i16_nxv16i1( @llvm.riscv.viota.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -463,9 +465,9 @@ declare @llvm.riscv.viota.mask.nxv16i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv16i16_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv16i16_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i16_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -476,16 +478,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv32i16( , - i32); + iXLen); -define @intrinsic_viota_m_nxv32i16_nxv32i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv32i16_nxv32i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv32i16_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -494,7 +496,7 @@ define @intrinsic_viota_m_nxv32i16_nxv32i1( @llvm.riscv.viota.nxv32i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -503,9 +505,9 @@ declare @llvm.riscv.viota.mask.nxv32i16( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv32i16_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv32i16_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i16_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -516,16 +518,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv1i32( , - i32); + iXLen); -define @intrinsic_viota_m_nxv1i32_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv1i32_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv1i32_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -534,7 +536,7 @@ define @intrinsic_viota_m_nxv1i32_nxv1i1( %0 entry: %a = call @llvm.riscv.viota.nxv1i32( %0, 
- i32 %1) + iXLen %1) ret %a } @@ -543,9 +545,9 @@ declare @llvm.riscv.viota.mask.nxv1i32( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv1i32_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv1i32_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i32_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -556,16 +558,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv2i32( , - i32); + iXLen); -define @intrinsic_viota_m_nxv2i32_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv2i32_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv2i32_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -574,7 +576,7 @@ define @intrinsic_viota_m_nxv2i32_nxv2i1( %0 entry: %a = call @llvm.riscv.viota.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -583,9 +585,9 @@ declare @llvm.riscv.viota.mask.nxv2i32( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv2i32_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv2i32_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i32_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -596,16 +598,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv4i32( , - i32); + iXLen); -define @intrinsic_viota_m_nxv4i32_nxv4i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv4i32_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv4i32_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -614,7 +616,7 @@ define @intrinsic_viota_m_nxv4i32_nxv4i1( %0 entry: %a = call @llvm.riscv.viota.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -623,9 +625,9 @@ declare @llvm.riscv.viota.mask.nxv4i32( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv4i32_nxv4i1( %0, %1, i32 %2) nounwind { +define 
@intrinsic_viota_mask_m_nxv4i32_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i32_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -636,16 +638,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv8i32( , - i32); + iXLen); -define @intrinsic_viota_m_nxv8i32_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv8i32_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv8i32_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -654,7 +656,7 @@ define @intrinsic_viota_m_nxv8i32_nxv8i1( %0 entry: %a = call @llvm.riscv.viota.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -663,9 +665,9 @@ declare @llvm.riscv.viota.mask.nxv8i32( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv8i32_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv8i32_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i32_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -676,16 +678,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv16i32( , - i32); + iXLen); -define @intrinsic_viota_m_nxv16i32_nxv16i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv16i32_nxv16i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv16i32_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -694,7 +696,7 @@ define @intrinsic_viota_m_nxv16i32_nxv16i1( @llvm.riscv.viota.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -703,9 +705,9 @@ declare @llvm.riscv.viota.mask.nxv16i32( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv16i32_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv16i32_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i32_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -716,16 
+718,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv1i64( , - i32); + iXLen); -define @intrinsic_viota_m_nxv1i64_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv1i64_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv1i64_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -734,7 +736,7 @@ define @intrinsic_viota_m_nxv1i64_nxv1i1( %0 entry: %a = call @llvm.riscv.viota.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -743,9 +745,9 @@ declare @llvm.riscv.viota.mask.nxv1i64( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv1i64_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv1i64_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i64_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -756,16 +758,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv2i64( , - i32); + iXLen); -define @intrinsic_viota_m_nxv2i64_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv2i64_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv2i64_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -774,7 +776,7 @@ define @intrinsic_viota_m_nxv2i64_nxv2i1( %0 entry: %a = call @llvm.riscv.viota.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -783,9 +785,9 @@ declare @llvm.riscv.viota.mask.nxv2i64( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv2i64_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv2i64_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i64_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -796,16 +798,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv4i64( , - i32); + iXLen); -define @intrinsic_viota_m_nxv4i64_nxv4i1( %0, i32 %1) nounwind { +define 
@intrinsic_viota_m_nxv4i64_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv4i64_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -814,7 +816,7 @@ define @intrinsic_viota_m_nxv4i64_nxv4i1( %0 entry: %a = call @llvm.riscv.viota.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -823,9 +825,9 @@ declare @llvm.riscv.viota.mask.nxv4i64( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv4i64_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv4i64_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i64_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -836,16 +838,16 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } declare @llvm.riscv.viota.nxv8i64( , - i32); + iXLen); -define @intrinsic_viota_m_nxv8i64_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_viota_m_nxv8i64_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_viota_m_nxv8i64_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -854,7 +856,7 @@ define @intrinsic_viota_m_nxv8i64_nxv8i1( %0 entry: %a = call @llvm.riscv.viota.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -863,9 +865,9 @@ declare @llvm.riscv.viota.mask.nxv8i64( , , , - i32); + iXLen); -define @intrinsic_viota_mask_m_nxv8i64_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_viota_mask_m_nxv8i64_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i64_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -876,7 +878,7 @@ entry: %0, %1, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll deleted file mode 100644 index 21b3f7b0b0ad2..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll +++ /dev/null @@ -1,94 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 
-mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare @llvm.riscv.vlm.nxv1i1(*, i64); - -define @intrinsic_vlm_v_nxv1i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv1i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv2i1(*, i64); - -define @intrinsic_vlm_v_nxv2i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv2i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv4i1(*, i64); - -define @intrinsic_vlm_v_nxv4i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv4i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv8i1(*, i64); - -define @intrinsic_vlm_v_nxv8i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv8i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv16i1(*, i64); - -define @intrinsic_vlm_v_nxv16i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv16i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv32i1(*, i64); - -define @intrinsic_vlm_v_nxv32i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) 
-; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv32i1(* %0, i64 %1) - ret %a -} - -declare @llvm.riscv.vlm.nxv64i1(*, i64); - -define @intrinsic_vlm_v_nxv64i1(* %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vlm_v_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vlm.nxv64i1(* %0, i64 %1) - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlm.ll similarity index 80% rename from llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vlm.ll index 72cac45129035..765db6d47d7a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlm.ll @@ -1,94 +1,96 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s -declare @llvm.riscv.vlm.nxv1i1(*, i32); +declare @llvm.riscv.vlm.nxv1i1(*, iXLen); -define @intrinsic_vlm_v_nxv1i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv1i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv1i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv1i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv2i1(*, i32); +declare @llvm.riscv.vlm.nxv2i1(*, iXLen); -define @intrinsic_vlm_v_nxv2i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv2i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; 
CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv2i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv2i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv4i1(*, i32); +declare @llvm.riscv.vlm.nxv4i1(*, iXLen); -define @intrinsic_vlm_v_nxv4i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv4i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv4i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv4i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv8i1(*, i32); +declare @llvm.riscv.vlm.nxv8i1(*, iXLen); -define @intrinsic_vlm_v_nxv8i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv8i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv8i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv8i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv16i1(*, i32); +declare @llvm.riscv.vlm.nxv16i1(*, iXLen); -define @intrinsic_vlm_v_nxv16i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv16i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv16i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv16i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv32i1(*, i32); +declare @llvm.riscv.vlm.nxv32i1(*, iXLen); -define @intrinsic_vlm_v_nxv32i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv32i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv32i1(* %0, i32 %1) + %a = 
call @llvm.riscv.vlm.nxv32i1(* %0, iXLen %1) ret %a } -declare @llvm.riscv.vlm.nxv64i1(*, i32); +declare @llvm.riscv.vlm.nxv64i1(*, iXLen); -define @intrinsic_vlm_v_nxv64i1(* %0, i32 %1) nounwind { +define @intrinsic_vlm_v_nxv64i1(* %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vlm_v_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vlm.nxv64i1(* %0, i32 %1) + %a = call @llvm.riscv.vlm.nxv64i1(* %0, iXLen %1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll deleted file mode 100644 index 12107a960f87d..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmand.nxv1i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv2i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv4i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vmand.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv8i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv16i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv32i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmand.nxv64i1( - , - , - i64); - -define @intrinsic_vmand_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmand_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmand.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmand.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmand.ll index 4d0a65d0f892b..2743f9deab803 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmand.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmand.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmand.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmand.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmand.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmand.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmand.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmand.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmand.nxv8i1( %0, %1, - 
i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmand.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmand.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmand.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmand.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmand.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmand_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmand_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmand_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmand.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll deleted file mode 100644 index 5ad6aa2ee4d8f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmandn.nxv1i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: 
vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv2i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv4i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv8i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv16i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv32i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv32i1( - %0, - %1, 
- i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandn.nxv64i1( - , - , - i64); - -define @intrinsic_vmandn_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandn_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmandn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandn.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmandn.ll index 90d6cba3592d0..c7977e9855ea5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmandn.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmandn.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmandn.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv2i1( %0, %1, - i32 %2) + iXLen 
%2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmandn.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmandn.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmandn.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmandn.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmandn.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmandn_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmandn_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vmandn_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmandn.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll deleted file mode 100644 index 50516db35a4fc..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll +++ /dev/null @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmclr.nxv1i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv1i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv1i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmclr.nxv2i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv2i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv2i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmclr.nxv4i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv4i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv4i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmclr.nxv8i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv8i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv8i1( - i64 %0) - - 
ret %a -} - -declare @llvm.riscv.vmclr.nxv16i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv16i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv16i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmclr.nxv32i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv32i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv32i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmclr.nxv64i1( - i64); - -define @intrinsic_vmclr_m_pseudo_nxv64i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmclr.nxv64i1( - i64 %0) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr.ll similarity index 72% rename from llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmclr.ll index 49fb55c901b2b..f786f033db2ae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmclr.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmclr.nxv1i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv1i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv1i1(iXLen %0) nounwind { 
; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -12,15 +14,15 @@ define @intrinsic_vmclr_m_pseudo_nxv1i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv1i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv2i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv2i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv2i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -28,15 +30,15 @@ define @intrinsic_vmclr_m_pseudo_nxv2i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv2i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv4i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv4i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv4i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -44,15 +46,15 @@ define @intrinsic_vmclr_m_pseudo_nxv4i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv4i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv8i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv8i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv8i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -60,15 +62,15 @@ define @intrinsic_vmclr_m_pseudo_nxv8i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv8i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv16i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv16i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv16i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ 
-76,15 +78,15 @@ define @intrinsic_vmclr_m_pseudo_nxv16i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv16i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv32i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv32i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv32i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -92,15 +94,15 @@ define @intrinsic_vmclr_m_pseudo_nxv32i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv32i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmclr.nxv64i1( - i32); + iXLen); -define @intrinsic_vmclr_m_pseudo_nxv64i1(i32 %0) nounwind { +define @intrinsic_vmclr_m_pseudo_nxv64i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -108,7 +110,7 @@ define @intrinsic_vmclr_m_pseudo_nxv64i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmclr.nxv64i1( - i32 %0) + iXLen %0) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll deleted file mode 100644 index b4397d512e34b..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmnand.nxv1i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv2i1( - , - , - i64); - -define 
@intrinsic_vmnand_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv4i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv8i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv16i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv32i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnand_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnand.nxv64i1( - , - , - i64); - -define @intrinsic_vmnand_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmnand_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmnand.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnand.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmnand.ll index 958d38f0023f6..ee128c178c552 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmnand.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmnand.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmnand.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmnand.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv4i1( %0, %1, i32 %2) nounwind { 
+define @intrinsic_vmnand_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmnand.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmnand.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmnand.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmnand.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmnand_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnand_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnand_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmnand.nxv64i1( %0, %1, - 
i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll deleted file mode 100644 index 5beaeadcdcab2..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmnor.nxv1i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv2i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv4i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv8i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv16i1( - , - , - i64); - -define 
@intrinsic_vmnor_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv32i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmnor.nxv64i1( - , - , - i64); - -define @intrinsic_vmnor_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmnor_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmnor.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmnor.ll index ca5bcf5463fa5..9eb829964b0f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmnor.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmnor.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv1i1( %0, %1, iXLen 
%2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmnor.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmnor.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmnor.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmnor.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare 
@llvm.riscv.vmnor.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmnor.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmnor_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmnor_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmnor_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmnor.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll deleted file mode 100644 index 81fa40663fc1a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmor.nxv1i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmor.nxv2i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare 
@llvm.riscv.vmor.nxv4i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmor.nxv8i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmor.nxv16i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmor.nxv32i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmor.nxv64i1( - , - , - i64); - -define @intrinsic_vmor_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmor_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmor.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmor.ll similarity index 82% rename from 
llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmor.ll index 117d152f7248c..a5bcc135150dc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmor.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmor.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmor.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmor.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmor.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmor.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmor.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmor.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv8i1( %0, %1, i32 %2) 
nounwind { +define @intrinsic_vmor_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmor.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmor.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmor.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmor.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmor.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmor.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmor_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmor_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmor_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmor.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll deleted file mode 100644 index 89de2b78e94f5..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare 
@llvm.riscv.vmorn.nxv1i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv2i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv4i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv8i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv16i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv32i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmorn_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmorn.nxv64i1( - , - , - i64); - -define @intrinsic_vmorn_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmorn.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmorn.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmorn.ll index 8509603acd2f5..eddfe24493f70 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmorn.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmorn.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmorn.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; 
CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmorn.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmorn.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmorn.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmorn.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmorn_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmorn.nxv64i1( , , - 
i32); + iXLen); -define @intrinsic_vmorn_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmorn_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmorn.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll deleted file mode 100644 index f34157dfd3797..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll +++ /dev/null @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmset.nxv1i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv1i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv1i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv2i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv2i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv2i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv4i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv4i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv4i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv8i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv8i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv8i1: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv8i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv16i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv16i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv16i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv32i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv32i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv32i1( - i64 %0) - - ret %a -} - -declare @llvm.riscv.vmset.nxv64i1( - i64); - -define @intrinsic_vmset_m_pseudo_nxv64i1(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmset.nxv64i1( - i64 %0) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmset.ll similarity index 72% rename from llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmset.ll index 68a6719c76580..19b05954e243f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmset.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare 
@llvm.riscv.vmset.nxv1i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv1i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv1i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -12,15 +14,15 @@ define @intrinsic_vmset_m_pseudo_nxv1i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv1i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv2i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv2i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv2i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -28,15 +30,15 @@ define @intrinsic_vmset_m_pseudo_nxv2i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv2i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv4i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv4i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv4i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -44,15 +46,15 @@ define @intrinsic_vmset_m_pseudo_nxv4i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv4i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv8i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv8i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv8i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -60,15 +62,15 @@ define @intrinsic_vmset_m_pseudo_nxv8i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv8i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv16i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv16i1(i32 %0) nounwind { +define 
@intrinsic_vmset_m_pseudo_nxv16i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -76,15 +78,15 @@ define @intrinsic_vmset_m_pseudo_nxv16i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv16i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv32i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv32i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv32i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -92,15 +94,15 @@ define @intrinsic_vmset_m_pseudo_nxv32i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv32i1( - i32 %0) + iXLen %0) ret %a } declare @llvm.riscv.vmset.nxv64i1( - i32); + iXLen); -define @intrinsic_vmset_m_pseudo_nxv64i1(i32 %0) nounwind { +define @intrinsic_vmset_m_pseudo_nxv64i1(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -108,7 +110,7 @@ define @intrinsic_vmset_m_pseudo_nxv64i1(i32 %0) nounwind { ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmset.nxv64i1( - i32 %0) + iXLen %0) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll deleted file mode 100644 index 0f776c83e0129..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll +++ /dev/null @@ -1,296 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmsif.nxv1i1( - , - i64); - -define @intrinsic_vmsif_m_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv1i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv1i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv1i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv2i1( - , - i64); - -define @intrinsic_vmsif_m_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv2i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv2i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv2i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv4i1( - , - i64); - -define @intrinsic_vmsif_m_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv4i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv4i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( %0, %1, %2, 
i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv4i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv8i1( - , - i64); - -define @intrinsic_vmsif_m_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv8i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv8i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv8i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv16i1( - , - i64); - -define @intrinsic_vmsif_m_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv16i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv16i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv16i1_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; CHECK-NEXT: vmv1r.v v0, 
v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv16i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv32i1( - , - i64); - -define @intrinsic_vmsif_m_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv32i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv32i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv32i1_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv32i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsif.nxv64i1( - , - i64); - -define @intrinsic_vmsif_m_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsif_m_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmsif.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.nxv64i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsif.mask.nxv64i1( - , - , - , - i64); - -define @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv64i1_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsif.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsif.mask.nxv64i1( - %0, - %1, - %2, - i64 %3) - ret %a -} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmsif.ll index 888b6ebbbc3f8..5b76892e5a3fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmsif.nxv1i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vmsif_m_nxv1i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsif.nxv1i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,9 +25,9 @@ declare @llvm.riscv.vmsif.mask.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -39,15 +41,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv2i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vmsif_m_nxv2i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsif.nxv2i1( %0, - 
i32 %1) + iXLen %1) ret %a } @@ -65,9 +67,9 @@ declare @llvm.riscv.vmsif.mask.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -81,15 +83,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv4i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv4i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vmsif_m_nxv4i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsif.nxv4i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -107,9 +109,9 @@ declare @llvm.riscv.vmsif.mask.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -123,15 +125,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv8i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vmsif_m_nxv8i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsif.nxv8i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -149,9 +151,9 @@ declare @llvm.riscv.vmsif.mask.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vmsif_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -165,15 +167,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv16i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv16i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv16i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vmsif_m_nxv16i1( %0, i32 entry: %a = call @llvm.riscv.vmsif.nxv16i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -191,9 +193,9 @@ declare @llvm.riscv.vmsif.mask.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv16i1_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -207,15 +209,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv32i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv32i1( %0, i32 %1) nounwind { +define @intrinsic_vmsif_m_nxv32i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vmsif_m_nxv32i1( %0, i32 entry: %a = call @llvm.riscv.vmsif.nxv32i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -233,9 +235,9 @@ declare @llvm.riscv.vmsif.mask.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv32i1_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -249,15 +251,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsif.nxv64i1( , - i32); + iXLen); -define @intrinsic_vmsif_m_nxv64i1( %0, i32 %1) 
nounwind { +define @intrinsic_vmsif_m_nxv64i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsif_m_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vmsif_m_nxv64i1( %0, i32 entry: %a = call @llvm.riscv.vmsif.nxv64i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -275,9 +277,9 @@ declare @llvm.riscv.vmsif.mask.nxv64i1( , , , - i32); + iXLen); -define @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv64i1_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -291,6 +293,6 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll deleted file mode 100644 index 8fba91102f814..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll +++ /dev/null @@ -1,296 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmsof.nxv1i1( - , - i64); - -define @intrinsic_vmsof_m_nxv1i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv1i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv1i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vmsof.mask.nxv1i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv2i1( - , - i64); - -define @intrinsic_vmsof_m_nxv2i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv2i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv2i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv2i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv4i1( - , - i64); - -define @intrinsic_vmsof_m_nxv4i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv4i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv4i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv4i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv8i1( - , - i64); - -define @intrinsic_vmsof_m_nxv8i1( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vmsof_m_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv8i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv8i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv8i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv16i1( - , - i64); - -define @intrinsic_vmsof_m_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv16i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv16i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv16i1_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv16i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv32i1( - , - i64); - -define @intrinsic_vmsof_m_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vmsof.nxv32i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv32i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv32i1_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv32i1( - %0, - %1, - %2, - i64 %3) - ret %a -} - -declare @llvm.riscv.vmsof.nxv64i1( - , - i64); - -define @intrinsic_vmsof_m_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vmsof_m_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmsof.m v8, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.nxv64i1( - %0, - i64 %1) - ret %a -} - -declare @llvm.riscv.vmsof.mask.nxv64i1( - , - , - , - i64); - -define @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv64i1_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmsof.m v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmsof.mask.nxv64i1( - %0, - %1, - %2, - i64 %3) - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmsof.ll index b5db2a4b284ad..290206244f3e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | 
FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmsof.nxv1i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv1i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv1i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vmsof_m_nxv1i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsof.nxv1i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,9 +25,9 @@ declare @llvm.riscv.vmsof.mask.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -39,15 +41,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv2i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv2i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv2i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vmsof_m_nxv2i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsof.nxv2i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -65,9 +67,9 @@ declare @llvm.riscv.vmsof.mask.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -81,15 +83,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv4i1( , - i32); + iXLen); -define 
@intrinsic_vmsof_m_nxv4i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv4i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vmsof_m_nxv4i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsof.nxv4i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -107,9 +109,9 @@ declare @llvm.riscv.vmsof.mask.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -123,15 +125,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv8i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv8i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv8i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vmsof_m_nxv8i1( %0, i32 %1) entry: %a = call @llvm.riscv.vmsof.nxv8i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -149,9 +151,9 @@ declare @llvm.riscv.vmsof.mask.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -165,15 +167,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv16i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv16i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv16i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vmsof_m_nxv16i1( %0, i32 
entry: %a = call @llvm.riscv.vmsof.nxv16i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -191,9 +193,9 @@ declare @llvm.riscv.vmsof.mask.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv16i1_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -207,15 +209,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv32i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv32i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv32i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vmsof_m_nxv32i1( %0, i32 entry: %a = call @llvm.riscv.vmsof.nxv32i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -233,9 +235,9 @@ declare @llvm.riscv.vmsof.mask.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv32i1_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -249,15 +251,15 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vmsof.nxv64i1( , - i32); + iXLen); -define @intrinsic_vmsof_m_nxv64i1( %0, i32 %1) nounwind { +define @intrinsic_vmsof_m_nxv64i1( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vmsof_m_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vmsof_m_nxv64i1( %0, i32 entry: %a = call @llvm.riscv.vmsof.nxv64i1( %0, - i32 %1) + iXLen %1) ret %a } @@ -275,9 +277,9 @@ declare @llvm.riscv.vmsof.mask.nxv64i1( , , , - i32); + iXLen); -define @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %0, %1, %2, i32 %3) nounwind { +define 
@intrinsic_vmsof_mask_m_nxv64i1_nxv64i1( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv64i1_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 @@ -291,6 +293,6 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll deleted file mode 100644 index eb31e34b9c4be..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmxnor.nxv1i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv2i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv4i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv8i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv8i1: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv16i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv32i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxnor.nxv64i1( - , - , - i64); - -define @intrinsic_vmxnor_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxnor_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmxnor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxnor.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmxnor.ll index 82759bc9ea0b5..fbadb42494837 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 
's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmxnor.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmxnor.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmxnor.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmxnor.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmxnor.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv16i1( %0, %1, iXLen 
%2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmxnor.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmxnor.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmxnor_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxnor_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmxnor.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll deleted file mode 100644 index 0e8d66b558315..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmxor.nxv1i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv2i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv2i1( %0, %1, i64 
%2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv4i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv8i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv16i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv32i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmxor.nxv64i1( - , - , - i64); - -define @intrinsic_vmxor_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmxor_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e8, m8, ta, mu -; CHECK-NEXT: vmxor.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmxor.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vmxor.ll index 15cb88f17599a..4a1900ed1538f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmxor.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vmxor.nxv1i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv1i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv1i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv1i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -24,9 +26,9 @@ entry: declare @llvm.riscv.vmxor.nxv2i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv2i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv2i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -36,7 +38,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv2i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -44,9 +46,9 @@ entry: declare @llvm.riscv.vmxor.nxv4i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv4i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv4i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv4i1: ; CHECK: 
# %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -56,7 +58,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv4i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -64,9 +66,9 @@ entry: declare @llvm.riscv.vmxor.nxv8i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv8i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv8i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -76,7 +78,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv8i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -84,9 +86,9 @@ entry: declare @llvm.riscv.vmxor.nxv16i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv16i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv16i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -96,7 +98,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv16i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -104,9 +106,9 @@ entry: declare @llvm.riscv.vmxor.nxv32i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv32i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv32i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -116,7 +118,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv32i1( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -124,9 +126,9 @@ entry: declare @llvm.riscv.vmxor.nxv64i1( , , - i32); + iXLen); -define @intrinsic_vmxor_mm_nxv64i1( %0, %1, i32 %2) nounwind { +define @intrinsic_vmxor_mm_nxv64i1( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vmxor_mm_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu @@ -136,7 +138,7 @@ entry: %a = call @llvm.riscv.vmxor.nxv64i1( %0, %1, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll deleted file 
mode 100644 index 7fd84ccf35efa..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll +++ /dev/null @@ -1,137 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s - -declare void @llvm.riscv.vsm.nxv1i1(, *, i64); - -define void @intrinsic_vsm_v_nxv1i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv1i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv2i1(, *, i64); - -define void @intrinsic_vsm_v_nxv2i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv2i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv4i1(, *, i64); - -define void @intrinsic_vsm_v_nxv4i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv4i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv8i1(, *, i64); - -define void @intrinsic_vsm_v_nxv8i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv8i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv16i1(, *, i64); - -define void @intrinsic_vsm_v_nxv16i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vsm.v 
v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv16i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv32i1(, *, i64); - -define void @intrinsic_vsm_v_nxv32i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv32i1( %0, * %1, i64 %2) - ret void -} - -declare void @llvm.riscv.vsm.nxv64i1(, *, i64); - -define void @intrinsic_vsm_v_nxv64i1( %0, * %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vsm_v_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vsm.v v0, (a0) -; CHECK-NEXT: ret -entry: - call void @llvm.riscv.vsm.nxv64i1( %0, * %1, i64 %2) - ret void -} - -declare @llvm.riscv.vmseq.nxv1i16( - , - , - i64); - -; Make sure we can use the vsetvli from the producing instruction. -define void @test_vsetvli_i16( %0, %1, * %2, i64 %3) nounwind { -; CHECK-LABEL: test_vsetvli_i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmseq.nxv1i16( - %0, - %1, - i64 %3) - call void @llvm.riscv.vsm.nxv1i1( %a, * %2, i64 %3) - ret void -} - -declare @llvm.riscv.vmseq.nxv1i32( - , - , - i64); - -define void @test_vsetvli_i32( %0, %1, * %2, i64 %3) nounwind { -; CHECK-LABEL: test_vsetvli_i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 -; CHECK-NEXT: vsm.v v8, (a0) -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmseq.nxv1i32( - %0, - %1, - i64 %3) - call void @llvm.riscv.vsm.nxv1i1( %a, * %2, i64 %3) - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsm.ll similarity index 79% rename from llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll rename to 
llvm/test/CodeGen/RISCV/rvv/vsm.ll index 3285cdbfe2741..acbaa7a6d84b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsm.ll @@ -1,105 +1,107 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -verify-machineinstrs | FileCheck %s -declare void @llvm.riscv.vsm.nxv1i1(, *, i32); +declare void @llvm.riscv.vsm.nxv1i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv1i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv1i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv1i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv1i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv2i1(, *, i32); +declare void @llvm.riscv.vsm.nxv2i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv2i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv2i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv2i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv2i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv4i1(, *, i32); +declare void @llvm.riscv.vsm.nxv4i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv4i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv4i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; 
CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv4i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv4i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv8i1(, *, i32); +declare void @llvm.riscv.vsm.nxv8i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv8i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv8i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv8i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv8i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv16i1(, *, i32); +declare void @llvm.riscv.vsm.nxv16i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv16i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv16i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv16i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv16i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv32i1(, *, i32); +declare void @llvm.riscv.vsm.nxv32i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv32i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv32i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vsm_v_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv32i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv32i1( %0, * %1, iXLen %2) ret void } -declare void @llvm.riscv.vsm.nxv64i1(, *, i32); +declare void @llvm.riscv.vsm.nxv64i1(, *, iXLen); -define void @intrinsic_vsm_v_nxv64i1( %0, * %1, i32 %2) nounwind { +define void @intrinsic_vsm_v_nxv64i1( %0, * %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vsm_v_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret entry: - call void @llvm.riscv.vsm.nxv64i1( %0, * %1, i32 %2) + call void @llvm.riscv.vsm.nxv64i1( %0, * %1, iXLen %2) ret void } declare @llvm.riscv.vmseq.nxv1i16( , , - i32); + iXLen); ; Make sure we can use the vsetvli from the producing instruction. -define void @test_vsetvli_i16( %0, %1, * %2, i32 %3) nounwind { +define void @test_vsetvli_i16( %0, %1, * %2, iXLen %3) nounwind { ; CHECK-LABEL: test_vsetvli_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu @@ -110,17 +112,17 @@ entry: %a = call @llvm.riscv.vmseq.nxv1i16( %0, %1, - i32 %3) - call void @llvm.riscv.vsm.nxv1i1( %a, * %2, i32 %3) + iXLen %3) + call void @llvm.riscv.vsm.nxv1i1( %a, * %2, iXLen %3) ret void } declare @llvm.riscv.vmseq.nxv1i32( , , - i32); + iXLen); -define void @test_vsetvli_i32( %0, %1, * %2, i32 %3) nounwind { +define void @test_vsetvli_i32( %0, %1, * %2, iXLen %3) nounwind { ; CHECK-LABEL: test_vsetvli_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu @@ -131,7 +133,7 @@ entry: %a = call @llvm.riscv.vmseq.nxv1i32( %0, %1, - i32 %3) - call void @llvm.riscv.vsm.nxv1i1( %a, * %2, i32 %3) + iXLen %3) + call void @llvm.riscv.vsm.nxv1i1( %a, * %2, iXLen %3) ret void } From be6070c290e23d659c6374284a632442e2360967 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Jan 2022 13:41:30 -0800 Subject: [PATCH 270/946] [RISCV] Use FP ABI for some RVV intrinsic tests. NFC Removes moves from GPR to FPR and improves f64 tests on RV32. 
Differential Revision: https://reviews.llvm.org/D117969 --- llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 93 +++------ llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll | 77 +++----- llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll | 146 +++++--------- llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll | 122 +++++------- llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll | 152 ++++++--------- 
llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll | 152 ++++++--------- .../CodeGen/RISCV/rvv/vfslide1down-rv32.ll | 184 ++++++------------ .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll | 152 ++++++--------- .../test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll | 184 ++++++------------ .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll | 184 ++++++------------ llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll | 152 ++++++--------- llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll | 137 ++++++------- llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll | 137 ++++++------- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll | 92 ++++----- llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll | 137 ++++++------- 
llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll | 137 ++++++------- 62 files changed, 3170 insertions(+), 5348 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll index cfdeba067718a..5df1881bffb23 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfadd.nxv1f16.f16( define @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f16.f16( define @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfadd.nxv2f16.f16( define @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, 
ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f16.f16( define @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfadd.nxv4f16.f16( define @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f16.f16( define @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfadd.nxv8f16.f16( define @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, 
mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f16.f16( define @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfadd.nxv16f16.f16( define @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfadd.mask.nxv16f16.f16( define @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfadd.nxv32f16.f16( define @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfadd.mask.nxv32f16.f16( define @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfadd.nxv1f32.f32( define @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f32.f32( define @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfadd.nxv2f32.f32( define @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32: 
; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f32.f32( define @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfadd.nxv4f32.f32( define @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f32.f32( define @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfadd.nxv8f32.f32( define @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f32.f32( define @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfadd.nxv16f32.f32( define @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfadd.mask.nxv16f32.f32( define @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfadd.nxv1f64.f64( define 
@intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f64.f64( define @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfadd.nxv2f64.f64( define @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f64.f64( define @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfadd.nxv4f64.f64( define @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f64.f64( define @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfadd.nxv8f64.f64( define @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, 
-16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f64.f64( define @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll index 8e43295bf7ee3..b91d86befbc87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( , , @@ -688,9 +688,8 @@ declare @llvm.riscv.vfadd.nxv1f16.f16( define @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret 
entry: %a = call @llvm.riscv.vfadd.nxv1f16.f16( @@ -712,9 +711,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f16.f16( define @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f16.f16( @@ -735,9 +733,8 @@ declare @llvm.riscv.vfadd.nxv2f16.f16( define @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f16.f16( @@ -759,9 +756,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f16.f16( define @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f16.f16( @@ -782,9 +778,8 @@ declare @llvm.riscv.vfadd.nxv4f16.f16( define @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: 
vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f16.f16( @@ -806,9 +801,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f16.f16( define @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f16.f16( @@ -829,9 +823,8 @@ declare @llvm.riscv.vfadd.nxv8f16.f16( define @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f16.f16( @@ -853,9 +846,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f16.f16( define @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f16.f16( @@ -876,9 +868,8 @@ declare @llvm.riscv.vfadd.nxv16f16.f16( define @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv16f16.f16( @@ -900,9 +891,8 @@ declare @llvm.riscv.vfadd.mask.nxv16f16.f16( define @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv16f16.f16( @@ -923,9 +913,8 @@ declare @llvm.riscv.vfadd.nxv32f16.f16( define @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv32f16.f16( @@ -947,9 +936,8 @@ declare @llvm.riscv.vfadd.mask.nxv32f16.f16( define @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv32f16.f16( @@ -970,9 +958,8 @@ declare @llvm.riscv.vfadd.nxv1f32.f32( define @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu 
-; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f32.f32( @@ -994,9 +981,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f32.f32( define @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f32.f32( @@ -1017,9 +1003,8 @@ declare @llvm.riscv.vfadd.nxv2f32.f32( define @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f32.f32( @@ -1041,9 +1026,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f32.f32( define @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f32.f32( @@ -1064,9 +1048,8 @@ declare @llvm.riscv.vfadd.nxv4f32.f32( define @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f32.f32( @@ -1088,9 +1071,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f32.f32( define @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f32.f32( @@ -1111,9 +1093,8 @@ declare @llvm.riscv.vfadd.nxv8f32.f32( define @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f32.f32( @@ -1135,9 +1116,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f32.f32( define @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f32.f32( @@ -1158,9 +1138,8 @@ declare @llvm.riscv.vfadd.nxv16f32.f32( define @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv16f32.f32( @@ -1182,9 +1161,8 @@ declare @llvm.riscv.vfadd.mask.nxv16f32.f32( define @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv16f32.f32( @@ -1205,9 +1183,8 @@ declare @llvm.riscv.vfadd.nxv1f64.f64( define @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f64.f64( @@ -1229,9 +1206,8 @@ declare @llvm.riscv.vfadd.mask.nxv1f64.f64( define @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv1f64.f64( @@ -1252,9 +1228,8 @@ declare @llvm.riscv.vfadd.nxv2f64.f64( define @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { 
; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv2f64.f64( @@ -1276,9 +1251,8 @@ declare @llvm.riscv.vfadd.mask.nxv2f64.f64( define @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv2f64.f64( @@ -1299,9 +1273,8 @@ declare @llvm.riscv.vfadd.nxv4f64.f64( define @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv4f64.f64( @@ -1323,9 +1296,8 @@ declare @llvm.riscv.vfadd.mask.nxv4f64.f64( define @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv4f64.f64( @@ -1346,9 +1318,8 @@ declare @llvm.riscv.vfadd.nxv8f64.f64( define 
@intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv8f64.f64( @@ -1370,9 +1341,8 @@ declare @llvm.riscv.vfadd.mask.nxv8f64.f64( define @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll index 2f2bc40a0e6fc..01bfb50ed9b49 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfdiv.nxv1f16.f16( define @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f16.f16( @@ -711,9 +710,8 @@ declare 
@llvm.riscv.vfdiv.mask.nxv1f16.f16( define @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfdiv.nxv2f16.f16( define @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f16.f16( define @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfdiv.nxv4f16.f16( define @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfdiv.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f16.f16( define @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfdiv.nxv8f16.f16( define @intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f16.f16( define @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfdiv.nxv16f16.f16( define @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, 
fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfdiv.mask.nxv16f16.f16( define @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfdiv.nxv32f16.f16( define @intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfdiv.mask.nxv32f16.f16( define @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfdiv.nxv1f32.f32( define @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfdiv.mask.nxv1f32.f32( define @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfdiv.nxv2f32.f32( define @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f32.f32( define @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfdiv.nxv4f32.f32( define @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; 
CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f32.f32( define @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfdiv.nxv8f32.f32( define @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f32.f32( define @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfdiv.nxv16f32.f32( define @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, 
a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfdiv.mask.nxv16f32.f32( define @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfdiv.nxv1f64.f64( define @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfdiv.mask.nxv1f64.f64( define @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a 
= call @llvm.riscv.vfdiv.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfdiv.nxv2f64.f64( define @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f64.f64( define @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfdiv.nxv4f64.f64( define @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f64.f64( define 
@intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfdiv.nxv8f64.f64( define @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f64.f64( define @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll index dc6760900bafb..2d4a16e1bf4e8 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfdiv.nxv1f16.f16( define @intrinsic_vfdiv_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfdiv.mask.nxv1f16.f16( define @intrinsic_vfdiv_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfdiv.nxv2f16.f16( define @intrinsic_vfdiv_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f16.f16( define @intrinsic_vfdiv_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfdiv.nxv4f16.f16( define @intrinsic_vfdiv_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f16.f16( define @intrinsic_vfdiv_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfdiv.nxv8f16.f16( define @intrinsic_vfdiv_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f16.f16( define @intrinsic_vfdiv_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfdiv.nxv16f16.f16( define @intrinsic_vfdiv_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfdiv.mask.nxv16f16.f16( define @intrinsic_vfdiv_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfdiv.nxv32f16.f16( define @intrinsic_vfdiv_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfdiv.mask.nxv32f16.f16( define @intrinsic_vfdiv_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfdiv.nxv1f32.f32( define @intrinsic_vfdiv_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfdiv.mask.nxv1f32.f32( define @intrinsic_vfdiv_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfdiv.nxv2f32.f32( define @intrinsic_vfdiv_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f32.f32( define @intrinsic_vfdiv_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfdiv.nxv4f32.f32( define @intrinsic_vfdiv_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f32.f32( define @intrinsic_vfdiv_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfdiv.nxv8f32.f32( define @intrinsic_vfdiv_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f32.f32( define @intrinsic_vfdiv_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfdiv.nxv16f32.f32( define @intrinsic_vfdiv_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfdiv.mask.nxv16f32.f32( define @intrinsic_vfdiv_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfdiv.nxv1f64.f64( define @intrinsic_vfdiv_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfdiv.mask.nxv1f64.f64( define @intrinsic_vfdiv_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfdiv.nxv2f64.f64( define @intrinsic_vfdiv_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfdiv.mask.nxv2f64.f64( define @intrinsic_vfdiv_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfdiv.nxv4f64.f64( define @intrinsic_vfdiv_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfdiv.mask.nxv4f64.f64( define @intrinsic_vfdiv_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfdiv.nxv8f64.f64( define @intrinsic_vfdiv_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfdiv.mask.nxv8f64.f64( define @intrinsic_vfdiv_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfdiv.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll index 207c7ecf5a3db..16d305bad846c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmacc.nxv1f16.f16( define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, 
i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmacc.nxv2f16.f16( define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare 
@llvm.riscv.vfmacc.nxv4f16.f16( define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmacc.nxv8f16.f16( define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfmacc.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmacc.nxv16f16.f16( define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmacc.nxv1f32.f32( define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, 
e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmacc.nxv2f32.f32( define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmacc.nxv4f32.f32( define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; 
CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmacc.nxv8f32.f32( define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmacc.mask.nxv8f32.f32( define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfmacc.nxv1f64.f64( define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f64.f64( define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, 
%2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfmacc.nxv2f64.f64( define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f64.f64( define @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfmacc.nxv4f64.f64( define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f64.f64( define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll index f636fa36eb7c0..c5809888ff17b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmacc.nxv1f16.f16( define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmacc.nxv2f16.f16( define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmacc.nxv4f16.f16( define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmacc.nxv8f16.f16( define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmacc.nxv16f16.f16( define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: 
; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmacc.nxv1f32.f32( define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmacc.nxv2f32.f32( define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float 
%1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmacc.nxv4f32.f32( define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare 
@llvm.riscv.vfmacc.nxv8f32.f32( define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmacc.mask.nxv8f32.f32( define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfmacc.nxv1f64.f64( define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfmacc.mask.nxv1f64.f64( define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: 
%a = call @llvm.riscv.vfmacc.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfmacc.nxv2f64.f64( define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfmacc.mask.nxv2f64.f64( define @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfmacc.nxv4f64.f64( define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfmacc.mask.nxv4f64.f64( define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, 
a0, e64, m4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmacc.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll index dd12115fbb95b..cfb32cfab4cdc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmadd.nxv1f16.f16( define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmadd.nxv2f16.f16( define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, 
mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmadd.nxv4f16.f16( define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmadd.nxv8f16.f16( define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmadd.nxv16f16.f16( define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmadd.nxv1f32.f32( define @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind 
{ ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmadd.nxv2f32.f32( define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmadd.nxv4f32.f32( define 
@intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmadd.nxv8f32.f32( define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmadd.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfmadd.nxv1f64.f64( define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f64.f64( define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfmadd.nxv2f64.f64( define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare 
@llvm.riscv.vfmadd.mask.nxv2f64.f64( define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfmadd.nxv4f64.f64( define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f64.f64( define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll index ae39ef8dd3260..afd41bd8a2122 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmadd.nxv1f16.f16( define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmadd.nxv2f16.f16( define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmadd.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmadd.nxv4f16.f16( define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmadd.nxv8f16.f16( define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; 
CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmadd.nxv16f16.f16( define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmadd.nxv1f32.f32( define @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; 
CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmadd.nxv2f32.f32( define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmadd.nxv4f32.f32( define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmadd.nxv8f32.f32( define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfmadd.nxv1f64.f64( define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfmadd.mask.nxv1f64.f64( define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfmadd.nxv2f64.f64( define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfmadd.mask.nxv2f64.f64( define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfmadd.nxv4f64.f64( 
define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfmadd.mask.nxv4f64.f64( define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmadd.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll index d6a880c743dd2..98b4cf71da14d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfmax.nxv1f16.f16( define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f16.f16( @@ 
-711,9 +710,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f16.f16( define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmax.nxv2f16.f16( define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f16.f16( define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmax.nxv4f16.f16( define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfmax.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f16.f16( define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmax.nxv8f16.f16( define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfmax.mask.nxv8f16.f16( define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmax.nxv16f16.f16( define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmax.vf 
v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmax.mask.nxv16f16.f16( define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmax.nxv32f16.f16( define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmax.mask.nxv32f16.f16( define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmax.nxv1f32.f32( define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f32.f32( define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmax.nxv2f32.f32( define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f32.f32( define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmax.nxv4f32.f32( define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; 
CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f32.f32( define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmax.nxv8f32.f32( define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmax.mask.nxv8f32.f32( define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmax.nxv16f32.f32( define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, 
a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmax.mask.nxv16f32.f32( define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfmax.nxv1f64.f64( define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f64.f64( define @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a 
= call @llvm.riscv.vfmax.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfmax.nxv2f64.f64( define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f64.f64( define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfmax.nxv4f64.f64( define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f64.f64( define 
@intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfmax.nxv8f64.f64( define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfmax.mask.nxv8f64.f64( define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll index aea557735e932..4fc7319fb2b29 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfmax.nxv1f16.f16( define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f16.f16( define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmax.nxv2f16.f16( define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f16.f16( define 
@intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmax.nxv4f16.f16( define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f16.f16( define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmax.nxv8f16.f16( define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f16.f16( @@ -852,9 +845,8 @@ declare 
@llvm.riscv.vfmax.mask.nxv8f16.f16( define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmax.nxv16f16.f16( define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmax.mask.nxv16f16.f16( define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmax.nxv32f16.f16( define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmax.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmax.mask.nxv32f16.f16( define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmax.nxv1f32.f32( define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f32.f32( define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmax.nxv2f32.f32( define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: 
vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f32.f32( define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmax.nxv4f32.f32( define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f32.f32( define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmax.nxv8f32.f32( define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmax.mask.nxv8f32.f32( define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmax.nxv16f32.f32( define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmax.mask.nxv16f32.f32( define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfmax.nxv1f64.f64( define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, 
m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfmax.mask.nxv1f64.f64( define @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfmax.nxv2f64.f64( define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfmax.mask.nxv2f64.f64( define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfmax.nxv4f64.f64( define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfmax.mask.nxv4f64.f64( define @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfmax.nxv8f64.f64( define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfmax.mask.nxv8f64.f64( define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll index c1d91e208aaa1..3dc1240d0f7b6 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , , @@ -32,9 +32,8 @@ declare @llvm.riscv.vfmerge.nxv1f16.f16( define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv1f16.f16( @@ -77,9 +76,8 @@ declare @llvm.riscv.vfmerge.nxv2f16.f16( define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv2f16.f16( @@ -122,9 +120,8 @@ declare @llvm.riscv.vfmerge.nxv4f16.f16( define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv4f16.f16( @@ -167,9 +164,8 @@ declare @llvm.riscv.vfmerge.nxv8f16.f16( define 
@intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f16.f16( @@ -212,9 +208,8 @@ declare @llvm.riscv.vfmerge.nxv16f16.f16( define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv16f16.f16( @@ -257,9 +252,8 @@ declare @llvm.riscv.vfmerge.nxv32f16.f16( define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv32f16.f16( @@ -302,9 +296,8 @@ declare @llvm.riscv.vfmerge.nxv1f32.f32( define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmerge.nxv1f32.f32( @@ -347,9 +340,8 @@ declare @llvm.riscv.vfmerge.nxv2f32.f32( define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv2f32.f32( @@ -392,9 +384,8 @@ declare @llvm.riscv.vfmerge.nxv4f32.f32( define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv4f32.f32( @@ -437,9 +428,8 @@ declare @llvm.riscv.vfmerge.nxv8f32.f32( define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f32.f32( @@ -482,9 +472,8 @@ declare @llvm.riscv.vfmerge.nxv16f32.f32( define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; 
CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv16f32.f32( @@ -527,13 +516,8 @@ declare @llvm.riscv.vfmerge.nxv1f64.f64( define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv1f64.f64( @@ -576,13 +560,8 @@ declare @llvm.riscv.vfmerge.nxv2f64.f64( define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv2f64.f64( @@ -625,13 +604,8 @@ declare @llvm.riscv.vfmerge.nxv4f64.f64( define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmerge.nxv4f64.f64( @@ -674,13 +648,8 @@ declare @llvm.riscv.vfmerge.nxv8f64.f64( define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll index cf4ad020e4cf1..b23d908c7edda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , , @@ -32,9 +32,8 @@ declare @llvm.riscv.vfmerge.nxv1f16.f16( define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv1f16.f16( @@ -77,9 +76,8 @@ declare @llvm.riscv.vfmerge.nxv2f16.f16( define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv2f16.f16( @@ -122,9 +120,8 @@ declare @llvm.riscv.vfmerge.nxv4f16.f16( define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv4f16.f16( @@ -167,9 +164,8 @@ declare @llvm.riscv.vfmerge.nxv8f16.f16( define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f16.f16( @@ -212,9 +208,8 @@ declare @llvm.riscv.vfmerge.nxv16f16.f16( define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv16f16.f16( @@ -257,9 +252,8 @@ declare @llvm.riscv.vfmerge.nxv32f16.f16( define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv32f16.f16( @@ -302,9 +296,8 @@ declare @llvm.riscv.vfmerge.nxv1f32.f32( define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv1f32.f32( @@ -347,9 +340,8 @@ declare @llvm.riscv.vfmerge.nxv2f32.f32( define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv2f32.f32( @@ -392,9 +384,8 @@ declare @llvm.riscv.vfmerge.nxv4f32.f32( define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv4f32.f32( @@ -437,9 +428,8 @@ declare @llvm.riscv.vfmerge.nxv8f32.f32( define 
@intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f32.f32( @@ -482,9 +472,8 @@ declare @llvm.riscv.vfmerge.nxv16f32.f32( define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv16f32.f32( @@ -527,9 +516,8 @@ declare @llvm.riscv.vfmerge.nxv1f64.f64( define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv1f64.f64( @@ -572,9 +560,8 @@ declare @llvm.riscv.vfmerge.nxv2f64.f64( define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmerge.nxv2f64.f64( @@ -617,9 +604,8 @@ declare @llvm.riscv.vfmerge.nxv4f64.f64( define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv4f64.f64( @@ -662,9 +648,8 @@ declare @llvm.riscv.vfmerge.nxv8f64.f64( define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, ft0, v0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmerge.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll index 2b2d954a7cc1d..0861a787440e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfmin.nxv1f16.f16( define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: 
vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f16.f16( define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmin.nxv2f16.f16( define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f16.f16( define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmin.nxv4f16.f16( define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f16.f16( define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmin.nxv8f16.f16( define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f16.f16( define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmin.nxv16f16.f16( define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: 
vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmin.mask.nxv16f16.f16( define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmin.nxv32f16.f16( define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmin.mask.nxv32f16.f16( define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmin.nxv1f32.f32( define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f32.f32( define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmin.nxv2f32.f32( define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f32.f32( define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmin.nxv4f32.f32( define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f32.f32( define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmin.nxv8f32.f32( define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f32.f32( define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmin.nxv16f32.f32( define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmin.mask.nxv16f32.f32( define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfmin.nxv1f64.f64( define @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f64.f64( define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: 
vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfmin.nxv2f64.f64( define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f64.f64( define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfmin.nxv4f64.f64( define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmin.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f64.f64( define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfmin.nxv8f64.f64( define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f64.f64( define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f64.f64( diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll index 3f1ffd3e81a64..e647fe51ffb17 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfmin.nxv1f16.f16( define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f16.f16( define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmin.nxv2f16.f16( define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmin.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f16.f16( define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmin.nxv4f16.f16( define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f16.f16( define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmin.nxv8f16.f16( define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 
; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f16.f16( define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmin.nxv16f16.f16( define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmin.mask.nxv16f16.f16( define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmin.nxv32f16.f16( define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, 
a0, e16, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmin.mask.nxv32f16.f16( define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmin.nxv1f32.f32( define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f32.f32( define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmin.nxv2f32.f32( define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: 
vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f32.f32( define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmin.nxv4f32.f32( define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f32.f32( define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmin.nxv8f32.f32( define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f32.f32( define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmin.nxv16f32.f32( define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmin.mask.nxv16f32.f32( define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfmin.nxv1f64.f64( define @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfmin.mask.nxv1f64.f64( define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfmin.nxv2f64.f64( define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfmin.mask.nxv2f64.f64( define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfmin.nxv4f64.f64( define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfmin.mask.nxv4f64.f64( define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfmin.nxv8f64.f64( define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfmin.mask.nxv8f64.f64( define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll 
index ff51e6dab20a9..c8407dfe64730 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmsac.nxv1f16.f16( define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmsac.nxv2f16.f16( define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f16.f16( @@ -634,9 +631,8 @@ declare 
@llvm.riscv.vfmsac.mask.nxv2f16.f16( define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmsac.nxv4f16.f16( define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmsac.nxv8f16.f16( define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: 
%a = call @llvm.riscv.vfmsac.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmsac.mask.nxv8f16.f16( define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmsac.nxv16f16.f16( define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmsac.nxv1f32.f32( define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmsac.nxv2f32.f32( define @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmsac.nxv4f32.f32( define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, 
m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmsac.nxv8f32.f32( define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfmsac.nxv1f64.f64( define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f64.f64( define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfmsac.nxv2f64.f64( define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfmsac.mask.nxv2f64.f64( define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; 
CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfmsac.nxv4f64.f64( define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f64.f64( define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll index 08666a5dd51c8..2a5fb2896aa56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 
-mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmsac.nxv1f16.f16( define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmsac.nxv2f16.f16( define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmsac.mask.nxv2f16.f16( define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmsac.nxv4f16.f16( define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmsac.nxv8f16.f16( define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmsac.mask.nxv8f16.f16( define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmsac.nxv16f16.f16( define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmsac.nxv1f32.f32( define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, 
%3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmsac.nxv2f32.f32( define @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmsac.nxv4f32.f32( define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( 
define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmsac.nxv8f32.f32( define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfmsac.nxv1f64.f64( define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmsac.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfmsac.mask.nxv1f64.f64( define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfmsac.nxv2f64.f64( define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfmsac.mask.nxv2f64.f64( define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfmsac.nxv4f64.f64( define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, 
e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfmsac.mask.nxv4f64.f64( define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsac.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll index 8a10800c748ad..620c3dcb1025a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmsub.nxv1f16.f16( define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, 
e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfmsub.nxv2f16.f16( define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmsub.nxv4f16.f16( define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmsub.nxv8f16.f16( define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmsub.nxv16f16.f16( define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; 
CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmsub.nxv1f32.f32( define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmsub.nxv2f32.f32( define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( define 
@intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmsub.nxv4f32.f32( define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmsub.nxv8f32.f32( define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmsub.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfmsub.nxv1f64.f64( define @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f64.f64( define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfmsub.nxv2f64.f64( define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f64.f64( define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfmsub.nxv4f64.f64( define @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f64.f64( define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll index eb0733b6e5cc0..70efc0da21f5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfmsub.nxv1f16.f16( define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ 
declare @llvm.riscv.vfmsub.nxv2f16.f16( define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfmsub.nxv4f16.f16( define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret 
entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfmsub.nxv8f16.f16( define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfmsub.nxv16f16.f16( define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfmsub.nxv1f32.f32( define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfmsub.nxv2f32.f32( define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, 
m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfmsub.nxv4f32.f32( define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfmsub.nxv8f32.f32( define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfmsub.nxv1f64.f64( define @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfmsub.mask.nxv1f64.f64( define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfmsub.nxv2f64.f64( define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfmsub.mask.nxv2f64.f64( define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { 
; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfmsub.nxv4f64.f64( define @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfmsub.mask.nxv4f64.f64( define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmsub.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll index 9cab6de9e0b46..50ebccd92e64c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfmul.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare 
@llvm.riscv.vfmul.nxv1f16.f16( define @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f16.f16( define @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmul.nxv2f16.f16( define @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f16.f16( define @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfmul.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmul.nxv4f16.f16( define @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f16.f16( define @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmul.nxv8f16.f16( define @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f16.f16( define @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t 
; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmul.nxv16f16.f16( define @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmul.mask.nxv16f16.f16( define @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmul.nxv32f16.f16( define @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmul.mask.nxv32f16.f16( define @intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, 
e16, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmul.nxv1f32.f32( define @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f32.f32( define @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmul.nxv2f32.f32( define @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f32.f32( define @intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, 
v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmul.nxv4f32.f32( define @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f32.f32( define @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmul.nxv8f32.f32( define @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f32.f32( define @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, 
a1, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmul.nxv16f32.f32( define @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmul.mask.nxv16f32.f32( define @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfmul.nxv1f64.f64( define @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f64.f64( define @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64( %0, 
%1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfmul.nxv2f64.f64( define @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f64.f64( define @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfmul.nxv4f64.f64( define @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f64.f64( define @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfmul.nxv8f64.f64( define @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f64.f64( define @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; 
CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll index f260499700e6f..08aa64b6de7fc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfmul.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfmul.nxv1f16.f16( define @intrinsic_vfmul_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f16.f16( define @intrinsic_vfmul_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfmul.nxv2f16.f16( define @intrinsic_vfmul_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f16.f16( define @intrinsic_vfmul_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfmul.nxv4f16.f16( define @intrinsic_vfmul_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f16.f16( define @intrinsic_vfmul_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfmul.nxv8f16.f16( define @intrinsic_vfmul_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f16.f16( define @intrinsic_vfmul_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfmul.nxv16f16.f16( define @intrinsic_vfmul_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfmul.mask.nxv16f16.f16( define @intrinsic_vfmul_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfmul.nxv32f16.f16( define @intrinsic_vfmul_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfmul.mask.nxv32f16.f16( define @intrinsic_vfmul_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfmul.nxv1f32.f32( define @intrinsic_vfmul_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f32.f32( define @intrinsic_vfmul_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfmul.nxv2f32.f32( define @intrinsic_vfmul_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f32.f32( define @intrinsic_vfmul_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfmul.nxv4f32.f32( define @intrinsic_vfmul_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f32.f32( define @intrinsic_vfmul_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfmul.nxv8f32.f32( define @intrinsic_vfmul_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f32.f32( define @intrinsic_vfmul_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfmul.nxv16f32.f32( define @intrinsic_vfmul_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfmul.mask.nxv16f32.f32( define @intrinsic_vfmul_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfmul.nxv1f64.f64( define @intrinsic_vfmul_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfmul.mask.nxv1f64.f64( define @intrinsic_vfmul_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfmul.nxv2f64.f64( define @intrinsic_vfmul_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfmul.mask.nxv2f64.f64( define @intrinsic_vfmul_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfmul.nxv4f64.f64( define @intrinsic_vfmul_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfmul.mask.nxv4f64.f64( define @intrinsic_vfmul_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfmul.nxv8f64.f64( define @intrinsic_vfmul_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfmul.mask.nxv8f64.f64( define @intrinsic_vfmul_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll index 956ac22ede348..fa1767202c126 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmacc.nxv1f16.f16( define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmacc.nxv2f16.f16( define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f16.f16( @@ -634,9 +631,8 @@ declare 
@llvm.riscv.vfnmacc.mask.nxv2f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmacc.nxv4f16.f16( define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmacc.nxv8f16.f16( define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmacc.nxv16f16.f16( define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmacc.nxv1f32.f32( define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: 
vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmacc.nxv2f32.f32( define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmacc.nxv4f32.f32( define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmacc.nxv8f32.f32( define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfnmacc.nxv1f64.f64( define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, 
i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfnmacc.nxv2f64.f64( define @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfnmacc.nxv4f64.f64( define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll index 6d302b7f49d5c..f8419e81f7d06 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmacc.nxv1f16.f16( define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmacc.nxv2f16.f16( define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f16.f16( define 
@intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmacc.nxv4f16.f16( define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmacc.nxv8f16.f16( define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfnmacc.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmacc.nxv16f16.f16( define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmacc.nxv1f32.f32( define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmacc.nxv2f32.f32( define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmacc.nxv4f32.f32( define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmacc.nxv8f32.f32( define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfnmacc.nxv1f64.f64( define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfnmacc.nxv2f64.f64( define @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare 
@llvm.riscv.vfnmacc.nxv4f64.f64( define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f64.f64( define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll index 9d5d70498ab3c..e0d33062322b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmadd.nxv1f16.f16( define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmadd.nxv2f16.f16( define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmadd.nxv4f16.f16( define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, 
ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmadd.nxv8f16.f16( define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmadd.nxv16f16.f16( define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmadd.nxv1f32.f32( define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmadd.nxv2f32.f32( define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, 
i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmadd.nxv4f32.f32( define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare 
@llvm.riscv.vfnmadd.nxv8f32.f32( define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfnmadd.nxv1f64.f64( define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; 
CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfnmadd.nxv2f64.f64( define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfnmadd.nxv4f64.f64( define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, 
a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll index 13c9484ad907f..ab407427952ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmadd.nxv1f16.f16( define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f16.f16( @@ -586,9 +585,8 @@ declare 
@llvm.riscv.vfnmadd.mask.nxv1f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmadd.nxv2f16.f16( define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmadd.nxv4f16.f16( define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmadd.nxv8f16.f16( define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmadd.nxv16f16.f16( define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, 
ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmadd.nxv1f32.f32( define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmadd.nxv2f32.f32( define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmadd.nxv4f32.f32( define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmadd.nxv8f32.f32( define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfnmadd.nxv1f64.f64( define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare 
@llvm.riscv.vfnmadd.nxv2f64.f64( define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfnmadd.nxv4f64.f64( define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f64.f64( define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, 
v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll index 385b3397ab28b..834938c7d6e74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmsac.nxv1f16.f16( define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmsac.nxv2f16.f16( define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: 
vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmsac.nxv4f16.f16( define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmsac.nxv8f16.f16( define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmsac.nxv16f16.f16( define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmsac.nxv1f32.f32( define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, 
%2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmsac.nxv2f32.f32( define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare 
@llvm.riscv.vfnmsac.nxv4f32.f32( define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmsac.nxv8f32.f32( define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t 
; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfnmsac.nxv1f64.f64( define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfnmsac.nxv2f64.f64( define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f64.f64( @@ 
-1078,13 +1045,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfnmsac.nxv4f64.f64( define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f64.f64( diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll index 2a7a29a742f92..58e489618bc4e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmsac.nxv1f16.f16( define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmsac.nxv2f16.f16( define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, 
fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmsac.nxv4f16.f16( define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmsac.nxv8f16.f16( define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf 
v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmsac.nxv16f16.f16( define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmsac.nxv1f32.f32( define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmsac.nxv2f32.f32( define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmsac.nxv4f32.f32( define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, 
i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmsac.nxv8f32.f32( define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare 
@llvm.riscv.vfnmsac.nxv1f64.f64( define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfnmsac.nxv2f64.f64( define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, 
v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfnmsac.nxv4f64.f64( define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f64.f64( define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsac.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll index 35e687c4fcbc0..67dbb5a92dfa3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmsub.nxv1f16.f16( define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: 
vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmsub.nxv2f16.f16( define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmsub.nxv4f16.f16( define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmsub.nxv8f16.f16( define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmsub.nxv16f16.f16( define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmsub.nxv1f32.f32( define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare 
@llvm.riscv.vfnmsub.nxv2f32.f32( define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmsub.nxv4f32.f32( define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare @llvm.riscv.vfnmsub.nxv8f32.f32( define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.f32( @@ -994,13 +976,8 @@ declare @llvm.riscv.vfnmsub.nxv1f64.f64( define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv1f64.f64( @@ -1022,13 +999,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.f64( @@ -1050,13 +1022,8 @@ declare @llvm.riscv.vfnmsub.nxv2f64.f64( define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f64.f64( @@ -1078,13 +1045,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.f64( @@ -1106,13 +1068,8 @@ declare @llvm.riscv.vfnmsub.nxv4f64.f64( define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw 
a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f64.f64( @@ -1134,13 +1091,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll index 977163f1383f0..07b23dbfb066d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( , , @@ -562,9 +562,8 @@ declare @llvm.riscv.vfnmsub.nxv1f16.f16( define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret 
entry: %a = call @llvm.riscv.vfnmsub.nxv1f16.f16( @@ -586,9 +585,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.f16( @@ -610,9 +608,8 @@ declare @llvm.riscv.vfnmsub.nxv2f16.f16( define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f16.f16( @@ -634,9 +631,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.f16( @@ -658,9 +654,8 @@ declare @llvm.riscv.vfnmsub.nxv4f16.f16( define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f16.f16( @@ -682,9 +677,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.f16( @@ -706,9 +700,8 @@ declare @llvm.riscv.vfnmsub.nxv8f16.f16( define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv8f16.f16( @@ -730,9 +723,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.f16( @@ -754,9 +746,8 @@ declare @llvm.riscv.vfnmsub.nxv16f16.f16( define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv16f16.f16( @@ -778,9 +769,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.f16( @@ -802,9 +792,8 @@ declare @llvm.riscv.vfnmsub.nxv1f32.f32( define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv1f32.f32( @@ -826,9 +815,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.f32( @@ -850,9 +838,8 @@ declare @llvm.riscv.vfnmsub.nxv2f32.f32( define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { 
; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f32.f32( @@ -874,9 +861,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.f32( @@ -898,9 +884,8 @@ declare @llvm.riscv.vfnmsub.nxv4f32.f32( define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f32.f32( @@ -922,9 +907,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.f32( @@ -946,9 +930,8 @@ declare 
@llvm.riscv.vfnmsub.nxv8f32.f32( define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv8f32.f32( @@ -970,9 +953,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.f32( @@ -994,9 +976,8 @@ declare @llvm.riscv.vfnmsub.nxv1f64.f64( define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv1f64.f64( @@ -1018,9 +999,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.f64( @@ -1042,9 +1022,8 @@ declare @llvm.riscv.vfnmsub.nxv2f64.f64( define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv2f64.f64( @@ -1066,9 +1045,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.f64( @@ -1090,9 +1068,8 @@ declare @llvm.riscv.vfnmsub.nxv4f64.f64( define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.nxv4f64.f64( @@ -1114,9 +1091,8 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f64.f64( define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf 
v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll index b117a120b04d1..1d502c84b1981 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfrdiv.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfrdiv.nxv1f16.f16( define @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( @@ -33,9 +32,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f16.f16( @@ -56,9 +54,8 @@ declare @llvm.riscv.vfrdiv.nxv2f16.f16( define @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv2f16.f16( @@ -80,9 +77,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f16.f16( @@ -103,9 +99,8 @@ declare @llvm.riscv.vfrdiv.nxv4f16.f16( define @intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f16.f16( @@ -127,9 +122,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv4f16.f16( @@ -150,9 +144,8 @@ declare @llvm.riscv.vfrdiv.nxv8f16.f16( define @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f16.f16( @@ -174,9 +167,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f16.f16( @@ -197,9 +189,8 @@ declare @llvm.riscv.vfrdiv.nxv16f16.f16( define @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv16f16.f16( @@ -221,9 +212,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv16f16.f16( @@ -244,9 +234,8 @@ declare @llvm.riscv.vfrdiv.nxv32f16.f16( define @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, 
i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv32f16.f16( @@ -268,9 +257,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv32f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv32f16.f16( @@ -291,9 +279,8 @@ declare @llvm.riscv.vfrdiv.nxv1f32.f32( define @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f32.f32( @@ -315,9 +302,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f32.f32( @@ -338,9 +324,8 @@ declare 
@llvm.riscv.vfrdiv.nxv2f32.f32( define @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv2f32.f32( @@ -362,9 +347,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f32.f32( @@ -385,9 +369,8 @@ declare @llvm.riscv.vfrdiv.nxv4f32.f32( define @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f32.f32( @@ -409,9 +392,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfrdiv.mask.nxv4f32.f32( @@ -432,9 +414,8 @@ declare @llvm.riscv.vfrdiv.nxv8f32.f32( define @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f32.f32( @@ -456,9 +437,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f32.f32( @@ -479,9 +459,8 @@ declare @llvm.riscv.vfrdiv.nxv16f32.f32( define @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv16f32.f32( @@ -503,9 +482,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; 
CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv16f32.f32( @@ -526,13 +504,8 @@ declare @llvm.riscv.vfrdiv.nxv1f64.f64( define @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f64.f64( @@ -554,13 +527,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f64.f64( @@ -581,13 +549,8 @@ declare @llvm.riscv.vfrdiv.nxv2f64.f64( define @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfrdiv.nxv2f64.f64( @@ -609,13 +572,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f64.f64( @@ -636,13 +594,8 @@ declare @llvm.riscv.vfrdiv.nxv4f64.f64( define @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f64.f64( @@ -664,13 +617,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv4f64.f64( @@ -691,13 +639,8 @@ declare 
@llvm.riscv.vfrdiv.nxv8f64.f64( define @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f64.f64( @@ -719,13 +662,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll index 6cc0b53443a8f..ccdd6ad371896 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfrdiv.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfrdiv.nxv1f16.f16( define @intrinsic_vfrdiv_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( @@ -33,9 +32,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f16.f16( @@ -56,9 +54,8 @@ declare @llvm.riscv.vfrdiv.nxv2f16.f16( define @intrinsic_vfrdiv_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv2f16.f16( @@ -80,9 +77,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f16.f16( @@ -103,9 +99,8 @@ declare @llvm.riscv.vfrdiv.nxv4f16.f16( define @intrinsic_vfrdiv_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, 
a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f16.f16( @@ -127,9 +122,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv4f16.f16( @@ -150,9 +144,8 @@ declare @llvm.riscv.vfrdiv.nxv8f16.f16( define @intrinsic_vfrdiv_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f16.f16( @@ -174,9 +167,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f16.f16( @@ -197,9 +189,8 @@ declare @llvm.riscv.vfrdiv.nxv16f16.f16( define @intrinsic_vfrdiv_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv16f16.f16( @@ -221,9 +212,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv16f16.f16( @@ -244,9 +234,8 @@ declare @llvm.riscv.vfrdiv.nxv32f16.f16( define @intrinsic_vfrdiv_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv32f16.f16( @@ -268,9 +257,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv32f16.f16( define @intrinsic_vfrdiv_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv32f16.f16( @@ -291,9 +279,8 @@ declare @llvm.riscv.vfrdiv.nxv1f32.f32( define @intrinsic_vfrdiv_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f32.f32( @@ -315,9 +302,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f32.f32( @@ -338,9 +324,8 @@ declare @llvm.riscv.vfrdiv.nxv2f32.f32( define @intrinsic_vfrdiv_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv2f32.f32( @@ -362,9 +347,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f32.f32( @@ -385,9 +369,8 @@ declare @llvm.riscv.vfrdiv.nxv4f32.f32( define @intrinsic_vfrdiv_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f32.f32( @@ -409,9 +392,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv4f32.f32( @@ -432,9 +414,8 @@ declare @llvm.riscv.vfrdiv.nxv8f32.f32( define @intrinsic_vfrdiv_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f32.f32( @@ -456,9 +437,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f32.f32( @@ -479,9 +459,8 @@ declare @llvm.riscv.vfrdiv.nxv16f32.f32( define @intrinsic_vfrdiv_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv16f32.f32( @@ -503,9 +482,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f32.f32( define @intrinsic_vfrdiv_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv16f32.f32( @@ -526,9 +504,8 @@ declare @llvm.riscv.vfrdiv.nxv1f64.f64( define @intrinsic_vfrdiv_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv1f64.f64( @@ -550,9 +527,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv1f64.f64( @@ -573,9 +549,8 @@ declare @llvm.riscv.vfrdiv.nxv2f64.f64( define @intrinsic_vfrdiv_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_f64: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv2f64.f64( @@ -597,9 +572,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv2f64.f64( @@ -620,9 +594,8 @@ declare @llvm.riscv.vfrdiv.nxv4f64.f64( define @intrinsic_vfrdiv_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv4f64.f64( @@ -644,9 +617,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv4f64.f64( @@ -667,9 +639,8 @@ declare @llvm.riscv.vfrdiv.nxv8f64.f64( define @intrinsic_vfrdiv_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_f64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.nxv8f64.f64( @@ -691,9 +662,8 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f64.f64( define @intrinsic_vfrdiv_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrdiv.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll index 8bbf0aac2b339..eab5b2a414e18 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfrsub.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfrsub.nxv1f16.f16( define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f16.f16( @@ -33,9 +32,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f16.f16( define @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv1f16.f16( @@ -56,9 +54,8 @@ declare @llvm.riscv.vfrsub.nxv2f16.f16( define @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f16.f16( @@ -80,9 +77,8 @@ declare @llvm.riscv.vfrsub.mask.nxv2f16.f16( define @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f16.f16( @@ -103,9 +99,8 @@ declare @llvm.riscv.vfrsub.nxv4f16.f16( define @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f16.f16( @@ -127,9 +122,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f16.f16( define 
@intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f16.f16( @@ -150,9 +144,8 @@ declare @llvm.riscv.vfrsub.nxv8f16.f16( define @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv8f16.f16( @@ -174,9 +167,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f16.f16( define @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv8f16.f16( @@ -197,9 +189,8 @@ declare @llvm.riscv.vfrsub.nxv16f16.f16( define @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv16f16.f16( @@ 
-221,9 +212,8 @@ declare @llvm.riscv.vfrsub.mask.nxv16f16.f16( define @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv16f16.f16( @@ -244,9 +234,8 @@ declare @llvm.riscv.vfrsub.nxv32f16.f16( define @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv32f16.f16( @@ -268,9 +257,8 @@ declare @llvm.riscv.vfrsub.mask.nxv32f16.f16( define @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv32f16.f16( @@ -291,9 +279,8 @@ declare @llvm.riscv.vfrsub.nxv1f32.f32( define @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf 
v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f32.f32( @@ -315,9 +302,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f32.f32( define @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv1f32.f32( @@ -338,9 +324,8 @@ declare @llvm.riscv.vfrsub.nxv2f32.f32( define @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f32.f32( @@ -362,9 +347,8 @@ declare @llvm.riscv.vfrsub.mask.nxv2f32.f32( define @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f32.f32( @@ -385,9 +369,8 @@ declare @llvm.riscv.vfrsub.nxv4f32.f32( define @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f32.f32( @@ -409,9 +392,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f32.f32( define @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f32.f32( @@ -432,9 +414,8 @@ declare @llvm.riscv.vfrsub.nxv8f32.f32( define @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv8f32.f32( @@ -456,9 +437,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f32.f32( define @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv8f32.f32( @@ -479,9 +459,8 @@ declare @llvm.riscv.vfrsub.nxv16f32.f32( define @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv16f32.f32( @@ -503,9 +482,8 @@ declare @llvm.riscv.vfrsub.mask.nxv16f32.f32( define @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv16f32.f32( @@ -526,13 +504,8 @@ declare @llvm.riscv.vfrsub.nxv1f64.f64( define @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f64.f64( @@ -554,13 +527,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f64.f64( define @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfrsub.mask.nxv1f64.f64( @@ -581,13 +549,8 @@ declare @llvm.riscv.vfrsub.nxv2f64.f64( define @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f64.f64( @@ -609,13 +572,8 @@ declare @llvm.riscv.vfrsub.mask.nxv2f64.f64( define @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f64.f64( @@ -636,13 +594,8 @@ declare @llvm.riscv.vfrsub.nxv4f64.f64( define @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f64.f64( @@ -664,13 +617,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f64.f64( 
define @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f64.f64( @@ -691,13 +639,8 @@ declare @llvm.riscv.vfrsub.nxv8f64.f64( define @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv8f64.f64( @@ -719,13 +662,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f64.f64( define @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll index 
85b7148c3c830..0477554c91419 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfrsub.nxv1f16.f16( , half, @@ -10,9 +10,8 @@ declare @llvm.riscv.vfrsub.nxv1f16.f16( define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f16.f16( @@ -34,9 +33,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f16.f16( define @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv1f16.f16( @@ -57,9 +55,8 @@ declare @llvm.riscv.vfrsub.nxv2f16.f16( define @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f16.f16( @@ -81,9 +78,8 @@ declare 
@llvm.riscv.vfrsub.mask.nxv2f16.f16( define @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f16.f16( @@ -104,9 +100,8 @@ declare @llvm.riscv.vfrsub.nxv4f16.f16( define @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f16.f16( @@ -128,9 +123,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f16.f16( define @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f16.f16( @@ -151,9 +145,8 @@ declare @llvm.riscv.vfrsub.nxv8f16.f16( define @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfrsub.nxv8f16.f16( @@ -175,9 +168,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f16.f16( define @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv8f16.f16( @@ -198,9 +190,8 @@ declare @llvm.riscv.vfrsub.nxv16f16.f16( define @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv16f16.f16( @@ -222,9 +213,8 @@ declare @llvm.riscv.vfrsub.mask.nxv16f16.f16( define @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv16f16.f16( @@ -245,9 +235,8 @@ declare @llvm.riscv.vfrsub.nxv32f16.f16( define @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, 
ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv32f16.f16( @@ -269,9 +258,8 @@ declare @llvm.riscv.vfrsub.mask.nxv32f16.f16( define @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv32f16.f16( @@ -292,9 +280,8 @@ declare @llvm.riscv.vfrsub.nxv1f32.f32( define @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f32.f32( @@ -316,9 +303,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f32.f32( define @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv1f32.f32( @@ -339,9 +325,8 @@ declare @llvm.riscv.vfrsub.nxv2f32.f32( define @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; 
CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f32.f32( @@ -363,9 +348,8 @@ declare @llvm.riscv.vfrsub.mask.nxv2f32.f32( define @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f32.f32( @@ -386,9 +370,8 @@ declare @llvm.riscv.vfrsub.nxv4f32.f32( define @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f32.f32( @@ -410,9 +393,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f32.f32( define @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f32.f32( @@ -433,9 +415,8 @@ declare @llvm.riscv.vfrsub.nxv8f32.f32( define @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv8f32.f32( @@ -457,9 +438,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f32.f32( define @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv8f32.f32( @@ -480,9 +460,8 @@ declare @llvm.riscv.vfrsub.nxv16f32.f32( define @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv16f32.f32( @@ -504,9 +483,8 @@ declare @llvm.riscv.vfrsub.mask.nxv16f32.f32( define @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv16f32.f32( @@ -527,9 +505,8 @@ declare @llvm.riscv.vfrsub.nxv1f64.f64( define @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv1f64.f64( @@ -551,9 +528,8 @@ declare @llvm.riscv.vfrsub.mask.nxv1f64.f64( define @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv1f64.f64( @@ -574,9 +550,8 @@ declare @llvm.riscv.vfrsub.nxv2f64.f64( define @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv2f64.f64( @@ -598,9 +573,8 @@ declare @llvm.riscv.vfrsub.mask.nxv2f64.f64( define @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv2f64.f64( @@ -621,9 +595,8 @@ declare @llvm.riscv.vfrsub.nxv4f64.f64( define 
@intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv4f64.f64( @@ -645,9 +618,8 @@ declare @llvm.riscv.vfrsub.mask.nxv4f64.f64( define @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.mask.nxv4f64.f64( @@ -668,9 +640,8 @@ declare @llvm.riscv.vfrsub.nxv8f64.f64( define @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfrsub.nxv8f64.f64( @@ -692,9 +663,8 @@ declare @llvm.riscv.vfrsub.mask.nxv8f64.f64( define @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfrsub.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll index a1dcc1fa26bdc..c0e999a2433d0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnj.nxv1f16.f16( define @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnj.nxv2f16.f16( define @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; 
CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnj.nxv4f16.f16( define @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnj.nxv8f16.f16( define @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, 
v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnj.nxv16f16.f16( define @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnj.nxv32f16.f16( define @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv32f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnj.nxv1f32.f32( define @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnj.nxv2f32.f32( define @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnj.nxv4f32.f32( define @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnj.nxv8f32.f32( define 
@intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnj.nxv16f32.f32( define @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfsgnj.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfsgnj.nxv1f64.f64( define @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfsgnj.nxv2f64.f64( define @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f64.f64( 
define @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfsgnj.nxv4f64.f64( define @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfsgnj.nxv8f64.f64( define @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64( %0, double %1, 
i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll index 8ff4b4ba03e2a..d71fb8fb25352 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfsgnj.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnj.nxv1f16.f16( define @intrinsic_vfsgnj_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnj.nxv2f16.f16( define @intrinsic_vfsgnj_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnj.nxv4f16.f16( define @intrinsic_vfsgnj_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnj.nxv8f16.f16( define @intrinsic_vfsgnj_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnj.nxv16f16.f16( define @intrinsic_vfsgnj_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnj.nxv32f16.f16( define @intrinsic_vfsgnj_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv32f16.f16( define @intrinsic_vfsgnj_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnj.nxv1f32.f32( define @intrinsic_vfsgnj_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, 
v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnj.nxv2f32.f32( define @intrinsic_vfsgnj_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnj.nxv4f32.f32( define @intrinsic_vfsgnj_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: 
vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnj.nxv8f32.f32( define @intrinsic_vfsgnj_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnj.nxv16f32.f32( define @intrinsic_vfsgnj_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; 
CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f32.f32( define @intrinsic_vfsgnj_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfsgnj.nxv1f64.f64( define @intrinsic_vfsgnj_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfsgnj.nxv2f64.f64( define @intrinsic_vfsgnj_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, 
m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfsgnj.nxv4f64.f64( define @intrinsic_vfsgnj_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfsgnj.nxv8f64.f64( define @intrinsic_vfsgnj_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli 
zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f64.f64( define @intrinsic_vfsgnj_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll index ac708fa2d9918..0287f9ea2cbf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f16.f16( define @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f16.f16( define @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f16.f16( define @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, 
%3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f16.f16( define @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnjn.nxv16f16.f16( define @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv16f16.f16( @@ -899,9 +890,8 @@ declare 
@llvm.riscv.vfsgnjn.mask.nxv16f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnjn.nxv32f16.f16( define @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f32.f32( define @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf 
v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f32.f32( define @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f32.f32( define @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: 
vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f32.f32( define @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnjn.nxv16f32.f32( define @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f64.f64( define @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, 
mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f64.f64( define @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f64.f64( define @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfsgnjn.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f64.f64( define @intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( diff 
--git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll index 9ff02249165bc..f751fd7406389 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f16.f16( define @intrinsic_vfsgnjn_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f16.f16( define @intrinsic_vfsgnjn_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfsgnjn.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f16.f16( define @intrinsic_vfsgnjn_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f16.f16( define @intrinsic_vfsgnjn_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: 
ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnjn.nxv16f16.f16( define @intrinsic_vfsgnjn_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnjn.nxv32f16.f16( define @intrinsic_vfsgnjn_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: 
vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( define @intrinsic_vfsgnjn_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f32.f32( define @intrinsic_vfsgnjn_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f32.f32( define @intrinsic_vfsgnjn_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, 
a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f32.f32( define @intrinsic_vfsgnjn_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f32.f32( define @intrinsic_vfsgnjn_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, 
ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnjn.nxv16f32.f32( define @intrinsic_vfsgnjn_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( define @intrinsic_vfsgnjn_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfsgnjn.nxv1f64.f64( define @intrinsic_vfsgnjn_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, 
ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfsgnjn.nxv2f64.f64( define @intrinsic_vfsgnjn_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfsgnjn.nxv4f64.f64( define @intrinsic_vfsgnjn_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare @llvm.riscv.vfsgnjn.nxv8f64.f64( define @intrinsic_vfsgnjn_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( define @intrinsic_vfsgnjn_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll index 8f4fcaa73c42c..fba4eceb23fb6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnjx.nxv1f16.f16( define @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f16.f16( define @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( define 
@intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f16.f16( define @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnjx.nxv8f16.f16( define @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfsgnjx.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnjx.nxv16f16.f16( define @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnjx.nxv32f16.f16( define @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: 
vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnjx.nxv1f32.f32( define @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f32.f32( define @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f32.f32( define @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnjx.nxv8f32.f32( define @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnjx.nxv16f32.f32( define @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare 
@llvm.riscv.vfsgnjx.nxv1f64.f64( define @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f64.f64( define @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( define 
@intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f64.f64( define @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfsgnjx.nxv8f64.f64( define @intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64( %0, 
double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll index 77ebd259c6896..4ae69f0d4f613 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsgnjx.nxv1f16.f16( define @intrinsic_vfsgnjx_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; 
CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f16.f16( define @intrinsic_vfsgnjx_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f16.f16( define @intrinsic_vfsgnjx_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, 
a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsgnjx.nxv8f16.f16( define @intrinsic_vfsgnjx_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsgnjx.nxv16f16.f16( define @intrinsic_vfsgnjx_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsgnjx.nxv32f16.f16( define @intrinsic_vfsgnjx_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( define @intrinsic_vfsgnjx_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsgnjx.nxv1f32.f32( define @intrinsic_vfsgnjx_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f32.f32( @@ -993,9 +980,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f32.f32( define @intrinsic_vfsgnjx_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f32.f32( define @intrinsic_vfsgnjx_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnjx_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsgnjx.nxv8f32.f32( define @intrinsic_vfsgnjx_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsgnjx.nxv16f32.f32( define @intrinsic_vfsgnjx_vf_nxv16f32_f32( %0, float %1, 
i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( define @intrinsic_vfsgnjx_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( @@ -1204,9 +1182,8 @@ declare @llvm.riscv.vfsgnjx.nxv1f64.f64( define @intrinsic_vfsgnjx_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv1f64.f64( @@ -1228,9 +1205,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( @@ -1251,9 +1227,8 @@ declare @llvm.riscv.vfsgnjx.nxv2f64.f64( define 
@intrinsic_vfsgnjx_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv2f64.f64( @@ -1275,9 +1250,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( @@ -1298,9 +1272,8 @@ declare @llvm.riscv.vfsgnjx.nxv4f64.f64( define @intrinsic_vfsgnjx_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv4f64.f64( @@ -1322,9 +1295,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( @@ -1345,9 +1317,8 @@ declare 
@llvm.riscv.vfsgnjx.nxv8f64.f64( define @intrinsic_vfsgnjx_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.nxv8f64.f64( @@ -1369,9 +1340,8 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( define @intrinsic_vfsgnjx_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll index a4833408acfff..5baadc48d857e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfslide1down.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfslide1down.nxv1f16.f16( define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, 
fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( @@ -33,9 +32,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f16.f16( @@ -56,9 +54,8 @@ declare @llvm.riscv.vfslide1down.nxv2f16.f16( define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( @@ -80,9 +77,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f16.f16( @@ -103,9 +99,8 @@ declare @llvm.riscv.vfslide1down.nxv4f16.f16( define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( @@ -127,9 +122,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f16.f16( @@ -150,9 +144,8 @@ declare @llvm.riscv.vfslide1down.nxv8f16.f16( define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( @@ -174,9 +167,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f16.f16( @@ -197,9 +189,8 @@ declare 
@llvm.riscv.vfslide1down.nxv16f16.f16( define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv16f16.f16( @@ -221,9 +212,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv16f16.f16( @@ -244,9 +234,8 @@ declare @llvm.riscv.vfslide1down.nxv32f16.f16( define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( @@ -268,9 +257,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: 
vfslide1down.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv32f16.f16( @@ -291,9 +279,8 @@ declare @llvm.riscv.vfslide1down.nxv1f32.f32( define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( @@ -315,9 +302,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f32.f32( @@ -338,9 +324,8 @@ declare @llvm.riscv.vfslide1down.nxv2f32.f32( define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv2f32.f32( @@ -362,9 +347,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, 
%3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f32.f32( @@ -385,9 +369,8 @@ declare @llvm.riscv.vfslide1down.nxv4f32.f32( define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( @@ -409,9 +392,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f32.f32( @@ -432,9 +414,8 @@ declare @llvm.riscv.vfslide1down.nxv8f32.f32( define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: 
ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( @@ -456,9 +437,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f32.f32( @@ -479,9 +459,8 @@ declare @llvm.riscv.vfslide1down.nxv16f32.f32( define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( @@ -503,9 +482,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv16f32.f32( @@ -526,13 +504,8 @@ declare @llvm.riscv.vfslide1down.nxv1f64.f64( define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f64.f64( @@ -554,13 +527,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f64.f64( @@ -581,13 +549,8 @@ declare @llvm.riscv.vfslide1down.nxv2f64.f64( define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv2f64.f64( @@ -609,13 +572,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f64.f64( @@ -636,13 +594,8 @@ declare @llvm.riscv.vfslide1down.nxv4f64.f64( define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f64.f64( @@ -664,13 +617,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f64.f64( @@ -691,13 +639,8 @@ declare @llvm.riscv.vfslide1down.nxv8f64.f64( define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f64.f64( @@ -719,13 +662,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll index 84572f5dec0d1..a0ba31ea26686 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfslide1down.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfslide1down.nxv1f16.f16( define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, 
a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( @@ -33,9 +32,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f16.f16( @@ -56,9 +54,8 @@ declare @llvm.riscv.vfslide1down.nxv2f16.f16( define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( @@ -80,9 +77,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f16.f16( @@ -103,9 +99,8 @@ declare @llvm.riscv.vfslide1down.nxv4f16.f16( 
define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( @@ -127,9 +122,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f16.f16( @@ -150,9 +144,8 @@ declare @llvm.riscv.vfslide1down.nxv8f16.f16( define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( @@ -174,9 +167,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, 
m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f16.f16( @@ -197,9 +189,8 @@ declare @llvm.riscv.vfslide1down.nxv16f16.f16( define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv16f16.f16( @@ -221,9 +212,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv16f16.f16( @@ -244,9 +234,8 @@ declare @llvm.riscv.vfslide1down.nxv32f16.f16( define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( @@ -268,9 +257,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv32f16.f16( @@ -291,9 +279,8 @@ declare @llvm.riscv.vfslide1down.nxv1f32.f32( define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( @@ -315,9 +302,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f32.f32( @@ -338,9 +324,8 @@ declare @llvm.riscv.vfslide1down.nxv2f32.f32( define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfslide1down.nxv2f32.f32( @@ -362,9 +347,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f32.f32( @@ -385,9 +369,8 @@ declare @llvm.riscv.vfslide1down.nxv4f32.f32( define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( @@ -409,9 +392,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f32.f32( @@ -432,9 +414,8 @@ declare @llvm.riscv.vfslide1down.nxv8f32.f32( define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( @@ -456,9 +437,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f32.f32( @@ -479,9 +459,8 @@ declare @llvm.riscv.vfslide1down.nxv16f32.f32( define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( @@ -503,9 +482,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv16f32.f32( @@ -526,9 +504,8 @@ declare 
@llvm.riscv.vfslide1down.nxv1f64.f64( define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv1f64.f64( @@ -550,9 +527,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv1f64.f64( @@ -573,9 +549,8 @@ declare @llvm.riscv.vfslide1down.nxv2f64.f64( define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv2f64.f64( @@ -597,9 +572,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, ft0, 
v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv2f64.f64( @@ -620,9 +594,8 @@ declare @llvm.riscv.vfslide1down.nxv4f64.f64( define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv4f64.f64( @@ -644,9 +617,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv4f64.f64( @@ -667,9 +639,8 @@ declare @llvm.riscv.vfslide1down.nxv8f64.f64( define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.nxv8f64.f64( @@ -691,9 +662,8 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f64.f64( define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; 
CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1down.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll index 0e5b566812011..271bf70522bfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfslide1up.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfslide1up.nxv1f16.f16( define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -34,9 +33,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfslide1up.mask.nxv1f16.f16( @@ -57,9 +55,8 @@ declare @llvm.riscv.vfslide1up.nxv2f16.f16( define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -82,9 +79,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f16.f16( @@ -105,9 +101,8 @@ declare @llvm.riscv.vfslide1up.nxv4f16.f16( define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret entry: @@ -130,9 +125,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f16.f16( @@ -153,9 +147,8 @@ declare @llvm.riscv.vfslide1up.nxv8f16.f16( define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret entry: @@ -178,9 +171,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f16.f16( @@ -201,9 +193,8 @@ declare @llvm.riscv.vfslide1up.nxv16f16.f16( define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret entry: @@ -226,9 +217,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv16f16.f16( @@ -249,9 +239,8 @@ declare @llvm.riscv.vfslide1up.nxv32f16.f16( define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret entry: @@ -274,9 +263,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv32f16.f16( @@ -297,9 +285,8 @@ declare @llvm.riscv.vfslide1up.nxv1f32.f32( define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -322,9 +309,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( define 
@intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv1f32.f32( @@ -345,9 +331,8 @@ declare @llvm.riscv.vfslide1up.nxv2f32.f32( define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret entry: @@ -370,9 +355,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f32.f32( @@ -393,9 +377,8 @@ declare @llvm.riscv.vfslide1up.nxv4f32.f32( define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; 
CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret entry: @@ -418,9 +401,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f32.f32( @@ -441,9 +423,8 @@ declare @llvm.riscv.vfslide1up.nxv8f32.f32( define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret entry: @@ -466,9 +447,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f32.f32( @@ -489,9 +469,8 @@ declare @llvm.riscv.vfslide1up.nxv16f32.f32( define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, 
mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret entry: @@ -514,9 +493,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv16f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv16f32.f32( @@ -537,14 +515,9 @@ declare @llvm.riscv.vfslide1up.nxv1f64.f64( define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.nxv1f64.f64( @@ -566,13 +539,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: 
vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv1f64.f64( @@ -593,14 +561,9 @@ declare @llvm.riscv.vfslide1up.nxv2f64.f64( define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.nxv2f64.f64( @@ -622,13 +585,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f64.f64( @@ -649,14 +607,9 @@ declare @llvm.riscv.vfslide1up.nxv4f64.f64( define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, 
fa0 ; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.nxv4f64.f64( @@ -678,13 +631,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f64.f64( @@ -705,14 +653,9 @@ declare @llvm.riscv.vfslide1up.nxv8f64.f64( define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.nxv8f64.f64( @@ -734,13 +677,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll index 37afacc6c7f27..4b7d1fe55e244 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfslide1up.nxv1f16.f16( , half, @@ -9,9 +9,8 @@ declare @llvm.riscv.vfslide1up.nxv1f16.f16( define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -34,9 +33,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv1f16.f16( @@ -57,9 +55,8 @@ declare @llvm.riscv.vfslide1up.nxv2f16.f16( define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -82,9 +79,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f16.f16( @@ -105,9 +101,8 @@ declare @llvm.riscv.vfslide1up.nxv4f16.f16( define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret entry: @@ -130,9 +125,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f16.f16( @@ -153,9 +147,8 @@ declare @llvm.riscv.vfslide1up.nxv8f16.f16( define 
@intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret entry: @@ -178,9 +171,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f16.f16( @@ -201,9 +193,8 @@ declare @llvm.riscv.vfslide1up.nxv16f16.f16( define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret entry: @@ -226,9 +217,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv16f16.f16( @@ -249,9 +239,8 @@ declare @llvm.riscv.vfslide1up.nxv32f16.f16( define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret entry: @@ -274,9 +263,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv32f16.f16( @@ -297,9 +285,8 @@ declare @llvm.riscv.vfslide1up.nxv1f32.f32( define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -322,9 +309,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; 
CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv1f32.f32( @@ -345,9 +331,8 @@ declare @llvm.riscv.vfslide1up.nxv2f32.f32( define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret entry: @@ -370,9 +355,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f32.f32( @@ -393,9 +377,8 @@ declare @llvm.riscv.vfslide1up.nxv4f32.f32( define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret entry: @@ -418,9 +401,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f32.f32( @@ -441,9 +423,8 @@ declare @llvm.riscv.vfslide1up.nxv8f32.f32( define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret entry: @@ -466,9 +447,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f32.f32( @@ -489,9 +469,8 @@ declare @llvm.riscv.vfslide1up.nxv16f32.f32( define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret entry: @@ -514,9 +493,8 @@ declare 
@llvm.riscv.vfslide1up.mask.nxv16f32.f32( define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv16f32.f32( @@ -537,9 +515,8 @@ declare @llvm.riscv.vfslide1up.nxv1f64.f64( define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret entry: @@ -562,9 +539,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv1f64.f64( @@ -585,9 +561,8 @@ declare @llvm.riscv.vfslide1up.nxv2f64.f64( define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, 
m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret entry: @@ -610,9 +585,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv2f64.f64( @@ -633,9 +607,8 @@ declare @llvm.riscv.vfslide1up.nxv4f64.f64( define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret entry: @@ -658,9 +631,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv4f64.f64( @@ -681,9 +653,8 @@ declare @llvm.riscv.vfslide1up.nxv8f64.f64( define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret entry: @@ -706,9 +677,8 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f64.f64( define @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfslide1up.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll index 99e45214deea0..86371c1685fc0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( , , @@ -687,9 +687,8 @@ declare @llvm.riscv.vfsub.nxv1f16.f16( define @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f16.f16( @@ -711,9 +710,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f16.f16( define @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f16.f16( @@ -734,9 +732,8 @@ declare @llvm.riscv.vfsub.nxv2f16.f16( define @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f16.f16( @@ -758,9 +755,8 @@ declare @llvm.riscv.vfsub.mask.nxv2f16.f16( define @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f16.f16( @@ -781,9 +777,8 @@ declare @llvm.riscv.vfsub.nxv4f16.f16( define @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv4f16.f16( @@ -805,9 +800,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f16.f16( define @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, 
%3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f16.f16( @@ -828,9 +822,8 @@ declare @llvm.riscv.vfsub.nxv8f16.f16( define @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv8f16.f16( @@ -852,9 +845,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f16.f16( define @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f16.f16( @@ -875,9 +867,8 @@ declare @llvm.riscv.vfsub.nxv16f16.f16( define @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv16f16.f16( @@ -899,9 +890,8 @@ declare @llvm.riscv.vfsub.mask.nxv16f16.f16( define 
@intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv16f16.f16( @@ -922,9 +912,8 @@ declare @llvm.riscv.vfsub.nxv32f16.f16( define @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv32f16.f16( @@ -946,9 +935,8 @@ declare @llvm.riscv.vfsub.mask.nxv32f16.f16( define @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv32f16.f16( @@ -969,9 +957,8 @@ declare @llvm.riscv.vfsub.nxv1f32.f32( define @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f32.f32( @@ -993,9 
+980,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f32.f32( define @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f32.f32( @@ -1016,9 +1002,8 @@ declare @llvm.riscv.vfsub.nxv2f32.f32( define @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f32.f32( @@ -1040,9 +1025,8 @@ declare @llvm.riscv.vfsub.mask.nxv2f32.f32( define @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f32.f32( @@ -1063,9 +1047,8 @@ declare @llvm.riscv.vfsub.nxv4f32.f32( define @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = 
call @llvm.riscv.vfsub.nxv4f32.f32( @@ -1087,9 +1070,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f32.f32( define @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f32.f32( @@ -1110,9 +1092,8 @@ declare @llvm.riscv.vfsub.nxv8f32.f32( define @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv8f32.f32( @@ -1134,9 +1115,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f32.f32( define @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f32.f32( @@ -1157,9 +1137,8 @@ declare @llvm.riscv.vfsub.nxv16f32.f32( define @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; 
CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv16f32.f32( @@ -1181,9 +1160,8 @@ declare @llvm.riscv.vfsub.mask.nxv16f32.f32( define @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv16f32.f32( @@ -1204,13 +1182,8 @@ declare @llvm.riscv.vfsub.nxv1f64.f64( define @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f64.f64( @@ -1232,13 +1205,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f64.f64( define @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f64.f64( @@ -1259,13 +1227,8 @@ declare @llvm.riscv.vfsub.nxv2f64.f64( define 
@intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f64.f64( @@ -1287,13 +1250,8 @@ declare @llvm.riscv.vfsub.mask.nxv2f64.f64( define @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f64.f64( @@ -1314,13 +1272,8 @@ declare @llvm.riscv.vfsub.nxv4f64.f64( define @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv4f64.f64( @@ -1342,13 +1295,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f64.f64( define @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f64.f64( @@ -1369,13 +1317,8 @@ declare @llvm.riscv.vfsub.nxv8f64.f64( define @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv8f64.f64( @@ -1397,13 +1340,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f64.f64( define @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sw a0, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) -; CHECK-NEXT: fld ft0, 8(sp) -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll index c972ed358bf9a..7445cfb806d43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll @@ -1,7 +1,7 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ ; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( , , @@ -688,9 +688,8 @@ declare @llvm.riscv.vfsub.nxv1f16.f16( define @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f16.f16( @@ -712,9 +711,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f16.f16( define @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f16.f16( @@ -735,9 +733,8 @@ declare @llvm.riscv.vfsub.nxv2f16.f16( define @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f16.f16( @@ -759,9 +756,8 @@ declare @llvm.riscv.vfsub.mask.nxv2f16.f16( define @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f16.f16( @@ -782,9 +778,8 @@ declare @llvm.riscv.vfsub.nxv4f16.f16( define @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv4f16.f16( @@ -806,9 +801,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f16.f16( define @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f16.f16( @@ -829,9 +823,8 @@ declare @llvm.riscv.vfsub.nxv8f16.f16( define @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv8f16.f16( @@ -853,9 +846,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f16.f16( define @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, 
i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f16.f16( @@ -876,9 +868,8 @@ declare @llvm.riscv.vfsub.nxv16f16.f16( define @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv16f16.f16( @@ -900,9 +891,8 @@ declare @llvm.riscv.vfsub.mask.nxv16f16.f16( define @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv16f16.f16( @@ -923,9 +913,8 @@ declare @llvm.riscv.vfsub.nxv32f16.f16( define @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv32f16.f16( @@ -947,9 +936,8 @@ declare @llvm.riscv.vfsub.mask.nxv32f16.f16( define 
@intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv32f16.f16( @@ -970,9 +958,8 @@ declare @llvm.riscv.vfsub.nxv1f32.f32( define @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f32.f32( @@ -994,9 +981,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f32.f32( define @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f32.f32( @@ -1017,9 +1003,8 @@ declare @llvm.riscv.vfsub.nxv2f32.f32( define @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f32.f32( @@ -1041,9 +1026,8 @@ 
declare @llvm.riscv.vfsub.mask.nxv2f32.f32( define @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f32.f32( @@ -1064,9 +1048,8 @@ declare @llvm.riscv.vfsub.nxv4f32.f32( define @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv4f32.f32( @@ -1088,9 +1071,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f32.f32( define @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f32.f32( @@ -1111,9 +1093,8 @@ declare @llvm.riscv.vfsub.nxv8f32.f32( define @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfsub.nxv8f32.f32( @@ -1135,9 +1116,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f32.f32( define @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f32.f32( @@ -1158,9 +1138,8 @@ declare @llvm.riscv.vfsub.nxv16f32.f32( define @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv16f32.f32( @@ -1182,9 +1161,8 @@ declare @llvm.riscv.vfsub.mask.nxv16f32.f32( define @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv16f32.f32( @@ -1205,9 +1183,8 @@ declare @llvm.riscv.vfsub.nxv1f64.f64( define @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; 
CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv1f64.f64( @@ -1229,9 +1206,8 @@ declare @llvm.riscv.vfsub.mask.nxv1f64.f64( define @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv1f64.f64( @@ -1252,9 +1228,8 @@ declare @llvm.riscv.vfsub.nxv2f64.f64( define @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv2f64.f64( @@ -1276,9 +1251,8 @@ declare @llvm.riscv.vfsub.mask.nxv2f64.f64( define @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv2f64.f64( @@ -1299,9 +1273,8 @@ declare @llvm.riscv.vfsub.nxv4f64.f64( define @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv4f64.f64( @@ -1323,9 +1296,8 @@ declare @llvm.riscv.vfsub.mask.nxv4f64.f64( define @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv4f64.f64( @@ -1346,9 +1318,8 @@ declare @llvm.riscv.vfsub.nxv8f64.f64( define @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.nxv8f64.f64( @@ -1370,9 +1341,8 @@ declare @llvm.riscv.vfsub.mask.nxv8f64.f64( define @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll index 02ce6a1c85f90..a3bdcc4573287 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( define @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( define @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( define @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( define @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( define 
@intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( define @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( define @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; 
CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( define @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( define @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( define @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll index 135b56dff1b47..a04b9a54b9306 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( define @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( define @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( define @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( define @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( define 
@intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( define @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( define @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; 
CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( define @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( define @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( define @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll index fde2d9f89d045..3d046d2ba8057 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , , @@ -416,9 +416,8 @@ declare @llvm.riscv.vfwadd.w.nxv1f32.f16( define @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.f16( @@ -440,9 +439,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( @@ -463,9 +461,8 @@ declare @llvm.riscv.vfwadd.w.nxv2f32.f16( define @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.f16( @@ -487,9 +484,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( @@ -510,9 +506,8 @@ declare @llvm.riscv.vfwadd.w.nxv4f32.f16( define @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.f16( @@ -534,9 +529,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( @@ -557,9 +551,8 @@ declare @llvm.riscv.vfwadd.w.nxv8f32.f16( define @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.f16( @@ -581,9 +574,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( define 
@intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( @@ -604,9 +596,8 @@ declare @llvm.riscv.vfwadd.w.nxv16f32.f16( define @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.f16( @@ -628,9 +619,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( @@ -651,9 +641,8 @@ declare @llvm.riscv.vfwadd.w.nxv1f64.f32( define @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfwadd.w.nxv1f64.f32( @@ -675,9 +664,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( @@ -698,9 +686,8 @@ declare @llvm.riscv.vfwadd.w.nxv2f64.f32( define @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.f32( @@ -722,9 +709,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( @@ -745,9 +731,8 @@ declare @llvm.riscv.vfwadd.w.nxv4f64.f32( define @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli 
zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.f32( @@ -769,9 +754,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( @@ -792,9 +776,8 @@ declare @llvm.riscv.vfwadd.w.nxv8f64.f32( define @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.f32( @@ -816,9 +799,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( @@ -987,9 +969,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli 
zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( @@ -1005,9 +986,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( @@ -1023,9 +1003,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( @@ -1041,9 +1020,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( @@ -1059,9 +1037,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( @@ -1077,9 +1054,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( @@ -1095,9 +1071,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( @@ -1113,9 +1088,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( @@ -1131,9 +1105,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll index 9235c3f251d41..3586ec64e9dfe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , , @@ -416,9 +416,8 @@ declare @llvm.riscv.vfwadd.w.nxv1f32.f16( define @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.f16( @@ -440,9 +439,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( @@ -463,9 +461,8 @@ declare @llvm.riscv.vfwadd.w.nxv2f32.f16( define 
@intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.f16( @@ -487,9 +484,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( @@ -510,9 +506,8 @@ declare @llvm.riscv.vfwadd.w.nxv4f32.f16( define @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.f16( @@ -534,9 +529,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfwadd.w.mask.nxv4f32.f16( @@ -557,9 +551,8 @@ declare @llvm.riscv.vfwadd.w.nxv8f32.f16( define @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.f16( @@ -581,9 +574,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( @@ -604,9 +596,8 @@ declare @llvm.riscv.vfwadd.w.nxv16f32.f16( define @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.f16( @@ -628,9 +619,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( define @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( @@ -651,9 +641,8 @@ declare @llvm.riscv.vfwadd.w.nxv1f64.f32( define @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.f32( @@ -675,9 +664,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( @@ -698,9 +686,8 @@ declare @llvm.riscv.vfwadd.w.nxv2f64.f32( define @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.f32( @@ -722,9 +709,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: 
vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( @@ -745,9 +731,8 @@ declare @llvm.riscv.vfwadd.w.nxv4f64.f32( define @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.f32( @@ -769,9 +754,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( @@ -792,9 +776,8 @@ declare @llvm.riscv.vfwadd.w.nxv8f64.f32( define @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.f32( @@ -816,9 +799,8 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( define @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( @@ -987,9 +969,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( @@ -1005,9 +986,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( @@ -1023,9 +1003,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( @@ -1041,9 +1020,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, 
i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( @@ -1059,9 +1037,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( @@ -1077,9 +1054,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( @@ -1095,9 +1071,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( @@ -1113,9 +1088,8 @@ entry: define 
@intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( @@ -1131,9 +1105,8 @@ entry: define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll index ffcfd186c8e10..0de121cb3f002 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwmacc.nxv1f32.f16( define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call 
@llvm.riscv.vfwmacc.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwmacc.nxv2f32.f16( define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwmacc.nxv4f32.f16( define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwmacc.nxv8f32.f16( define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwmacc.nxv16f32.f16( define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv16f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwmacc.nxv1f64.f32( define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwmacc.nxv2f64.f32( define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwmacc.nxv4f64.f32( define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare 
@llvm.riscv.vfwmacc.nxv8f64.f32( define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll index bd2dac9493022..eb21c54c18e90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwmacc.nxv1f32.f16( define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwmacc.nxv2f32.f16( define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwmacc.nxv4f32.f16( define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, 
ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwmacc.nxv8f32.f16( define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwmacc.nxv16f32.f16( define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv16f32.f16( define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwmacc.nxv1f64.f32( define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwmacc.nxv2f64.f32( define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, 
i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwmacc.nxv4f64.f32( define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare 
@llvm.riscv.vfwmacc.nxv8f64.f32( define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f64.f32( define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll index 8035ec77c03ff..82c4fad996e71 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwmsac.nxv1f32.f16( define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwmsac.nxv2f32.f16( define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwmsac.nxv4f32.f16( define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, 
ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwmsac.nxv8f32.f16( define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwmsac.nxv16f32.f16( define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv16f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwmsac.nxv1f64.f32( define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwmsac.nxv2f64.f32( define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, 
i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwmsac.nxv4f64.f32( define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare 
@llvm.riscv.vfwmsac.nxv8f64.f32( define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll index 01d2ce92c916c..b2e1e235e9695 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwmsac.nxv1f32.f16( define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwmsac.nxv2f32.f16( define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwmsac.nxv4f32.f16( define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, 
ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwmsac.nxv8f32.f16( define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwmsac.nxv16f32.f16( define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv16f32.f16( define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwmsac.nxv1f64.f32( define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwmsac.nxv2f64.f32( define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, 
i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwmsac.nxv4f64.f32( define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare 
@llvm.riscv.vfwmsac.nxv8f64.f32( define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f64.f32( define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll index 8e40ddc009af6..670c79975a2e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( define @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; 
CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( define @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( define @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare 
@llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll index 9b5a7b0641806..fc7d8dcb59e31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( define @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; 
CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( define @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, ft0 +; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x 
ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( define @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare 
@llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll index 8a5db70f5baf7..02842609f5685 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwnmacc.nxv1f32.f16( define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 
; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwnmacc.nxv2f32.f16( define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwnmacc.nxv4f32.f16( define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; 
CHECK-NEXT: vfwnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwnmacc.nxv8f32.f16( define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwnmacc.nxv16f32.f16( define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwnmacc.nxv1f64.f32( define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare 
@llvm.riscv.vfwnmacc.nxv2f64.f32( define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwnmacc.nxv4f64.f32( define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: 
vfwnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare @llvm.riscv.vfwnmacc.nxv8f64.f32( define @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll index 0b21a7aa9b395..ff2b40cfac2cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwnmacc.nxv1f32.f16( define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, 
a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwnmacc.nxv2f32.f16( define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwnmacc.nxv4f32.f16( define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwnmacc.nxv8f32.f16( define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare 
@llvm.riscv.vfwnmacc.nxv16f32.f16( define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwnmacc.nxv1f64.f32( define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; 
CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwnmacc.nxv2f64.f32( define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwnmacc.nxv4f64.f32( define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli 
zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare @llvm.riscv.vfwnmacc.nxv8f64.f32( define @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll index 554234bb68a61..fe9683ed15adb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare @llvm.riscv.vfwnmsac.nxv1f32.f16( define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwnmsac.nxv2f32.f16( define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare 
@llvm.riscv.vfwnmsac.nxv4f32.f16( define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwnmsac.nxv8f32.f16( define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf 
v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwnmsac.nxv16f32.f16( define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwnmsac.nxv1f64.f32( define @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, 
e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwnmsac.nxv2f64.f32( define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwnmsac.nxv4f64.f32( define @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv4f64.f32( @@ -784,9 +769,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare @llvm.riscv.vfwnmsac.nxv8f64.f32( define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll index 44f2e66113939..2fe370bb1d82f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( , , @@ -424,9 +424,8 @@ declare 
@llvm.riscv.vfwnmsac.nxv1f32.f16( define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv1f32.f16( @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( @@ -472,9 +470,8 @@ declare @llvm.riscv.vfwnmsac.nxv2f32.f16( define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv2f32.f16( @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf 
v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( @@ -520,9 +516,8 @@ declare @llvm.riscv.vfwnmsac.nxv4f32.f16( define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv4f32.f16( @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( @@ -568,9 +562,8 @@ declare @llvm.riscv.vfwnmsac.nxv8f32.f16( define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv8f32.f16( @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu -; 
CHECK-NEXT: vfwnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( @@ -616,9 +608,8 @@ declare @llvm.riscv.vfwnmsac.nxv16f32.f16( define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv16f32.f16( @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( @@ -664,9 +654,8 @@ declare @llvm.riscv.vfwnmsac.nxv1f64.f32( define @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv1f64.f32( @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( @@ -712,9 +700,8 @@ declare @llvm.riscv.vfwnmsac.nxv2f64.f32( define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv2f64.f32( @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( @@ -760,9 +746,8 @@ declare @llvm.riscv.vfwnmsac.nxv4f64.f32( define @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv4f64.f32( @@ -784,9 +769,8 @@ declare 
@llvm.riscv.vfwnmsac.mask.nxv4f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v12, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( @@ -808,9 +792,8 @@ declare @llvm.riscv.vfwnmsac.nxv8f64.f32( define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.nxv8f64.f32( @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, ft0, v16, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll index 0d7c2e7fe0867..e81121f848ddb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < 
%s | FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( define @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( define @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; 
CHECK-NEXT: vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( define @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( define @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( define @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( define @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( define @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, 
%3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( define @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( define @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare 
@llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( define @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll index cdc77ffc55f8a..d4b0780f03c57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | 
FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( , , @@ -423,9 +423,8 @@ declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( define @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -448,9 +447,8 @@ declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( define @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( @@ -471,9 +469,8 @@ declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( define @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -496,9 +493,8 @@ declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( define @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: 
vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( @@ -519,9 +515,8 @@ declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( define @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -544,9 +539,8 @@ declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( define @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( @@ -567,9 +561,8 @@ declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( define @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -592,9 +585,8 @@ declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( define @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( @@ -615,9 +607,8 @@ declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( define @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -640,9 +631,8 @@ declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( define @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( @@ -663,9 +653,8 @@ declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( define @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v9, v8, fa0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -688,9 +677,8 @@ declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( define @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 
%4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( @@ -711,9 +699,8 @@ declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( define @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v10, v8, fa0 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret entry: @@ -736,9 +723,8 @@ declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( define @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( @@ -759,9 +745,8 @@ declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( define @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v12, v8, fa0 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret entry: @@ -784,9 +769,8 @@ declare 
@llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( define @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( @@ -807,9 +791,8 @@ declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( define @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v16, v8, fa0 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret entry: @@ -832,9 +815,8 @@ declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( define @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll index 3234dec9db144..da2290be93d27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | 
FileCheck %s +; RUN: -target-abi=ilp32d < %s | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , , @@ -416,9 +416,8 @@ declare @llvm.riscv.vfwsub.w.nxv1f32.f16( define @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.f16( @@ -440,9 +439,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( @@ -463,9 +461,8 @@ declare @llvm.riscv.vfwsub.w.nxv2f32.f16( define @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.f16( @@ -487,9 +484,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; 
CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( @@ -510,9 +506,8 @@ declare @llvm.riscv.vfwsub.w.nxv4f32.f16( define @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.f16( @@ -534,9 +529,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( @@ -557,9 +551,8 @@ declare @llvm.riscv.vfwsub.w.nxv8f32.f16( define @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.f16( @@ -581,9 +574,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( @@ -604,9 +596,8 @@ declare @llvm.riscv.vfwsub.w.nxv16f32.f16( define @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.f16( @@ -628,9 +619,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( @@ -651,9 +641,8 @@ declare @llvm.riscv.vfwsub.w.nxv1f64.f32( define @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.f32( @@ -675,9 +664,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float 
%2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( @@ -698,9 +686,8 @@ declare @llvm.riscv.vfwsub.w.nxv2f64.f32( define @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.f32( @@ -722,9 +709,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( @@ -745,9 +731,8 @@ declare @llvm.riscv.vfwsub.w.nxv4f64.f32( define @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.f32( @@ -769,9 +754,8 @@ declare 
@llvm.riscv.vfwsub.w.mask.nxv4f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( @@ -792,9 +776,8 @@ declare @llvm.riscv.vfwsub.w.nxv8f64.f32( define @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.f32( @@ -816,9 +799,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( @@ -987,9 +969,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t 
; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( @@ -1005,9 +986,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( @@ -1023,9 +1003,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( @@ -1041,9 +1020,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( @@ -1059,9 +1037,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; 
CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( @@ -1077,9 +1054,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( @@ -1095,9 +1071,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( @@ -1113,9 +1088,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( @@ -1131,9 +1105,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; 
CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll index 545bc4e4a53de..ec0bd527dafe6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: -target-abi=lp64d < %s | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , , @@ -416,9 +416,8 @@ declare @llvm.riscv.vfwsub.w.nxv1f32.f16( define @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.f16( @@ -440,9 +439,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( @@ -463,9 +461,8 @@ declare @llvm.riscv.vfwsub.w.nxv2f32.f16( define @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.f16( @@ -487,9 +484,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( @@ -510,9 +506,8 @@ declare @llvm.riscv.vfwsub.w.nxv4f32.f16( define @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.f16( @@ -534,9 +529,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( @@ -557,9 +551,8 @@ declare @llvm.riscv.vfwsub.w.nxv8f32.f16( define @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i64 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.f16( @@ -581,9 +574,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( @@ -604,9 +596,8 @@ declare @llvm.riscv.vfwsub.w.nxv16f32.f16( define @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.f16( @@ -628,9 +619,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( define @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( @@ -651,9 +641,8 @@ declare 
@llvm.riscv.vfwsub.w.nxv1f64.f32( define @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.f32( @@ -675,9 +664,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( @@ -698,9 +686,8 @@ declare @llvm.riscv.vfwsub.w.nxv2f64.f32( define @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.f32( @@ -722,9 +709,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( @@ -745,9 +731,8 @@ declare @llvm.riscv.vfwsub.w.nxv4f64.f32( define @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.f32( @@ -769,9 +754,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( @@ -792,9 +776,8 @@ declare @llvm.riscv.vfwsub.w.nxv8f64.f32( define @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.f32( @@ -816,9 +799,8 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( define @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, ft0, 
v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( @@ -987,9 +969,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( @@ -1005,9 +986,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( @@ -1023,9 +1003,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( @@ -1041,9 +1020,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; 
CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( @@ -1059,9 +1037,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( @@ -1077,9 +1054,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( @@ -1095,9 +1071,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( @@ -1113,9 +1088,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( @@ -1131,9 +1105,8 @@ entry: define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( From 3dc6fd5151355c309f0c4595b63268138ac57910 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Sun, 23 Jan 2022 09:44:09 +0000 Subject: [PATCH 271/946] [llvm-objcopy][MachO] Implement --update-section Implements `--update-section` which is currently supported for ELF for Mach-O as well Reviewed By: alexander-shaposhnikov Differential Revision: https://reviews.llvm.org/D117281 --- .../llvm-objcopy/MachO/update-section.test | 115 ++++++++++++++++++ .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 56 +++++++++ 2 files changed, 171 insertions(+) create mode 100644 llvm/test/tools/llvm-objcopy/MachO/update-section.test diff --git a/llvm/test/tools/llvm-objcopy/MachO/update-section.test b/llvm/test/tools/llvm-objcopy/MachO/update-section.test new file mode 100644 index 0000000000000..a4fa5423ee52b --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/MachO/update-section.test @@ -0,0 +1,115 @@ +# RUN: echo -n AAAB > %t.diff +# RUN: echo -n AAA > %t.smaller +# RUN: echo -n AAAAAAAAA > %t.larger + +# RUN: yaml2obj --docnum=1 %s -o %t + +# RUN: llvm-objcopy --update-section __TEXT,__text=%t.diff %t - | obj2yaml | FileCheck %s +# CHECK: content: '41414142' + +# RUN: llvm-objcopy --update-section __TEXT,__text=%t.smaller %t - | 
obj2yaml | FileCheck %s --check-prefix=SMALLER +# SMALLER: content: '414141' + +# RUN: not llvm-objcopy --update-section __TEXT,__text=%t.larger %t /dev/null 2>&1 | FileCheck %s --check-prefix=TOO-LARGE +# TOO-LARGE: error: {{.*}}new section cannot be larger than previous section + +# RUN: not llvm-objcopy --update-section __TEXT,__text=%t.noexist %t /dev/null + +# RUN: not llvm-objcopy --update-section __NOEXIST,__text=%t.diff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NO-SEGMENT +# NO-SEGMENT: error: {{.*}}could not find segment with name '__NOEXIST' + +# RUN: not llvm-objcopy --update-section __TEXT,__noexist=%t.diff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NO-SECTION +# NO-SECTION: error: {{.*}}could not find section with name '__noexist' + +# RUN: yaml2obj --docnum=2 %s -o %t + +# RUN: llvm-objcopy --update-section __TEXT,__text=%t.diff %t - | obj2yaml | FileCheck %s --check-prefix=FULL-SECNAME +# FULL-SECNAME: content: '41414142' + +# RUN: not llvm-objcopy --update-section __text=%t.dff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NON-CANONICAL-SECNAME +# NON-CANONICAL-SECNAME: error: {{.*}}invalid section name '__text' (should be formatted as ',

') + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 1 + sizeofcmds: 152 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0 + vmsize: 4 + fileoff: 184 + filesize: 4 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + content: '41414141' + size: 4 + offset: 184 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 1 + sizeofcmds: 312 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: '__TEXT' + vmaddr: 0 + vmsize: 12 + fileoff: 344 + filesize: 12 + maxprot: 7 + initprot: 7 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + content: 'AABBCCDD' + size: 4 + offset: 344 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + - sectname: __text + segname: __TEXT2 + addr: 0x0000000000000004 + content: '' + size: 0 + offset: 348 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp index 915394b65b12d..0f92ca516bef7 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp @@ -317,6 +317,52 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) { return Error::success(); } +static Expected
findSection(StringRef SecName, Object &O) { + StringRef SegName; + std::tie(SegName, SecName) = SecName.split(","); + auto FoundSeg = + llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) { + return LC.getSegmentName() == SegName; + }); + if (FoundSeg == O.LoadCommands.end()) + return createStringError(errc::invalid_argument, + "could not find segment with name '%s'", + SegName.str().c_str()); + auto FoundSec = llvm::find_if(FoundSeg->Sections, + [SecName](const std::unique_ptr
&Sec) { + return Sec->Sectname == SecName; + }); + if (FoundSec == FoundSeg->Sections.end()) + return createStringError(errc::invalid_argument, + "could not find section with name '%s'", + SecName.str().c_str()); + + assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str()); + return *FoundSec->get(); +} + +static Error updateSection(StringRef SecName, StringRef Filename, Object &O) { + Expected
SecToUpdateOrErr = findSection(SecName, O); + + if (!SecToUpdateOrErr) + return SecToUpdateOrErr.takeError(); + Section &Sec = *SecToUpdateOrErr; + + ErrorOr> BufOrErr = + MemoryBuffer::getFile(Filename); + if (!BufOrErr) + return createFileError(Filename, errorCodeToError(BufOrErr.getError())); + std::unique_ptr Buf = std::move(*BufOrErr); + + if (Buf->getBufferSize() > Sec.Size) + return createStringError( + errc::invalid_argument, + "new section cannot be larger than previous section"); + Sec.Content = O.NewSectionsContents.save(Buf->getBuffer()); + Sec.Size = Sec.Content.size(); + return Error::success(); +} + // isValidMachOCannonicalName returns success if Name is a MachO cannonical name // (",
") and lengths of both segment and section names are // valid. @@ -374,6 +420,16 @@ static Error handleArgs(const CommonConfig &Config, return E; } + for (const auto &Flag : Config.UpdateSection) { + StringRef SectionName; + StringRef FileName; + std::tie(SectionName, FileName) = Flag.split('='); + if (Error E = isValidMachOCannonicalName(SectionName)) + return E; + if (Error E = updateSection(SectionName, FileName, Obj)) + return E; + } + if (Error E = processLoadCommands(MachOConfig, Obj)) return E; From a4f202549208b543ec4475c3210ecdbf84128776 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 11:34:25 +0000 Subject: [PATCH 272/946] [X86] Regenerate avx512-mask-op.ll Noticed on D86578 - several of the test cases were missing checks as they didn't start on a newline so the update script couldn't see them --- llvm/test/CodeGen/X86/avx512-mask-op.ll | 112 ++++++++++++++++++++++-- 1 file changed, 106 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 959ee566cfcfc..b93fcce26fbc6 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ +; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s 
--check-prefixes=CHECK,KNL +; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW +; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86 @@ -596,7 +596,22 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 %resse = sext <2 x i1>%res to <2 x i64> ret <2 x i64> %resse -}define void @test6(<16 x i1> %mask) { +} + +define void @test6(<16 x i1> %mask) { +; CHECK-LABEL: test6: +; CHECK: ## %bb.0: ## %allocas +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 +; CHECK-NEXT: vpmovmskb %xmm0, %eax +; CHECK-NEXT: testl $21845, %eax ## imm = 0x5555 +; CHECK-NEXT: retq +; +; X86-LABEL: test6: +; X86: ## %bb.0: ## %allocas +; X86-NEXT: vpsllw $7, %xmm0, %xmm0 +; X86-NEXT: vpmovmskb %xmm0, %eax +; X86-NEXT: testl $21845, %eax ## imm = 0x5555 +; X86-NEXT: retl allocas: %a= and <16 x i1> %mask, %b = bitcast <16 x i1> %a to i16 @@ -609,6 +624,7 @@ true: false: ret void } + define void @test7(<8 x i1> %mask) { ; KNL-LABEL: test7: ; KNL: ## %bb.0: ## %allocas @@ -666,6 +682,7 @@ true: false: ret void } + define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; KNL-LABEL: test8: ; KNL: ## %bb.0: @@ -756,6 +773,7 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { %res = sext <16 x i1> %mix to <16 x i8> ret <16 x i8> %res } + define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; KNL-LABEL: test9: ; KNL: ## %bb.0: @@ -838,7 +856,89 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { %mask 
= icmp sgt i32 %a1, %b1 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b ret <16 x i1>%c -}define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { +} + +define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { +; KNL-LABEL: test10: +; KNL: ## %bb.0: +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: jg LBB19_1 +; KNL-NEXT: ## %bb.2: +; KNL-NEXT: vpmovsxwq %xmm1, %zmm0 +; KNL-NEXT: jmp LBB19_3 +; KNL-NEXT: LBB19_1: +; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 +; KNL-NEXT: LBB19_3: +; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vpmovdw %zmm0, %ymm0 +; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; +; SKX-LABEL: test10: +; SKX: ## %bb.0: +; SKX-NEXT: cmpl %esi, %edi +; SKX-NEXT: jg LBB19_1 +; SKX-NEXT: ## %bb.2: +; SKX-NEXT: vpsllw $15, %xmm1, %xmm0 +; SKX-NEXT: jmp LBB19_3 +; SKX-NEXT: LBB19_1: +; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 +; SKX-NEXT: LBB19_3: +; SKX-NEXT: vpmovw2m %xmm0, %k0 +; SKX-NEXT: vpmovm2w %k0, %xmm0 +; SKX-NEXT: retq +; +; AVX512BW-LABEL: test10: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: cmpl %esi, %edi +; AVX512BW-NEXT: jg LBB19_1 +; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm0 +; AVX512BW-NEXT: jmp LBB19_3 +; AVX512BW-NEXT: LBB19_1: +; AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX512BW-NEXT: LBB19_3: +; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 +; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 +; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: test10: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: cmpl %esi, %edi +; AVX512DQ-NEXT: jg LBB19_1 +; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: vpmovsxwq %xmm1, %zmm0 +; AVX512DQ-NEXT: jmp LBB19_3 +; AVX512DQ-NEXT: LBB19_1: +; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512DQ-NEXT: LBB19_3: +; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, 
%k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; +; X86-LABEL: test10: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; X86-NEXT: jg LBB19_1 +; X86-NEXT: ## %bb.2: +; X86-NEXT: vpsllw $15, %xmm1, %xmm0 +; X86-NEXT: jmp LBB19_3 +; X86-NEXT: LBB19_1: +; X86-NEXT: vpsllw $15, %xmm0, %xmm0 +; X86-NEXT: LBB19_3: +; X86-NEXT: vpmovw2m %xmm0, %k0 +; X86-NEXT: vpmovm2w %k0, %xmm0 +; X86-NEXT: retl %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b ret <8 x i1>%c From ff05b93a02d18c3b388658de2186f570d725ec71 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 12:45:12 +0000 Subject: [PATCH 273/946] [llvm-objdump] Use cast<> instead of dyn_cast<> to avoid dereference of nullptr The pointers are always dereferenced immediately, so assert the cast is correct instead of returning nullptr --- llvm/tools/llvm-objdump/MachODump.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 193ba2da19410..31867625f0e5e 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -10231,12 +10231,12 @@ static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) { } void objdump::printMachOFileHeader(const object::ObjectFile *Obj) { - const MachOObjectFile *file = dyn_cast(Obj); + const MachOObjectFile *file = cast(Obj); PrintMachHeader(file, Verbose); } void objdump::printMachOLoadCommands(const object::ObjectFile *Obj) { - const MachOObjectFile *file = dyn_cast(Obj); + const MachOObjectFile *file = cast(Obj); uint32_t filetype = 0; uint32_t cputype = 0; if (file->is64Bit()) { From 20d46fbd4a51a0b80731268f8d72b62e87ead915 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 12:47:52 +0000 
Subject: [PATCH 274/946] [CodeGenPrepare] Use dyn_cast result to check for null pointers Simplifies logic and helps the static analyzer correctly check for nullptr dereferences --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 747f4e4fdecca..28f24e5ea9088 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4168,11 +4168,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // We can get through binary operator, if it is legal. In other words, the // binary operator must have a nuw or nsw flag. - const BinaryOperator *BinOp = dyn_cast(Inst); - if (isa_and_nonnull(BinOp) && - ((!IsSExt && BinOp->hasNoUnsignedWrap()) || - (IsSExt && BinOp->hasNoSignedWrap()))) - return true; + if (const auto *BinOp = dyn_cast(Inst)) + if (isa(BinOp) && + ((!IsSExt && BinOp->hasNoUnsignedWrap()) || + (IsSExt && BinOp->hasNoSignedWrap()))) + return true; // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) if ((Inst->getOpcode() == Instruction::And || @@ -4181,10 +4181,10 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) if (Inst->getOpcode() == Instruction::Xor) { - const ConstantInt *Cst = dyn_cast(Inst->getOperand(1)); // Make sure it is not a NOT. 
- if (Cst && !Cst->getValue().isAllOnes()) - return true; + if (const auto *Cst = dyn_cast(Inst->getOperand(1))) + if (!Cst->getValue().isAllOnes()) + return true; } // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) From 946f29028e063c768dc9b8c6e87c2eb584993df6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 12:50:12 +0000 Subject: [PATCH 275/946] [llvm-objdump] Use cast<> instead of dyn_cast<> to avoid dereference of nullptr The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr --- llvm/tools/llvm-objdump/WasmDump.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-objdump/WasmDump.cpp b/llvm/tools/llvm-objdump/WasmDump.cpp index 28311361d97e9..df0a08e5b1dd0 100644 --- a/llvm/tools/llvm-objdump/WasmDump.cpp +++ b/llvm/tools/llvm-objdump/WasmDump.cpp @@ -20,7 +20,7 @@ using namespace llvm; using namespace llvm::object; void objdump::printWasmFileHeader(const object::ObjectFile *Obj) { - const auto *File = dyn_cast(Obj); + const auto *File = cast(Obj); outs() << "Program Header:\n"; outs() << "Version: 0x"; From 86497026a266f153d1c2b823fe7758acc4ab959d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 12:57:12 +0000 Subject: [PATCH 276/946] [clang-tidy] Use cast<>/castAs<> instead of dyn_cast<>/getAs<> to avoid dereference of nullptr The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr --- .../clang-tidy/abseil/DurationFactoryScaleCheck.cpp | 2 +- .../clang-tidy/readability/SuspiciousCallArgumentCheck.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp index c9f3a7db03461..dbc3cf2e6128f 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ 
-192,7 +192,7 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) { Result.Nodes.getNodeAs("div_binop")) { // We next handle division. // For division, we only check the RHS. - const auto *FloatLit = llvm::dyn_cast(DivBinOp->getRHS()); + const auto *FloatLit = llvm::cast(DivBinOp->getRHS()); llvm::Optional NewScale = getNewScale(Scale, 1.0 / FloatLit->getValueAsApproximateDouble()); diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp index 6ef10925c1336..4d7c3451acc7a 100644 --- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp @@ -413,9 +413,9 @@ static bool areTypesCompatible(QualType ArgType, QualType ParamType, // Arithmetic types are interconvertible, except scoped enums. if (ParamType->isArithmeticType() && ArgType->isArithmeticType()) { if ((ParamType->isEnumeralType() && - ParamType->getAs()->getDecl()->isScoped()) || + ParamType->castAs()->getDecl()->isScoped()) || (ArgType->isEnumeralType() && - ArgType->getAs()->getDecl()->isScoped())) + ArgType->castAs()->getDecl()->isScoped())) return false; return true; From df0fd1c301d6a17c1cdeea1f19154e60a5b29f47 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 13:24:36 +0000 Subject: [PATCH 277/946] [clangd] Use castAs<> instead of getAs<> to avoid dereference of nullptr The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr --- clang-tools-extra/clangd/HeuristicResolver.cpp | 5 ++--- clang-tools-extra/clangd/Hover.cpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/HeuristicResolver.cpp b/clang-tools-extra/clangd/HeuristicResolver.cpp index 2505280ffa9aa..37e8f134efdfc 100644 --- a/clang-tools-extra/clangd/HeuristicResolver.cpp +++ 
b/clang-tools-extra/clangd/HeuristicResolver.cpp @@ -59,9 +59,8 @@ const Type *HeuristicResolver::getPointeeType(const Type *T) const { if (!T) return nullptr; - if (T->isPointerType()) { - return T->getAs()->getPointeeType().getTypePtrOrNull(); - } + if (T->isPointerType()) + return T->castAs()->getPointeeType().getTypePtrOrNull(); // Try to handle smart pointer types. diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index 1449faec559cd..58ef2e3feb99d 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -147,7 +147,7 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx, // FIXME: This doesn't handle composite types that contain a decltype in them. // We should rather have a printing policy for that. while (!QT.isNull() && QT->isDecltypeType()) - QT = QT->getAs()->getUnderlyingType(); + QT = QT->castAs()->getUnderlyingType(); HoverInfo::PrintedType Result; llvm::raw_string_ostream OS(Result.Type); // Special case: if the outer type is a tag type without qualifiers, then From c93491352cf3146559de7755283f0dd259392126 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 15:10:33 +0000 Subject: [PATCH 278/946] [lldb] CxxModuleHandler - use cast<> instead of dyn_cast<> to avoid dereference of nullptr The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr --- lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp index 74dd04600b4be..fecffd1183f89 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp @@ -138,7 +138,7 @@ getEqualLocalDeclContext(Sema &sema, DeclContext *foreign_ctxt) { // We currently only support building 
namespaces. if (foreign_ctxt->isNamespace()) { - NamedDecl *ns = llvm::dyn_cast(foreign_ctxt); + NamedDecl *ns = llvm::cast(foreign_ctxt); llvm::StringRef ns_name = ns->getName(); auto lookup_result = emulateLookupInCtxt(sema, ns_name, *parent); From d7aa402b4b8a325a68c20d0300ac6bc664766be0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 15:11:08 +0000 Subject: [PATCH 279/946] [lldb] PdbAstBuilder - use cast<> instead of dyn_cast<> to avoid dereference of nullptr The pointers are dereferenced immediately, so assert the cast is correct instead of returning nullptr --- lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 9473befa6cc34..dc0969a0ce7c6 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1081,7 +1081,7 @@ PdbAstBuilder::GetOrCreateFunctionDecl(PdbCompilandSymId func_id) { clang::FunctionDecl *function_decl = nullptr; if (parent->isRecord()) { - clang::QualType parent_qt = llvm::dyn_cast(parent) + clang::QualType parent_qt = llvm::cast(parent) ->getTypeForDecl() ->getCanonicalTypeInternal(); lldb::opaque_compiler_type_t parent_opaque_ty = @@ -1318,7 +1318,7 @@ void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf( if (!context->isNamespace()) continue; - clang::NamespaceDecl *ns = llvm::dyn_cast(context); + clang::NamespaceDecl *ns = llvm::cast(context); std::string actual_ns = ns->getQualifiedNameAsString(); if (llvm::StringRef(actual_ns).startswith(*parent)) { clang::QualType qt = GetOrCreateType(tid); From d13847bbe5e632ec8f62abc81f74b9351a56d28c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 15:12:44 +0000 Subject: [PATCH 280/946] [lldb] TerminalState::Save - fix unused variable warning Non-POSIX target builds don't use the 
file descriptor --- lldb/source/Host/common/Terminal.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp index 2a1c12e667bcf..831e9dff4eb18 100644 --- a/lldb/source/Host/common/Terminal.cpp +++ b/lldb/source/Host/common/Terminal.cpp @@ -417,8 +417,8 @@ bool TerminalState::Save(Terminal term, bool save_process_group) { Clear(); m_tty = term; if (m_tty.IsATerminal()) { - int fd = m_tty.GetFileDescriptor(); #if LLDB_ENABLE_POSIX + int fd = m_tty.GetFileDescriptor(); m_tflags = ::fcntl(fd, F_GETFL, 0); #if LLDB_ENABLE_TERMIOS std::unique_ptr new_data{new Terminal::Data()}; From 49d38b1d618c02964af93068ee8e1ac753722104 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 15:14:10 +0000 Subject: [PATCH 281/946] Fix "not all control paths return a value" warning. NFC. --- lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index c8063915b178d..d1d844bb4ca41 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -72,6 +72,7 @@ ConstString GenericBitsetFrontEnd::GetDataContainerMemberName() { case StdLib::LibStdcpp: return ConstString("_M_w"); } + llvm_unreachable("Unknown StdLib enum"); } bool GenericBitsetFrontEnd::Update() { From 938944445a1bad0f4467528015c8737227bbc9a7 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sat, 22 Jan 2022 13:06:05 -0500 Subject: [PATCH 282/946] [libc++] Mark LWG3541 as "Complete". NFC. 
Differential Revision: https://reviews.llvm.org/D117956 --- libcxx/docs/Status/Cxx2bIssues.csv | 4 ++-- libcxx/include/__iterator/readable_traits.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv index a91e3b52a1705..27b109eeb5dd9 100644 --- a/libcxx/docs/Status/Cxx2bIssues.csv +++ b/libcxx/docs/Status/Cxx2bIssues.csv @@ -85,7 +85,7 @@ `3536 `__,"Should ``chrono::from_stream()`` assign zero to duration for failure?","June 2021","","","|chrono|" `3539 `__,"``format_to`` must not copy models of ``output_iterator``","June 2021","","","|format|" `3540 `__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","","","|format|" -`3541 `__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","","","|ranges|" +`3541 `__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","|Complete|","14.0","|ranges|" `3542 `__,"``basic_format_arg`` mishandles ``basic_string_view`` with custom traits","June 2021","|Complete|","14.0","|format|" `3543 `__,"Definition of when ``counted_iterators`` refer to the same sequence isn't quite right","June 2021","","","|ranges|" `3544 `__,"``format-arg-store::args`` is unintentionally not exposition-only","June 2021","","","|format|" @@ -138,4 +138,4 @@ `3595 `__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|" "","","","","" `3645 `__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0","" -"","","","","" \ No newline at end of file +"","","","","" diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h index 90121bea80736..13f323e295ba1 100644 --- a/libcxx/include/__iterator/readable_traits.h +++ b/libcxx/include/__iterator/readable_traits.h @@ -57,14 +57,14 
@@ template<__has_member_element_type _Tp> struct indirectly_readable_traits<_Tp> : __cond_value_type {}; -// Pre-emptively applies LWG3541 template<__has_member_value_type _Tp> -requires __has_member_element_type<_Tp> + requires __has_member_element_type<_Tp> struct indirectly_readable_traits<_Tp> {}; + template<__has_member_value_type _Tp> -requires __has_member_element_type<_Tp> && - same_as, - remove_cv_t> + requires __has_member_element_type<_Tp> && + same_as, + remove_cv_t> struct indirectly_readable_traits<_Tp> : __cond_value_type {}; From 5d78fef6db15f7ba6642431fa3d07ddeda98d4f5 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Sat, 22 Jan 2022 15:24:53 -0500 Subject: [PATCH 283/946] [libc++] Fix LWG3437 "__cpp_lib_polymorphic_allocator is in the wrong header" https://cplusplus.github.io/LWG/issue3437 Differential Revision: https://reviews.llvm.org/D117963 --- libcxx/docs/Status/Cxx2bIssues.csv | 2 +- libcxx/include/version | 2 +- .../memory.version.pass.cpp | 39 ------------------- .../generate_feature_test_macro_components.py | 2 +- 4 files changed, 3 insertions(+), 42 deletions(-) diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv index 27b109eeb5dd9..27bc1e5832967 100644 --- a/libcxx/docs/Status/Cxx2bIssues.csv +++ b/libcxx/docs/Status/Cxx2bIssues.csv @@ -31,7 +31,7 @@ "`3427 `__","``operator<=>(const shared_ptr&, nullptr_t)`` definition ill-formed","November 2020","","","|spaceship|" "`3428 `__","``single_view``'s in place constructor should be explicit","November 2020","","","|ranges|" "`3434 `__","``ios_base`` never reclaims memory for iarray and parray","November 2020","","" -"`3437 `__","``__cpp_lib_polymorphic_allocator`` is in the wrong header","November 2020","","" +"`3437 `__","``__cpp_lib_polymorphic_allocator`` is in the wrong header","November 2020","|Complete|","14.0" "`3446 `__","``indirectly_readable_traits`` ambiguity for types with both ``value_type`` and ``element_type``","November 
2020","","","|ranges|" "`3448 `__","``transform_view``'s ``sentinel`` not comparable with ``iterator``","November 2020","","","|ranges|" "`3449 `__","``take_view`` and ``take_while_view``'s ``sentinel`` not comparable with their ``const iterator``","November 2020","","","|ranges|" diff --git a/libcxx/include/version b/libcxx/include/version index 39f1b8eb19ef1..0c23656128536 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -128,7 +128,7 @@ __cpp_lib_null_iterators 201304L __cpp_lib_optional 201606L __cpp_lib_out_ptr 202106L __cpp_lib_parallel_algorithm 201603L -__cpp_lib_polymorphic_allocator 201902L +__cpp_lib_polymorphic_allocator 201902L __cpp_lib_quoted_string_io 201304L __cpp_lib_ranges 201811L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp index 415eaa385767e..20fe2514b4eff 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp @@ -26,7 +26,6 @@ __cpp_lib_enable_shared_from_this 201603L [C++17] __cpp_lib_make_unique 201304L [C++14] __cpp_lib_out_ptr 202106L [C++2b] - __cpp_lib_polymorphic_allocator 201902L [C++20] __cpp_lib_ranges 201811L [C++20] __cpp_lib_raw_memory_algorithms 201606L [C++17] __cpp_lib_shared_ptr_arrays 201611L [C++17] @@ -82,10 +81,6 @@ # error "__cpp_lib_out_ptr should not be defined before c++2b" # endif -# ifdef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should not be defined before c++20" -# endif - # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -159,10 +154,6 @@ # error "__cpp_lib_out_ptr should not be defined before c++2b" # endif -# ifdef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should not be 
defined before c++20" -# endif - # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -248,10 +239,6 @@ # error "__cpp_lib_out_ptr should not be defined before c++2b" # endif -# ifdef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should not be defined before c++20" -# endif - # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -364,19 +351,6 @@ # error "__cpp_lib_out_ptr should not be defined before c++2b" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should be defined in c++20" -# endif -# if __cpp_lib_polymorphic_allocator != 201902L -# error "__cpp_lib_polymorphic_allocator should have the value 201902L in c++20" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should not be defined because it is unimplemented in libc++!" -# endif -# endif - # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_ranges # error "__cpp_lib_ranges should be defined in c++20" @@ -528,19 +502,6 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should be defined in c++2b" -# endif -# if __cpp_lib_polymorphic_allocator != 201902L -# error "__cpp_lib_polymorphic_allocator should have the value 201902L in c++2b" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_polymorphic_allocator -# error "__cpp_lib_polymorphic_allocator should not be defined because it is unimplemented in libc++!" 
-# endif -# endif - # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_ranges # error "__cpp_lib_ranges should be defined in c++2b" diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 7047f3f4b0cc1..7b2fc0b621940 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -508,7 +508,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_polymorphic_allocator", "values": { "c++20": 201902 }, - "headers": ["memory"], + "headers": ["memory_resource"], "unimplemented": True, }, { "name": "__cpp_lib_quoted_string_io", From d4ed3eff9f9c4d00689e34712db8ac0ca65ddb26 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 15:34:41 +0000 Subject: [PATCH 284/946] [X86] Add vector signbit parity checks for non-popcnt targets Noticed while looking at D117983 - we miss some parity patterns with/without popcnt --- llvm/test/CodeGen/X86/parity-vec.ll | 165 +++++++++++++++++++--------- 1 file changed, 115 insertions(+), 50 deletions(-) diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll index e52a32d261ed2..60d9d4be68e79 100644 --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -1,27 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt,+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-popcnt,+sse2 | FileCheck %s --check-prefix=NOPOPCNT +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt,+sse2 | FileCheck %s --check-prefix=POPCNT define i1 @noncanonical_parity(<16 x i1> %x) { -; CHECK-LABEL: noncanonical_parity: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; 
CHECK-NEXT: retq +; NOPOPCNT-LABEL: noncanonical_parity: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: xorb %ah, %al +; NOPOPCNT-NEXT: setnp %al +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: noncanonical_parity: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: # kill: def $al killed $al killed $eax +; POPCNT-NEXT: retq %r = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x) ret i1 %r } define i1 @canonical_parity(<16 x i1> %x) { -; CHECK-LABEL: canonical_parity: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: testb $1, %al -; CHECK-NEXT: setne %al -; CHECK-NEXT: retq +; NOPOPCNT-LABEL: canonical_parity: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: xorb %ah, %al +; NOPOPCNT-NEXT: setnp %al +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: canonical_parity: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: testb $1, %al +; POPCNT-NEXT: setne %al +; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) %i3 = and i16 %i2, 1 @@ -29,13 +46,37 @@ define i1 @canonical_parity(<16 x i1> %x) { ret i1 %i4 } define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) { -; CHECK-LABEL: canonical_parity_noncanonical_pred: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: retq +; NOPOPCNT-LABEL: canonical_parity_noncanonical_pred: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: movl %eax, %ecx +; NOPOPCNT-NEXT: shrl %ecx +; 
NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555 +; NOPOPCNT-NEXT: subl %ecx, %eax +; NOPOPCNT-NEXT: movl %eax, %ecx +; NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333 +; NOPOPCNT-NEXT: shrl $2, %eax +; NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333 +; NOPOPCNT-NEXT: addl %ecx, %eax +; NOPOPCNT-NEXT: movl %eax, %ecx +; NOPOPCNT-NEXT: shrl $4, %ecx +; NOPOPCNT-NEXT: addl %eax, %ecx +; NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F +; NOPOPCNT-NEXT: movl %ecx, %eax +; NOPOPCNT-NEXT: shll $8, %eax +; NOPOPCNT-NEXT: addl %ecx, %eax +; NOPOPCNT-NEXT: shrl $8, %eax +; NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: canonical_parity_noncanonical_pred: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: # kill: def $al killed $al killed $eax +; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) %i3 = and i16 %i2, 1 @@ -44,28 +85,44 @@ define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) { } define i1 @noncanonical_nonparity(<16 x i1> %x) { -; CHECK-LABEL: noncanonical_nonparity: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: xorb $1, %al -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: retq +; NOPOPCNT-LABEL: noncanonical_nonparity: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: xorb %ah, %al +; NOPOPCNT-NEXT: setp %al +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: noncanonical_nonparity: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: xorb $1, %al +; POPCNT-NEXT: # kill: def $al killed $al killed $eax +; POPCNT-NEXT: retq %r.inv = call i1 @llvm.vector.reduce.xor.v16i1(<16 x 
i1> %x) %r = xor i1 %r.inv, -1 ret i1 %r } define i1 @canonical_nonparity(<16 x i1> %x) { -; CHECK-LABEL: canonical_nonparity: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: testb $1, %al -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq +; NOPOPCNT-LABEL: canonical_nonparity: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: xorb %ah, %al +; NOPOPCNT-NEXT: setp %al +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: canonical_nonparity: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: testb $1, %al +; POPCNT-NEXT: sete %al +; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) %i3 = and i16 %i2, 1 @@ -73,15 +130,23 @@ define i1 @canonical_nonparity(<16 x i1> %x) { ret i1 %i4 } define i1 @canonical_nonparity_noncanonical_pred(<16 x i1> %x) { -; CHECK-LABEL: canonical_nonparity_noncanonical_pred: -; CHECK: # %bb.0: -; CHECK-NEXT: psllw $7, %xmm0 -; CHECK-NEXT: pmovmskb %xmm0, %eax -; CHECK-NEXT: popcntl %eax, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: xorb $1, %al -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: retq +; NOPOPCNT-LABEL: canonical_nonparity_noncanonical_pred: +; NOPOPCNT: # %bb.0: +; NOPOPCNT-NEXT: psllw $7, %xmm0 +; NOPOPCNT-NEXT: pmovmskb %xmm0, %eax +; NOPOPCNT-NEXT: xorb %ah, %al +; NOPOPCNT-NEXT: setp %al +; NOPOPCNT-NEXT: retq +; +; POPCNT-LABEL: canonical_nonparity_noncanonical_pred: +; POPCNT: # %bb.0: +; POPCNT-NEXT: psllw $7, %xmm0 +; POPCNT-NEXT: pmovmskb %xmm0, %eax +; POPCNT-NEXT: popcntl %eax, %eax +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: xorb $1, %al +; POPCNT-NEXT: # kill: def $al killed $al killed $eax +; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) %i3 = and i16 %i2, 1 From 
eb3f20e8fa4b76e0103f15623a2fc3b27fb03f85 Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Fri, 21 Jan 2022 07:59:27 +0000 Subject: [PATCH 285/946] [clang-tidy] Remove gsl::at suggestion from cppcoreguidelines-pro-bounds-constant-array-index Currently the fix hint is hardcoded to gsl::at(). This poses a problem for people who, for a number of reasons, don't want or cannot use the GSL library (introducing a new third-party dependency into a project is not a minor task). In these situations, the fix hint does more harm than good as it creates confusion as to what the fix should be. People can even misinterpret the fix "gsl::at" as e.g. "std::array::at", which can lead to even more trouble (e.g. when having guidelines that disallow exceptions). Furthermore, this is not a requirement from the C++ Core Guidelines, which simply require that array indexing be safe. Each project should be able to decide upon a strategy for safe indexing. The fix-it is kept for people who want to use the GSL library.
Differential Revision: https://reviews.llvm.org/D117857 --- .../cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp | 3 +-- clang-tools-extra/docs/ReleaseNotes.rst | 6 ++++++ .../cppcoreguidelines-pro-bounds-constant-array-index.rst | 2 ++ ...guidelines-pro-bounds-constant-array-index-gslheader.cpp | 6 +++--- .../cppcoreguidelines-pro-bounds-constant-array-index.cpp | 6 +++--- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp index 9cbe1fa65c139..59886ee4a3ebc 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp @@ -76,8 +76,7 @@ void ProBoundsConstantArrayIndexCheck::check( auto Diag = diag(Matched->getExprLoc(), "do not use array subscript when the index is " - "not an integer constant expression; use gsl::at() " - "instead"); + "not an integer constant expression"); if (!GslHeader.empty()) { Diag << FixItHint::CreateInsertion(BaseRange.getBegin(), "gsl::at(") << FixItHint::CreateReplacement( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index e3c1c4b9411bb..1f7228d5732bd 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -159,6 +159,12 @@ Changes in existing checks - Removed default setting ``cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors = "true"``, to match the current state of the C++ Core Guidelines. +- Removed suggestion ``use gsl::at`` from warning message in the + ``cppcoreguidelines-pro-bounds-constant-array-index`` check, since that is not + a requirement from the C++ Core Guidelines. This allows people to choose + their own safe indexing strategy. The fix-it is kept for those who want to + use the GSL library. 
+ - Updated :doc:`google-readability-casting ` to diagnose and fix functional casts, to achieve feature parity with the corresponding ``cpplint.py`` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst index 4528f2ac6bef6..2a598f2592019 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst @@ -11,6 +11,8 @@ arrays, see the `-Warray-bounds` Clang diagnostic. This rule is part of the "Bounds safety" profile of the C++ Core Guidelines, see https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Pro-bounds-arrayindex. +Optionally, this check can generate fixes using ``gsl::at`` for indexing. + Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp index 71b957d84740b..87550cbe84335 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp @@ -27,10 +27,10 @@ constexpr int const_index(int base) { void f(std::array a, int pos) { a [ pos / 2 /*comment*/] = 1; - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead [cppcoreguidelines-pro-bounds-constant-array-index] + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression [cppcoreguidelines-pro-bounds-constant-array-index] // CHECK-FIXES: gsl::at(a, pos / 2 /*comment*/) = 1; 
int j = a[pos - 1]; - // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression // CHECK-FIXES: int j = gsl::at(a, pos - 1); a.at(pos-1) = 2; // OK, at() instead of [] @@ -54,7 +54,7 @@ void g() { int a[10]; for (int i = 0; i < 10; ++i) { a[i] = i; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression // CHECK-FIXES: gsl::at(a, i) = i; gsl::at(a, i) = i; // OK, gsl::at() instead of [] } diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp index 351941099343a..7d5390ddecfb9 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp @@ -25,9 +25,9 @@ constexpr int const_index(int base) { void f(std::array a, int pos) { a [ pos / 2 /*comment*/] = 1; - // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead [cppcoreguidelines-pro-bounds-constant-array-index] + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression [cppcoreguidelines-pro-bounds-constant-array-index] int j = a[pos - 1]; - // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: 
do not use array subscript when the index is not an integer constant expression a.at(pos-1) = 2; // OK, at() instead of [] gsl::at(a, pos-1) = 2; // OK, gsl::at() instead of [] @@ -50,7 +50,7 @@ void g() { int a[10]; for (int i = 0; i < 10; ++i) { a[i] = i; - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression // CHECK-FIXES: gsl::at(a, i) = i; gsl::at(a, i) = i; // OK, gsl::at() instead of [] } From 153359180a9d5a6cea3985db5d3396218dc6252d Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Thu, 20 Jan 2022 21:50:26 +0100 Subject: [PATCH 286/946] [AVR] Remove regalloc workaround for LDDWRdPtrQ Background: https://github.com/avr-rust/rust-legacy-fork/issues/126 In short, this workaround was introduced to fix a "ran out of registers during regalloc" issue. The root cause has since been fixed in https://reviews.llvm.org/D54218 so this workaround can be removed. There is one test that changes a little bit, removing a single instruction. I also compiled compiler-rt before and after this patch but didn't see a difference. So presumably the impact is very low. Still, it's nice to be able to remove such a workaround. 
Differential Revision: https://reviews.llvm.org/D117831 --- llvm/lib/Target/AVR/AVRInstrInfo.td | 2 +- llvm/lib/Target/AVR/AVRRegisterInfo.td | 20 -------------------- llvm/test/CodeGen/AVR/lpmx.ll | 22 ++++++++++------------ 3 files changed, 11 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index a365bc868683d..5cbf3baef5466 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1394,7 +1394,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { // ldd Rd, P+q // ldd Rd+1, P+q+1 let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ - : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND + : Pseudo<(outs DREGS : $dst), (ins memri : $memri), diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td index bb4e86ca0536f..c5fda788fe4d7 100644 --- a/llvm/lib/Target/AVR/AVRRegisterInfo.td +++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td @@ -178,26 +178,6 @@ def DREGSMOVW : RegisterClass<"AVR", [i16], 8, R29R28, R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, R1R0)>; -// The 16-bit DREGS register class, excluding the Z pointer register. -// -// This is used by instructions which cause high pointer register -// contention which leads to an assertion in the register allocator. -// -// There is no technical reason why instructions that use this class -// cannot use Z; it's simply a workaround a regalloc bug. -// -// More information can be found in PR39553. -def DREGS_WITHOUT_YZ_WORKAROUND - : RegisterClass<"AVR", [i16], 8, - ( - // Return value and arguments. - add R25R24, R19R18, R21R20, R23R22, - // Scratch registers. - R27R26, - // Callee saved registers. - R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, - R1R0)>; - // 16-bit register class for immediate instructions. 
def DLDREGS : RegisterClass<"AVR", [i16], 8, ( diff --git a/llvm/test/CodeGen/AVR/lpmx.ll b/llvm/test/CodeGen/AVR/lpmx.ll index 4a78ce705e7e5..c5162e2efb503 100644 --- a/llvm/test/CodeGen/AVR/lpmx.ll +++ b/llvm/test/CodeGen/AVR/lpmx.ll @@ -22,13 +22,12 @@ define i16 @foo0(i16 %a) addrspace(1) { ; CHECK-O0-NEXT: out 61, r28 ; CHECK-O0-NEXT: std Y+1, r24 ; CHECK-O0-NEXT: std Y+2, r25 -; CHECK-O0-NEXT: ldd r24, Y+1 -; CHECK-O0-NEXT: ldd r25, Y+2 -; CHECK-O0-NEXT: lsl r24 -; CHECK-O0-NEXT: rol r25 -; CHECK-O0-NEXT: subi r24, -lo8(arr0) -; CHECK-O0-NEXT: sbci r25, -hi8(arr0) -; CHECK-O0-NEXT: movw r30, r24 +; CHECK-O0-NEXT: ldd r30, Y+1 +; CHECK-O0-NEXT: ldd r31, Y+2 +; CHECK-O0-NEXT: lsl r30 +; CHECK-O0-NEXT: rol r31 +; CHECK-O0-NEXT: subi r30, -lo8(arr0) +; CHECK-O0-NEXT: sbci r31, -hi8(arr0) ; CHECK-O0-NEXT: lpm r24, Z+ ; CHECK-O0-NEXT: lpm r25, Z ; CHECK-O0-NEXT: adiw r28, 2 @@ -95,11 +94,10 @@ define i8 @foo1(i16 %a) addrspace(1) { ; CHECK-O0-NEXT: out 61, r28 ; CHECK-O0-NEXT: std Y+1, r24 ; CHECK-O0-NEXT: std Y+2, r25 -; CHECK-O0-NEXT: ldd r24, Y+1 -; CHECK-O0-NEXT: ldd r25, Y+2 -; CHECK-O0-NEXT: subi r24, -lo8(arr1) -; CHECK-O0-NEXT: sbci r25, -hi8(arr1) -; CHECK-O0-NEXT: movw r30, r24 +; CHECK-O0-NEXT: ldd r30, Y+1 +; CHECK-O0-NEXT: ldd r31, Y+2 +; CHECK-O0-NEXT: subi r30, -lo8(arr1) +; CHECK-O0-NEXT: sbci r31, -hi8(arr1) ; CHECK-O0-NEXT: lpm r24, Z ; CHECK-O0-NEXT: adiw r28, 2 ; CHECK-O0-NEXT: in r0, 63 From 116ab78694dd2ad903c3fb101d48e01855282bf8 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Sun, 16 Jan 2022 08:42:48 +0100 Subject: [PATCH 287/946] [AVR] Make use of the constant value 0 in R1 The register R1 is defined to have the constant value 0 in the avr-gcc calling convention (which we follow). Unfortunately, we don't really make use of it. This patch replaces `LDI 0` instructions with a copy from R1. This reduces code size: my AVR build of compiler-rt goes from 50660 to 50240 bytes of code size, which is a 0.8% reduction. 
Presumably it will also improve execution speed, although I didn't measure this. Differential Revision: https://reviews.llvm.org/D117425 --- llvm/lib/Target/AVR/AVRISelLowering.cpp | 14 ++++++++++++++ llvm/lib/Target/AVR/AVRISelLowering.h | 2 ++ llvm/lib/Target/AVR/AVRInstrInfo.td | 4 ++++ llvm/test/CodeGen/AVR/smul-with-overflow.ll | 2 +- llvm/test/CodeGen/AVR/store-undef.ll | 3 +-- llvm/test/CodeGen/AVR/umul-with-overflow.ll | 2 +- 6 files changed, 23 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp index 894f4829fe3f1..a58fedf6cd364 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1695,6 +1695,18 @@ MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI, return BB; } +// Insert a read from R1, which almost always contains the value 0. +MachineBasicBlock * +AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + MachineBasicBlock::iterator I(MI); + BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY)) + .add(MI.getOperand(0)) + .addReg(AVR::R1); + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock * AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -1717,6 +1729,8 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case AVR::MULRdRr: case AVR::MULSRdRr: return insertMul(MI, MBB); + case AVR::CopyR1: + return insertCopyR1(MI, MBB); } assert((Opc == AVR::Select16 || Opc == AVR::Select8) && diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h index 223a47372ef7b..116417b615669 100644 --- a/llvm/lib/Target/AVR/AVRISelLowering.h +++ b/llvm/lib/Target/AVR/AVRISelLowering.h @@ -187,6 +187,8 @@ class AVRTargetLowering : public TargetLowering { private: MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const; 
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *insertCopyR1(MachineInstr &MI, + MachineBasicBlock *BB) const; }; } // end namespace llvm diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 5cbf3baef5466..2b96dc0b833ad 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -2390,6 +2390,10 @@ def Asr16 : ShiftPseudo<(outs DREGS : $src, i8 : $cnt))]>; +// lowered to a copy from R1, which contains the value zero. +let usesCustomInserter=1 in +def CopyR1 : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AVR/smul-with-overflow.ll b/llvm/test/CodeGen/AVR/smul-with-overflow.ll index f2d29161b4d32..cffb84fa0f212 100644 --- a/llvm/test/CodeGen/AVR/smul-with-overflow.ll +++ b/llvm/test/CodeGen/AVR/smul-with-overflow.ll @@ -18,7 +18,7 @@ entry-block: ; CHECK: ldi [[RET:r[0-9]+]], 1 ; CHECK: cp {{.*}}[[HIGH]], {{.*}}[[LOW]] ; CHECK: brne [[LABEL:.LBB[_0-9]+]] -; CHECK: ldi {{.*}}[[RET]], 0 +; CHECK: mov {{.*}}[[RET]], r1 ; CHECK: {{.*}}[[LABEL]] ; CHECK: ret } diff --git a/llvm/test/CodeGen/AVR/store-undef.ll b/llvm/test/CodeGen/AVR/store-undef.ll index 1f395b331ca26..4ea1a572a02d5 100644 --- a/llvm/test/CodeGen/AVR/store-undef.ll +++ b/llvm/test/CodeGen/AVR/store-undef.ll @@ -6,8 +6,7 @@ ; CHECK-LABEL: foo define void @foo() { - ; CHECK: ldi [[SRC:r[0-9]+]], 0 - ; CHECK-NEXT: st [[PTRREG:X|Y|Z]], [[SRC]] + ; CHECK: st [[PTRREG:X|Y|Z]], r1 store i8 0, i8* undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AVR/umul-with-overflow.ll b/llvm/test/CodeGen/AVR/umul-with-overflow.ll index c3c4a30f87ccf..6df0738151543 100644 --- a/llvm/test/CodeGen/AVR/umul-with-overflow.ll +++ 
b/llvm/test/CodeGen/AVR/umul-with-overflow.ll @@ -14,7 +14,7 @@ entry-block: ; CHECK: ldi [[RET:r[0-9]+]], 1 ; CHECK: cpi {{.*}}[[HIGH]], 0 ; CHECK: brne [[LABEL:.LBB[_0-9]+]] -; CHECK: ldi {{.*}}[[RET]], 0 +; CHECK: mov {{.*}}[[RET]], r1 ; CHECK: {{.*}}[[LABEL]] ; CHECK: ret } From 7c66aaddb128dc0f342830c1efaeb7a278bfc48c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 16:20:34 +0000 Subject: [PATCH 288/946] [DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff everything but LSB is known zero (PR51312) Fixes parity codegen issue where we know all but the lowest bit is zero, we can replace the ICMPNE with 0 comparison with a ext/trunc Differential Revision: https://reviews.llvm.org/D117983 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 19 +++++++++++++++---- llvm/test/CodeGen/X86/parity-vec.ll | 4 ++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3b53a5b8b7532..269c332ae28a1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3254,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const { - // Match these patterns in any of their permutations: - // (X & Y) == Y - // (X & Y) != Y if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) std::swap(N0, N1); + SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || (Cond != ISD::SETEQ && Cond != ISD::SETNE)) return SDValue(); + // (X & Y) != 0 --> zextOrTrunc(X & Y) + // iff everything but LSB is known zero: + if (Cond == ISD::SETNE && isNullConstant(N1) && + (getBooleanContents(VT) == TargetLowering::UndefinedBooleanContent || + getBooleanContents(VT) == 
TargetLowering::ZeroOrOneBooleanContent)) { + unsigned NumEltBits = OpVT.getScalarSizeInBits(); + APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1); + if (DAG.MaskedValueIsZero(N0, UpperBits)) + return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); + } + + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y SDValue X, Y; if (N0.getOperand(0) == N1) { X = N0.getOperand(1); @@ -3276,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } - SelectionDAG &DAG = DCI.DAG; SDValue Zero = DAG.getConstant(0, DL, OpVT); if (DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll index 60d9d4be68e79..ed64bb5eddf49 100644 --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) { ; POPCNT-NEXT: psllw $7, %xmm0 ; POPCNT-NEXT: pmovmskb %xmm0, %eax ; POPCNT-NEXT: popcntl %eax, %eax -; POPCNT-NEXT: testb $1, %al -; POPCNT-NEXT: setne %al +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: # kill: def $al killed $al killed $eax ; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) From 2e26633af0c88ea23e3e8783ef60e621f282d3fb Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 23 Jan 2022 11:11:26 -0500 Subject: [PATCH 289/946] [IR] document and update ctlz/cttz intrinsics to optionally return poison rather than undef The behavior in Analysis (knownbits) implements poison semantics already, and we expect the transforms (for example, in instcombine) derived from those semantics, so this patch changes the LangRef and remaining code to be consistent. This is one more step in removing "undef" from LLVM. 
Without this, I think https://github.com/llvm/llvm-project/issues/53330 has a legitimate complaint because that report wants to allow subsequent code to mask off bits, and that is allowed with undef values. The clang builtins are not actually documented anywhere AFAICT, but we might want to add that to remove more uncertainty. Differential Revision: https://reviews.llvm.org/D117912 --- llvm/docs/LangRef.rst | 42 ++++++-------- llvm/lib/Analysis/ConstantFolding.cpp | 4 +- llvm/lib/Analysis/ValueTracking.cpp | 4 +- .../InstCombine/InstCombineCalls.cpp | 6 +- .../InstCombine/InstCombineSelect.cpp | 6 +- .../InstCombine/intrinsic-select.ll | 2 +- .../test/Transforms/InstCombine/intrinsics.ll | 56 +++++++++---------- .../InstSimplify/ConstProp/bitcount.ll | 36 ++++++------ 8 files changed, 75 insertions(+), 81 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f748fc5d4d213..aa010193d114b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14958,12 +14958,8 @@ targets support all bit widths or vector types, however. :: - declare i8 @llvm.ctlz.i8 (i8 , i1 ) - declare i16 @llvm.ctlz.i16 (i16 , i1 ) - declare i32 @llvm.ctlz.i32 (i32 , i1 ) - declare i64 @llvm.ctlz.i64 (i64 , i1 ) - declare i256 @llvm.ctlz.i256(i256 , i1 ) - declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> , i1 ) + declare i8 @llvm.ctlz.i8 (i8 , i1 ) + declare <2 x i37> @llvm.ctlz.v2i37(<2 x i37> , i1 ) Overview: """"""""" @@ -14978,11 +14974,12 @@ The first argument is the value to be counted. This argument may be of any integer type, or a vector with integer element type. The return type must match the first argument type. -The second argument must be a constant and is a flag to indicate whether -the intrinsic should ensure that a zero as the first argument produces a -defined result. Historically some architectures did not provide a -defined result for zero values as efficiently, and many algorithms are -now predicated on avoiding zero-value inputs. 
+The second argument is a constant flag that indicates whether the intrinsic +returns a valid result if the first argument is zero. If the first +argument is zero and the second argument is true, the result is poison. +Historically some architectures did not provide a defined result for zero +values as efficiently, and many algorithms are now predicated on avoiding +zero-value inputs. Semantics: """""""""" @@ -14990,7 +14987,7 @@ Semantics: The '``llvm.ctlz``' intrinsic counts the leading (most significant) zeros in a variable, or within each element of the vector. If ``src == 0`` then the result is the size in bits of the type of ``src`` -if ``is_zero_undef == 0`` and ``undef`` otherwise. For example, +if ``is_zero_poison == 0`` and ``poison`` otherwise. For example, ``llvm.ctlz(i32 2) = 30``. '``llvm.cttz.*``' Intrinsic @@ -15005,12 +15002,8 @@ support all bit widths or vector types, however. :: - declare i8 @llvm.cttz.i8 (i8 , i1 ) - declare i16 @llvm.cttz.i16 (i16 , i1 ) - declare i32 @llvm.cttz.i32 (i32 , i1 ) - declare i64 @llvm.cttz.i64 (i64 , i1 ) - declare i256 @llvm.cttz.i256(i256 , i1 ) - declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> , i1 ) + declare i42 @llvm.cttz.i42 (i42 , i1 ) + declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> , i1 ) Overview: """"""""" @@ -15025,11 +15018,12 @@ The first argument is the value to be counted. This argument may be of any integer type, or a vector with integer element type. The return type must match the first argument type. -The second argument must be a constant and is a flag to indicate whether -the intrinsic should ensure that a zero as the first argument produces a -defined result. Historically some architectures did not provide a -defined result for zero values as efficiently, and many algorithms are -now predicated on avoiding zero-value inputs. +The second argument is a constant flag that indicates whether the intrinsic +returns a valid result if the first argument is zero. 
If the first +argument is zero and the second argument is true, the result is poison. +Historically some architectures did not provide a defined result for zero +values as efficiently, and many algorithms are now predicated on avoiding +zero-value inputs. Semantics: """""""""" @@ -15037,7 +15031,7 @@ Semantics: The '``llvm.cttz``' intrinsic counts the trailing (least significant) zeros in a variable, or within each element of a vector. If ``src == 0`` then the result is the size in bits of the type of ``src`` if -``is_zero_undef == 0`` and ``undef`` otherwise. For example, +``is_zero_poison == 0`` and ``poison`` otherwise. For example, ``llvm.cttz(2) = 1``. .. _int_overflow: diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 38c9cc7b9df29..772316e7469d9 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2572,9 +2572,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, case Intrinsic::ctlz: assert(C1 && "Must be constant int"); - // cttz(0, 1) and ctlz(0, 1) are undef. + // cttz(0, 1) and ctlz(0, 1) are poison. if (C1->isOne() && (!C0 || C0->isZero())) - return UndefValue::get(Ty); + return PoisonValue::get(Ty); if (!C0) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::cttz) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 942dbe043ab3f..34358739f9a85 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1593,7 +1593,7 @@ static void computeKnownBitsFromOperator(const Operator *I, computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If we have a known 1, its position is our upper bound. unsigned PossibleLZ = Known2.countMaxLeadingZeros(); - // If this call is undefined for 0, the result will be less than 2^n. + // If this call is poison for 0 input, the result will be less than 2^n. 
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) PossibleLZ = std::min(PossibleLZ, BitWidth - 1); unsigned LowBits = Log2_32(PossibleLZ)+1; @@ -1604,7 +1604,7 @@ static void computeKnownBitsFromOperator(const Operator *I, computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // If we have a known 1, its position is our upper bound. unsigned PossibleTZ = Known2.countMaxTrailingZeros(); - // If this call is undefined for 0, the result will be less than 2^n. + // If this call is poison for 0 input, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) PossibleTZ = std::min(PossibleTZ, BitWidth - 1); unsigned LowBits = Log2_32(PossibleTZ)+1; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index f63a186166ecc..3a3f169d2f516 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -494,7 +494,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { // ctlz/cttz i1 Op0 --> not Op0 if (match(Op1, m_Zero())) return BinaryOperator::CreateNot(Op0); - // If zero is undef, then the input can be assumed to be "true", so the + // If zero is poison, then the input can be assumed to be "true", so the // instruction simplifies to "false". assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1"); return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType())); @@ -519,7 +519,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { } // Zext doesn't change the number of trailing zeros, so narrow: - // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'. + // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'. 
if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) { auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X, IC.Builder.getTrue()); @@ -556,7 +556,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { } // If the input to cttz/ctlz is known to be non-zero, - // then change the 'ZeroIsUndef' parameter to 'true' + // then change the 'ZeroIsPoison' parameter to 'true' // because we know the zero behavior can't affect the result. if (!Known.One.isZero() || isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index cbdf04572042c..65e60498ff954 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -943,7 +943,7 @@ static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal, } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single -/// call to cttz/ctlz with flag 'is_zero_undef' cleared. +/// call to cttz/ctlz with flag 'is_zero_poison' cleared. /// /// For example, we can fold the following code sequence: /// \code @@ -987,7 +987,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, // sizeof in bits of 'Count'. unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits(); if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) { - // Explicitly clear the 'undef_on_zero' flag. It's always valid to go from + // Explicitly clear the 'is_zero_poison' flag. It's always valid to go from // true to false on this flag, so we can replace it for all users. II->setArgOperand(1, ConstantInt::getFalse(II->getContext())); return SelectArg; @@ -995,7 +995,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, // The ValueOnZero is not the bitwidth. 
But if the cttz/ctlz (and optional // zext/trunc) have one use (ending at the select), the cttz/ctlz result will - // not be used if the input is zero. Relax to 'undef_on_zero' for that case. + // not be used if the input is zero. Relax to 'zero is poison' for that case. if (II->hasOneUse() && SelectArg->hasOneUse() && !match(II->getArgOperand(1), m_One())) II->setArgOperand(1, ConstantInt::getTrue(II->getContext())); diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll index 92c5e3b38bd76..cbabbb8b4456f 100644 --- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll +++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll @@ -36,7 +36,7 @@ define i32 @ctlz_sel_const_true(i1 %b, i32 %x) { define <3 x i17> @ctlz_sel_const_false(<3 x i1> %b, <3 x i17> %x) { ; CHECK-LABEL: @ctlz_sel_const_false( ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i17> @llvm.ctlz.v3i17(<3 x i17> [[X:%.*]], i1 true) -; CHECK-NEXT: [[C:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[TMP1]], <3 x i17> +; CHECK-NEXT: [[C:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[TMP1]], <3 x i17> ; CHECK-NEXT: ret <3 x i17> [[C]] ; %s = select <3 x i1> %b, <3 x i17> %x, <3 x i17> diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll index cabd82c3044eb..fc1c1e5aed3a7 100644 --- a/llvm/test/Transforms/InstCombine/intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/intrinsics.ll @@ -79,8 +79,8 @@ define i1 @cttz_i1(i1 %arg) { ret i1 %cnt } -define i1 @cttz_i1_zero_is_undef(i1 %arg) { -; CHECK-LABEL: @cttz_i1_zero_is_undef( +define i1 @cttz_i1_zero_is_poison(i1 %arg) { +; CHECK-LABEL: @cttz_i1_zero_is_poison( ; CHECK-NEXT: ret i1 false ; %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 true) nounwind readnone @@ -96,8 +96,8 @@ define <2 x i1> @cttz_v2i1(<2 x i1> %arg) { ret <2 x i1> %cnt } -define <2 x i1> @cttz_v2i1_zero_is_undef(<2 x i1> %arg) { -; CHECK-LABEL: @cttz_v2i1_zero_is_undef( +define <2 
x i1> @cttz_v2i1_zero_is_poison(<2 x i1> %arg) { +; CHECK-LABEL: @cttz_v2i1_zero_is_poison( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %cnt = call <2 x i1> @llvm.cttz.v2i1(<2 x i1> %arg, i1 true) nounwind readnone @@ -196,8 +196,8 @@ define i1 @ctlz_i1(i1 %arg) { ret i1 %cnt } -define i1 @ctlz_i1_zero_is_undef(i1 %arg) { -; CHECK-LABEL: @ctlz_i1_zero_is_undef( +define i1 @ctlz_i1_zero_is_poison(i1 %arg) { +; CHECK-LABEL: @ctlz_i1_zero_is_poison( ; CHECK-NEXT: ret i1 false ; %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 true) nounwind readnone @@ -213,8 +213,8 @@ define <2 x i1> @ctlz_v2i1(<2 x i1> %arg) { ret <2 x i1> %cnt } -define <2 x i1> @ctlz_v2i1_zero_is_undef(<2 x i1> %arg) { -; CHECK-LABEL: @ctlz_v2i1_zero_is_undef( +define <2 x i1> @ctlz_v2i1_zero_is_poison(<2 x i1> %arg) { +; CHECK-LABEL: @ctlz_v2i1_zero_is_poison( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %cnt = call <2 x i1> @llvm.ctlz.v2i1(<2 x i1> %arg, i1 true) nounwind readnone @@ -283,24 +283,24 @@ define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) { ret <2 x i1> %res } -define i32 @ctlz_undef(i32 %Value) { -; CHECK-LABEL: @ctlz_undef( -; CHECK-NEXT: ret i32 undef +define i32 @ctlz_poison(i32 %Value) { +; CHECK-LABEL: @ctlz_poison( +; CHECK-NEXT: ret i32 poison ; %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true) ret i32 %ctlz } -define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) { -; CHECK-LABEL: @ctlz_undef_vec( -; CHECK-NEXT: ret <2 x i32> undef +define <2 x i32> @ctlz_poison_vec(<2 x i32> %Value) { +; CHECK-LABEL: @ctlz_poison_vec( +; CHECK-NEXT: ret <2 x i32> poison ; %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true) ret <2 x i32> %ctlz } -define i32 @ctlz_make_undef(i32 %a) { -; CHECK-LABEL: @ctlz_make_undef( +define i32 @ctlz_no_zero(i32 %a) { +; CHECK-LABEL: @ctlz_no_zero( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 8 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[OR]], i1 true), !range [[RNG2:![0-9]+]] ; CHECK-NEXT: ret i32 [[CTLZ]] @@ -310,8 
+310,8 @@ define i32 @ctlz_make_undef(i32 %a) { ret i32 %ctlz } -define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) { -; CHECK-LABEL: @ctlz_make_undef_vec( +define <2 x i32> @ctlz_no_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @ctlz_no_zero_vec( ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], ; CHECK-NEXT: [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTLZ]] @@ -321,24 +321,24 @@ define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) { ret <2 x i32> %ctlz } -define i32 @cttz_undef(i32 %Value) nounwind { -; CHECK-LABEL: @cttz_undef( -; CHECK-NEXT: ret i32 undef +define i32 @cttz_poison(i32 %Value) { +; CHECK-LABEL: @cttz_poison( +; CHECK-NEXT: ret i32 poison ; %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true) ret i32 %cttz } -define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind { -; CHECK-LABEL: @cttz_undef_vec( -; CHECK-NEXT: ret <2 x i32> undef +define <2 x i32> @cttz_poison_vec(<2 x i32> %Value) { +; CHECK-LABEL: @cttz_poison_vec( +; CHECK-NEXT: ret <2 x i32> poison ; %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true) ret <2 x i32> %cttz } -define i32 @cttz_make_undef(i32 %a) { -; CHECK-LABEL: @cttz_make_undef( +define i32 @cttz_no_zero(i32 %a) { +; CHECK-LABEL: @cttz_no_zero( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[A:%.*]], 8 ; CHECK-NEXT: [[CTTZ:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[OR]], i1 true), !range [[RNG3:![0-9]+]] ; CHECK-NEXT: ret i32 [[CTTZ]] @@ -348,8 +348,8 @@ define i32 @cttz_make_undef(i32 %a) { ret i32 %cttz } -define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) { -; CHECK-LABEL: @cttz_make_undef_vec( +define <2 x i32> @cttz_no_zero_vec(<2 x i32> %a) { +; CHECK-LABEL: @cttz_no_zero_vec( ; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[A:%.*]], ; CHECK-NEXT: [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) ; CHECK-NEXT: ret <2 x i32> [[CTTZ]] diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll index f737653a6f3bc..5d020e9677e3b 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll @@ -48,9 +48,9 @@ define i32 @cttz_zero_defined() { ret i32 %x } -define i32 @cttz_zero_undefined() { -; CHECK-LABEL: @cttz_zero_undefined( -; CHECK-NEXT: ret i32 undef +define i32 @cttz_zero_is_poison() { +; CHECK-LABEL: @cttz_zero_is_poison( +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.cttz.i32(i32 0, i1 true) ret i32 %x @@ -64,9 +64,9 @@ define i33 @ctlz_zero_defined() { ret i33 %x } -define i33 @ctlz_zero_undefined() { -; CHECK-LABEL: @ctlz_zero_undefined( -; CHECK-NEXT: ret i33 undef +define i33 @ctlz_zero_is_poison() { +; CHECK-LABEL: @ctlz_zero_is_poison( +; CHECK-NEXT: ret i33 poison ; %x = call i33 @llvm.ctlz.i33(i33 0, i1 true) ret i33 %x @@ -88,9 +88,9 @@ define i32 @cttz_undef_defined() { ret i32 %x } -define i32 @cttz_undef_undefined() { -; CHECK-LABEL: @cttz_undef_undefined( -; CHECK-NEXT: ret i32 undef +define i32 @cttz_undef_zero_is_poison() { +; CHECK-LABEL: @cttz_undef_zero_is_poison( +; CHECK-NEXT: ret i32 poison ; %x = call i32 @llvm.cttz.i32(i32 undef, i1 true) ret i32 %x @@ -104,9 +104,9 @@ define i33 @ctlz_undef_defined() { ret i33 %x } -define i33 @ctlz_undef_undefined() { -; CHECK-LABEL: @ctlz_undef_undefined( -; CHECK-NEXT: ret i33 undef +define i33 @ctlz_undef_zero_is_poison() { +; CHECK-LABEL: @ctlz_undef_zero_is_poison( +; CHECK-NEXT: ret i33 poison ; %x = call i33 @llvm.ctlz.i33(i33 undef, i1 true) ret i33 %x @@ -144,9 +144,9 @@ define <2 x i32> @cttz_vector_undef_defined() { ret <2 x i32> %x } -define <2 x i32> @cttz_vector_undef_undefined() { -; CHECK-LABEL: @cttz_vector_undef_undefined( -; CHECK-NEXT: ret <2 x i32> undef +define <2 x i32> @cttz_vector_undef_zero_is_poison() { +; CHECK-LABEL: @cttz_vector_undef_zero_is_poison( +; CHECK-NEXT: ret <2 x i32> poison ; %x = call <2 x i32> @llvm.cttz.v2i32(<2 x 
i32> , i1 true) ret <2 x i32> %x @@ -168,9 +168,9 @@ define <2 x i33> @ctlz_vector_undef_defined() { ret <2 x i33> %x } -define <2 x i33> @ctlz_vector_undef_undefined() { -; CHECK-LABEL: @ctlz_vector_undef_undefined( -; CHECK-NEXT: ret <2 x i33> undef +define <2 x i33> @ctlz_vector_undef_zero_is_poison() { +; CHECK-LABEL: @ctlz_vector_undef_zero_is_poison( +; CHECK-NEXT: ret <2 x i33> poison ; %x = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> , i1 true) ret <2 x i33> %x From d2e8fb331835fcc565929720781a5fd64e66fc17 Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 1 Jan 2022 22:47:22 -0700 Subject: [PATCH 290/946] [clang-tidy] Add readability-duplicate-include check Looks for duplicate includes and removes them. Every time an include directive is processed, check a vector of filenames to see if the included file has already been included. If so, it issues a warning and a replacement to remove the entire line containing the duplicated include directive. When a macro is defined or undefined, the vector of filenames is cleared. This enables including the same file multiple times, but getting different expansions based on the set of active macros at the time of inclusion. For example: #undef NDEBUG #include "assertion.h" // ...code with assertions enabled #define NDEBUG #include "assertion.h" // ...code with assertions disabled Since macros are redefined between the inclusion of assertion.h, they are not flagged as redundant. 
Differential Revision: https://reviews.llvm.org/D7982 --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../readability/DuplicateIncludeCheck.cpp | 116 ++++++++++++++++++ .../readability/DuplicateIncludeCheck.h | 35 ++++++ .../readability/ReadabilityTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/readability-duplicate-include.rst | 35 ++++++ .../readability-duplicate-include.h | 15 +++ .../readability-duplicate-include2.h | 1 + .../system/iostream | 1 + .../system/string.h | 1 + .../system/sys/types.h | 1 + .../system/types.h | 1 + .../readability-duplicate-include.cpp | 72 +++++++++++ 14 files changed, 288 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index eba0ab98cb37a..22ce8f62751ec 100644 --- 
a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -11,6 +11,7 @@ add_clang_library(clangTidyReadabilityModule ContainerSizeEmptyCheck.cpp ConvertMemberFunctionsToStatic.cpp DeleteNullPointerCheck.cpp + DuplicateIncludeCheck.cpp ElseAfterReturnCheck.cpp FunctionCognitiveComplexityCheck.cpp FunctionSizeCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp new file mode 100644 index 0000000000000..681b8399154a7 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp @@ -0,0 +1,116 @@ +//===--- DuplicateIncludeCheck.cpp - clang-tidy ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DuplicateIncludeCheck.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include + +namespace clang { +namespace tidy { +namespace readability { + +static SourceLocation advanceBeyondCurrentLine(const SourceManager &SM, + SourceLocation Start, + int Offset) { + const FileID Id = SM.getFileID(Start); + const unsigned LineNumber = SM.getSpellingLineNumber(Start); + while (SM.getFileID(Start) == Id && + SM.getSpellingLineNumber(Start.getLocWithOffset(Offset)) == LineNumber) + Start = Start.getLocWithOffset(Offset); + return Start; +} + +namespace { + +using FileList = SmallVector; + +class DuplicateIncludeCallbacks : public PPCallbacks { +public: + DuplicateIncludeCallbacks(DuplicateIncludeCheck &Check, + const SourceManager &SM) + : Check(Check), SM(SM) { + // The main file doesn't participate in the 
FileChanged notification. + Files.emplace_back(); + } + + void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override; + + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, + const Module *Imported, + SrcMgr::CharacteristicKind FileType) override; + + void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override; + + void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, + const MacroDirective *Undef) override; + +private: + // A list of included files is kept for each file we enter. + SmallVector<FileList> Files; + DuplicateIncludeCheck &Check; + const SourceManager &SM; +}; + +void DuplicateIncludeCallbacks::FileChanged(SourceLocation Loc, + FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) { + if (Reason == EnterFile) + Files.emplace_back(); + else + Files.pop_back(); +} + +void DuplicateIncludeCallbacks::InclusionDirective( + SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File, + StringRef SearchPath, StringRef RelativePath, const Module *Imported, + SrcMgr::CharacteristicKind FileType) { + if (llvm::find(Files.back(), FileName) != Files.back().end()) { + // We want to delete the entire line, so make sure that [Start,End] covers + // everything.
+ SourceLocation Start = + advanceBeyondCurrentLine(SM, HashLoc, -1).getLocWithOffset(-1); + SourceLocation End = + advanceBeyondCurrentLine(SM, FilenameRange.getEnd(), 1); + Check.diag(HashLoc, "duplicate include") + << FixItHint::CreateRemoval(SourceRange{Start, End}); + } else + Files.back().push_back(FileName); +} + +void DuplicateIncludeCallbacks::MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) { + Files.back().clear(); +} + +void DuplicateIncludeCallbacks::MacroUndefined(const Token &MacroNameTok, + const MacroDefinition &MD, + const MacroDirective *Undef) { + Files.back().clear(); +} + +} // namespace + +void DuplicateIncludeCheck::registerPPCallbacks( + const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) { + PP->addPPCallbacks(std::make_unique<DuplicateIncludeCallbacks>(*this, SM)); +} + +} // namespace readability +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h new file mode 100644 index 0000000000000..b213e3a4b73cc --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h @@ -0,0 +1,35 @@ +//===--- DuplicateIncludeCheck.h - clang-tidy -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace readability { + +/// \brief Find and remove duplicate #include directives.
+/// +/// Only consecutive include directives without any other preprocessor +/// directives between them are analyzed. +class DuplicateIncludeCheck : public ClangTidyCheck { +public: + DuplicateIncludeCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + + void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP, + Preprocessor *ModuleExpanderPP) override; +}; + +} // namespace readability +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 2d6540283ded5..b0493d43ff318 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -16,6 +16,7 @@ #include "ContainerSizeEmptyCheck.h" #include "ConvertMemberFunctionsToStatic.h" #include "DeleteNullPointerCheck.h" +#include "DuplicateIncludeCheck.h" #include "ElseAfterReturnCheck.h" #include "FunctionCognitiveComplexityCheck.h" #include "FunctionSizeCheck.h" @@ -71,6 +72,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-convert-member-functions-to-static"); CheckFactories.registerCheck<DeleteNullPointerCheck>( "readability-delete-null-pointer"); + CheckFactories.registerCheck<DuplicateIncludeCheck>( + "readability-duplicate-include"); CheckFactories.registerCheck<ElseAfterReturnCheck>( "readability-else-after-return"); CheckFactories.registerCheck<FunctionCognitiveComplexityCheck>( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 1f7228d5732bd..060af42521552 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -124,6 +124,11 @@ New checks Finds cases where code could use ``data()`` rather than the address of the element at index 0 in a container. +- New :doc:`readability-duplicate-include + <clang-tidy/checks/readability-duplicate-include>` check.
+ + Looks for duplicate includes and removes them. + - New :doc:`readability-identifier-length <clang-tidy/checks/readability-identifier-length>` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 9785c9f43a4b4..5878345bdfcfd 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -294,6 +294,7 @@ Clang-Tidy Checks `readability-container-size-empty <readability-container-size-empty.html>`_, "Yes" `readability-convert-member-functions-to-static <readability-convert-member-functions-to-static.html>`_, "Yes" `readability-delete-null-pointer <readability-delete-null-pointer.html>`_, "Yes" + `readability-duplicate-include <readability-duplicate-include.html>`_, "Yes" `readability-else-after-return <readability-else-after-return.html>`_, "Yes" `readability-function-cognitive-complexity <readability-function-cognitive-complexity.html>`_, `readability-function-size <readability-function-size.html>`_, diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst new file mode 100644 index 0000000000000..45df7e1b84f3f --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst @@ -0,0 +1,35 @@ +.. title:: clang-tidy - readability-duplicate-include + +readability-duplicate-include +============================= + +Looks for duplicate includes and removes them. The check maintains a list of +included files and looks for duplicates. If a macro is defined or undefined +then the list of included files is cleared. + +Examples: + +.. code-block:: c++ + + #include <memory> + #include <vector> + #include <memory> + +becomes + +.. code-block:: c++ + + #include <memory> + #include <vector> + +Because of the intervening macro definitions, this code remains unchanged: + +..
code-block:: c++ + + #undef NDEBUG + #include "assertion.h" + // ...code with assertions enabled + + #define NDEBUG + #include "assertion.h" + // ...code with assertions disabled diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h new file mode 100644 index 0000000000000..7d84adb816622 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h @@ -0,0 +1,15 @@ +#ifndef READABILITY_DUPLICATE_INCLUDE_H +#define READABILITY_DUPLICATE_INCLUDE_H + +extern int g; +#include "readability-duplicate-include2.h" +extern int h; +#include "readability-duplicate-include2.h" +extern int i; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include +// CHECK-FIXES: {{^extern int g;$}} +// CHECK-FIXES-NEXT: {{^#include "readability-duplicate-include2.h"$}} +// CHECK-FIXES-NEXT: {{^extern int h;$}} +// CHECK-FIXES-NEXT: {{^extern int i;$}} + +#endif diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h new file mode 100644 index 0000000000000..58dfa757ee7ae --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h @@ -0,0 +1 @@ +// This file is intentionally empty. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream new file mode 100644 index 0000000000000..fcbabe12fc378 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream @@ -0,0 +1 @@ +// This file is intentionally empty. diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h new file mode 100644 index 0000000000000..fcbabe12fc378 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h @@ -0,0 +1 @@ +// This file is intentionally empty. diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h new file mode 100644 index 0000000000000..58dfa757ee7ae --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h @@ -0,0 +1 @@ +// This file is intentionally empty. diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h new file mode 100644 index 0000000000000..fcbabe12fc378 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h @@ -0,0 +1 @@ +// This file is intentionally empty. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp new file mode 100644 index 0000000000000..f9a3c70ef86f4 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp @@ -0,0 +1,72 @@ +// RUN: %check_clang_tidy %s readability-duplicate-include %t -- -- -isystem %S/Inputs/readability-duplicate-include/system -I %S/Inputs/readability-duplicate-include + +int a; +#include +int b; +#include +int c; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include [readability-duplicate-include] +// CHECK-FIXES: {{^int a;$}} +// CHECK-FIXES-NEXT: {{^#include $}} +// CHECK-FIXES-NEXT: {{^int b;$}} +// CHECK-FIXES-NEXT: {{^int c;$}} + +int d; +#include +int e; +#include // extra stuff that will also be removed +int f; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include +// CHECK-FIXES: {{^int d;$}} +// CHECK-FIXES-NEXT: {{^#include $}} +// CHECK-FIXES-NEXT: {{^int e;$}} +// CHECK-FIXES-NEXT: {{^int f;$}} + +int g; +#include "readability-duplicate-include.h" +int h; +#include "readability-duplicate-include.h" +int i; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include +// CHECK-FIXES: {{^int g;$}} +// CHECK-FIXES-NEXT: {{^#include "readability-duplicate-include.h"$}} +// CHECK-FIXES-NEXT: {{^int h;$}} +// CHECK-FIXES-NEXT: {{^int i;$}} + +#include + +int j; +#include +int k; +#include +int l; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include +// CHECK-FIXES: {{^int j;$}} +// CHECK-FIXES-NEXT: {{^#include $}} +// CHECK-FIXES-NEXT: {{^int k;$}} +// CHECK-FIXES-NEXT: {{^int l;$}} + +int m; + # include // lots of space +int n; +// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: duplicate include +// CHECK-FIXES: {{^int m;$}} +// CHECK-FIXES-NEXT: {{^int n;$}} + +// defining a macro in the main file resets the included file cache +#define ARBITRARY_MACRO +int o; +#include +int p; +// CHECK-FIXES: 
{{^int o;$}} +// CHECK-FIXES-NEXT: {{^#include $}} +// CHECK-FIXES-NEXT: {{^int p;$}} + +// undefining a macro resets the cache +#undef ARBITRARY_MACRO +int q; +#include +int r; +// CHECK-FIXES: {{^int q;$}} +// CHECK-FIXES-NEXT: {{^#include $}} +// CHECK-FIXES-NEXT: {{^int r;$}} From 6605057992b15183663f0d918b7707f371862fd7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 16:28:38 +0000 Subject: [PATCH 291/946] Revert rG7c66aaddb128dc0f342830c1efaeb7a278bfc48c "[DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff everything but LSB is known zero (PR51312)" Noticed a typo in the getBooleanContents call just after I pressed commit :( --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 19 ++++--------------- llvm/test/CodeGen/X86/parity-vec.ll | 4 ++-- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 269c332ae28a1..3b53a5b8b7532 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3254,29 +3254,17 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const { + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) std::swap(N0, N1); - SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || (Cond != ISD::SETEQ && Cond != ISD::SETNE)) return SDValue(); - // (X & Y) != 0 --> zextOrTrunc(X & Y) - // iff everything but LSB is known zero: - if (Cond == ISD::SETNE && isNullConstant(N1) && - (getBooleanContents(VT) == TargetLowering::UndefinedBooleanContent || - getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent)) { - unsigned NumEltBits = 
OpVT.getScalarSizeInBits(); - APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1); - if (DAG.MaskedValueIsZero(N0, UpperBits)) - return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); - } - - // Match these patterns in any of their permutations: - // (X & Y) == Y - // (X & Y) != Y SDValue X, Y; if (N0.getOperand(0) == N1) { X = N0.getOperand(1); @@ -3288,6 +3276,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } + SelectionDAG &DAG = DCI.DAG; SDValue Zero = DAG.getConstant(0, DL, OpVT); if (DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll index ed64bb5eddf49..60d9d4be68e79 100644 --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) { ; POPCNT-NEXT: psllw $7, %xmm0 ; POPCNT-NEXT: pmovmskb %xmm0, %eax ; POPCNT-NEXT: popcntl %eax, %eax -; POPCNT-NEXT: andl $1, %eax -; POPCNT-NEXT: # kill: def $al killed $al killed $eax +; POPCNT-NEXT: testb $1, %al +; POPCNT-NEXT: setne %al ; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) From 631f3e621586d1d9a1e57ba0698e7eb35496d9fe Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Sun, 23 Jan 2022 16:30:34 +0000 Subject: [PATCH 292/946] [gn build] Port d2e8fb331835 --- .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 660fcb70eba0a..1e24a52be4725 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -19,6 +19,7 @@ static_library("readability") { "ContainerSizeEmptyCheck.cpp", 
"ConvertMemberFunctionsToStatic.cpp", "DeleteNullPointerCheck.cpp", + "DuplicateIncludeCheck.cpp", "ElseAfterReturnCheck.cpp", "FunctionCognitiveComplexityCheck.cpp", "FunctionSizeCheck.cpp", From accc07e65465094dc5e12e78bee45b4d459c4ccd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 16:36:18 +0000 Subject: [PATCH 293/946] [DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff everything but LSB is known zero (PR51312) Fixes parity codegen issue where we know all but the lowest bit is zero, we can replace the ICMPNE with 0 comparison with a ext/trunc Differential Revision: https://reviews.llvm.org/D117983 --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 19 +++++++++++++++---- llvm/test/CodeGen/X86/parity-vec.ll | 4 ++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3b53a5b8b7532..a98c21f16c712 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3254,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const { - // Match these patterns in any of their permutations: - // (X & Y) == Y - // (X & Y) != Y if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) std::swap(N0, N1); + SelectionDAG &DAG = DCI.DAG; EVT OpVT = N0.getValueType(); if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || (Cond != ISD::SETEQ && Cond != ISD::SETNE)) return SDValue(); + // (X & Y) != 0 --> zextOrTrunc(X & Y) + // iff everything but LSB is known zero: + if (Cond == ISD::SETNE && isNullConstant(N1) && + (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent || + getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) { + unsigned NumEltBits = OpVT.getScalarSizeInBits(); + APInt 
UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1); + if (DAG.MaskedValueIsZero(N0, UpperBits)) + return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); + } + + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y SDValue X, Y; if (N0.getOperand(0) == N1) { X = N0.getOperand(1); @@ -3276,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } - SelectionDAG &DAG = DCI.DAG; SDValue Zero = DAG.getConstant(0, DL, OpVT); if (DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll index 60d9d4be68e79..ed64bb5eddf49 100644 --- a/llvm/test/CodeGen/X86/parity-vec.ll +++ b/llvm/test/CodeGen/X86/parity-vec.ll @@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) { ; POPCNT-NEXT: psllw $7, %xmm0 ; POPCNT-NEXT: pmovmskb %xmm0, %eax ; POPCNT-NEXT: popcntl %eax, %eax -; POPCNT-NEXT: testb $1, %al -; POPCNT-NEXT: setne %al +; POPCNT-NEXT: andl $1, %eax +; POPCNT-NEXT: # kill: def $al killed $al killed $eax ; POPCNT-NEXT: retq %i1 = bitcast <16 x i1> %x to i16 %i2 = call i16 @llvm.ctpop.i16(i16 %i1) From 0b799791807e6b23a568526484f6cdaf0984cf02 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Jan 2022 23:05:19 -0800 Subject: [PATCH 294/946] [RISCV] Merge some rvv intrinsic test cases that only differ by XLen type. Instead of having a test for i32 XLen and i64 XLen, use sed to replace iXLen with i32/i64 before running llc. This change covers all of the floating point tests. 
--- llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll | 1356 ----------------- .../RISCV/rvv/{vfadd-rv32.ll => vfadd.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll | 692 --------- .../RISCV/rvv/{vfclass-rv32.ll => vfclass.ll} | 186 +-- llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll | 617 -------- .../rvv/{vfcvt-f-x-rv32.ll => vfcvt-f-x.ll} | 216 +-- .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll | 617 -------- .../rvv/{vfcvt-f-xu-rv32.ll => vfcvt-f-xu.ll} | 216 +-- .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll | 617 -------- ...vfcvt-rtz-x-f-rv64.ll => vfcvt-rtz-x-f.ll} | 216 +-- .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll | 617 -------- ...cvt-rtz-xu-f-rv32.ll => vfcvt-rtz-xu-f.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll | 617 -------- .../rvv/{vfcvt-x-f-rv32.ll => vfcvt-x-f.ll} | 216 +-- .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll | 617 -------- .../rvv/{vfcvt-xu-f-rv32.ll => vfcvt-xu-f.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll | 1355 ---------------- .../RISCV/rvv/{vfdiv-rv32.ll => vfdiv.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll | 1106 -------------- .../RISCV/rvv/{vfmacc-rv64.ll => vfmacc.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfmadd-rv32.ll => vfmadd.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll | 1355 ---------------- .../RISCV/rvv/{vfmax-rv64.ll => vfmax.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll | 902 ----------- .../RISCV/rvv/{vfmerge-rv64.ll => vfmerge.ll} | 246 +-- llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll | 1355 ---------------- .../RISCV/rvv/{vfmin-rv64.ll => vfmin.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfmsac-rv32.ll => vfmsac.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfmsub-rv32.ll => vfmsub.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll | 1355 ---------------- .../RISCV/rvv/{vfmul-rv32.ll => 
vfmul.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll | 197 --- .../rvv/{vfmv.s.f-rv32.ll => vfmv.s.f.ll} | 95 +- llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll | 482 ------ .../rvv/{vfmv.v.f-rv32.ll => vfmv.v.f.ll} | 156 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll | 380 ----- .../rvv/{vfncvt-f-f-rv32.ll => vfncvt-f-f.ll} | 132 +- .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll | 380 ----- .../rvv/{vfncvt-f-x-rv32.ll => vfncvt-f-x.ll} | 132 +- .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll | 380 ----- .../{vfncvt-f-xu-rv32.ll => vfncvt-f-xu.ll} | 132 +- .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll | 380 ----- ...ncvt-rod-f-f-rv32.ll => vfncvt-rod-f-f.ll} | 132 +- .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll | 632 -------- ...ncvt-rtz-x-f-rv32.ll => vfncvt-rtz-x-f.ll} | 216 +-- .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll | 632 -------- ...vt-rtz-xu-f-rv32.ll => vfncvt-rtz-xu-f.ll} | 216 +-- .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll | 632 -------- .../rvv/{vfncvt-x-f-rv32.ll => vfncvt-x-f.ll} | 216 +-- .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll | 632 -------- .../{vfncvt-xu-f-rv32.ll => vfncvt-xu-f.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfnmacc-rv32.ll => vfnmacc.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfnmadd-rv32.ll => vfnmadd.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll | 1106 -------------- .../RISCV/rvv/{vfnmsac-rv32.ll => vfnmsac.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll | 1106 -------------- .../RISCV/rvv/{vfnmsub-rv64.ll => vfnmsub.ll} | 294 ++-- llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll | 677 -------- .../RISCV/rvv/{vfrdiv-rv32.ll => vfrdiv.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll | 617 -------- .../RISCV/rvv/{vfrec7-rv32.ll => vfrec7.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll | 692 --------- .../rvv/{vfredmax-rv32.ll => vfredmax.ll} | 186 +-- 
llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll | 692 --------- .../rvv/{vfredmin-rv32.ll => vfredmin.ll} | 186 +-- llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll | 692 --------- .../rvv/{vfredosum-rv32.ll => vfredosum.ll} | 186 +-- llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll | 692 --------- .../rvv/{vfredusum-rv32.ll => vfredusum.ll} | 186 +-- llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll | 617 -------- .../rvv/{vfrsqrt7-rv32.ll => vfrsqrt7.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll | 678 --------- .../RISCV/rvv/{vfrsub-rv32.ll => vfrsub.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll | 1355 ---------------- .../RISCV/rvv/{vfsgnj-rv32.ll => vfsgnj.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll | 1355 ---------------- .../RISCV/rvv/{vfsgnjn-rv32.ll => vfsgnjn.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll | 1355 ---------------- .../RISCV/rvv/{vfsgnjx-rv32.ll => vfsgnjx.ll} | 426 +++--- .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll | 677 -------- .../{vfslide1down-rv32.ll => vfslide1down.ll} | 216 +-- .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll | 692 --------- .../rvv/{vfslide1up-rv32.ll => vfslide1up.ll} | 216 +-- llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll | 548 ------- .../RISCV/rvv/{vfsqrt-rv64.ll => vfsqrt.ll} | 306 ++-- llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll | 1356 ----------------- .../RISCV/rvv/{vfsub-rv32.ll => vfsub.ll} | 426 +++--- llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll | 830 ---------- .../RISCV/rvv/{vfwadd-rv32.ll => vfwadd.ll} | 258 ++-- llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll | 1248 --------------- .../rvv/{vfwadd.w-rv32.ll => vfwadd.w.ll} | 362 ++--- .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll | 380 ----- .../rvv/{vfwcvt-f-f-rv32.ll => vfwcvt-f-f.ll} | 132 +- .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll | 632 -------- .../rvv/{vfwcvt-f-x-rv32.ll => vfwcvt-f-x.ll} | 216 +-- .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll | 632 -------- .../{vfwcvt-f-xu-rv32.ll => vfwcvt-f-xu.ll} | 216 +-- 
.../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll | 380 ----- ...wcvt-rtz-x-f-rv32.ll => vfwcvt-rtz-x-f.ll} | 132 +- .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll | 380 ----- ...vt-rtz-xu-f-rv64.ll => vfwcvt-rtz-xu-f.ll} | 132 +- .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll | 380 ----- .../rvv/{vfwcvt-x-f-rv32.ll => vfwcvt-x-f.ll} | 132 +- .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll | 380 ----- .../{vfwcvt-xu-f-rv32.ll => vfwcvt-xu-f.ll} | 132 +- llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll | 830 ---------- .../RISCV/rvv/{vfwmacc-rv32.ll => vfwmacc.ll} | 222 +-- llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll | 830 ---------- .../RISCV/rvv/{vfwmsac-rv32.ll => vfwmsac.ll} | 222 +-- llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll | 830 ---------- .../RISCV/rvv/{vfwmul-rv64.ll => vfwmul.ll} | 258 ++-- llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll | 830 ---------- .../rvv/{vfwnmacc-rv32.ll => vfwnmacc.ll} | 222 +-- llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll | 830 ---------- .../rvv/{vfwnmsac-rv32.ll => vfwnmsac.ll} | 222 +-- .../test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll | 508 ------ .../rvv/{vfwredosum-rv32.ll => vfwredosum.ll} | 138 +- .../test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll | 508 ------ .../rvv/{vfwredusum-rv32.ll => vfwredusum.ll} | 138 +- llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll | 830 ---------- .../RISCV/rvv/{vfwsub-rv32.ll => vfwsub.ll} | 258 ++-- llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll | 1248 --------------- .../rvv/{vfwsub.w-rv64.ll => vfwsub.w.ll} | 362 ++--- 128 files changed, 7848 insertions(+), 58637 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfadd-rv32.ll => vfadd.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfclass-rv32.ll => vfclass.ll} (91%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-f-x-rv32.ll => vfcvt-f-x.ll} (87%) delete mode 100644 
llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-f-xu-rv32.ll => vfcvt-f-xu.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-rtz-x-f-rv64.ll => vfcvt-rtz-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-rtz-xu-f-rv32.ll => vfcvt-rtz-xu-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-x-f-rv32.ll => vfcvt-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-xu-f-rv32.ll => vfcvt-xu-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfdiv-rv32.ll => vfdiv.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmacc-rv64.ll => vfmacc.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmadd-rv32.ll => vfmadd.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmax-rv64.ll => vfmax.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmerge-rv64.ll => vfmerge.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmin-rv64.ll => vfmin.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmsac-rv32.ll => vfmsac.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmsub-rv32.ll => vfmsub.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmul-rv32.ll => vfmul.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll rename 
llvm/test/CodeGen/RISCV/rvv/{vfmv.s.f-rv32.ll => vfmv.s.f.ll} (74%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfmv.v.f-rv32.ll => vfmv.v.f.ll} (82%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-f-rv32.ll => vfncvt-f-f.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-x-rv32.ll => vfncvt-f-x.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-xu-rv32.ll => vfncvt-f-xu.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rod-f-f-rv32.ll => vfncvt-rod-f-f.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rtz-x-f-rv32.ll => vfncvt-rtz-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rtz-xu-f-rv32.ll => vfncvt-rtz-xu-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-x-f-rv32.ll => vfncvt-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-xu-f-rv32.ll => vfncvt-xu-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfnmacc-rv32.ll => vfnmacc.ll} (90%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfnmadd-rv32.ll => vfnmadd.ll} (90%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfnmsac-rv32.ll => vfnmsac.ll} (90%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfnmsub-rv64.ll => vfnmsub.ll} (90%) delete mode 100644 
llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfrdiv-rv32.ll => vfrdiv.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfrec7-rv32.ll => vfrec7.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfredmax-rv32.ll => vfredmax.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfredmin-rv32.ll => vfredmin.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfredosum-rv32.ll => vfredosum.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfredusum-rv32.ll => vfredusum.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfrsqrt7-rv32.ll => vfrsqrt7.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfrsub-rv32.ll => vfrsub.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfsgnj-rv32.ll => vfsgnj.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfsgnjn-rv32.ll => vfsgnjn.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfsgnjx-rv32.ll => vfsgnjx.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfslide1down-rv32.ll => vfslide1down.ll} (85%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfslide1up-rv32.ll => vfslide1up.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfsqrt-rv64.ll => vfsqrt.ll} (80%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll rename 
llvm/test/CodeGen/RISCV/rvv/{vfsub-rv32.ll => vfsub.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwadd-rv32.ll => vfwadd.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwadd.w-rv32.ll => vfwadd.w.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-f-rv32.ll => vfwcvt-f-f.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-x-rv32.ll => vfwcvt-f-x.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-xu-rv32.ll => vfwcvt-f-xu.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-rtz-x-f-rv32.ll => vfwcvt-rtz-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-rtz-xu-f-rv64.ll => vfwcvt-rtz-xu-f.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-x-f-rv32.ll => vfwcvt-x-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-xu-f-rv32.ll => vfwcvt-xu-f.ll} (87%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwmacc-rv32.ll => vfwmacc.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwmsac-rv32.ll => vfwmsac.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwmul-rv64.ll => vfwmul.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwnmacc-rv32.ll => vfwnmacc.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll rename 
llvm/test/CodeGen/RISCV/rvv/{vfwnmsac-rv32.ll => vfwnmsac.ll} (89%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwredosum-rv32.ll => vfwredosum.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwredusum-rv32.ll => vfwredusum.ll} (88%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwsub-rv32.ll => vfwsub.ll} (86%) delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll rename llvm/test/CodeGen/RISCV/rvv/{vfwsub.w-rv64.ll => vfwsub.w.ll} (88%) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll deleted file mode 100644 index b91d86befbc87..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll +++ /dev/null @@ -1,1356 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ -; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f16.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - 
- ret %a -} - -declare @llvm.riscv.vfadd.nxv2f16.nxv2f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv2f16.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f16.nxv4f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f16.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f16.nxv8f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { 
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f16.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv16f16.nxv16f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv16f16.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv32f16.nxv32f16( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv32f16.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv1f32.nxv1f32( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f32.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv2f32.nxv2f32( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfadd.nxv2f32.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f32.nxv4f32( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f32.nxv8f32( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f32.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32( - , - , - , - , - i64, - i64); - 
-define @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv16f32.nxv16f32( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv16f32.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv1f64.nxv1f64( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f64.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv2f64.nxv2f64( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv2f64.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f64.nxv4f64( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: 
vfadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f64.nxv8f64( - , - , - i64); - -define @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f64.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f16.f16( - %0, - %1, - half 
%2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16( %0, 
half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv2f32.f32( - %0, - float %1, - i64 
%2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define 
@intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: 
vfadd.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfadd.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfadd.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfadd.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfadd.ll index 5df1881bffb23..041580b2b49d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( , , - i32); + iXLen); 
-define @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare 
@llvm.riscv.vfadd.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) 
ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfadd.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfadd.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, 
(a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfadd.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfadd.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfadd.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfadd.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfadd.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f64.nxv8f64( , , - i32); + iXLen); -define @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfadd.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f16.f16( , half, , - i32, - 
i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfadd.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare 
@llvm.riscv.vfadd.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfadd.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv16f16.f16( %0, half %1, - 
i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfadd.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfadd.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfadd.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfadd.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ 
-964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfadd.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfadd.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfadd.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32( %0, 
float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfadd.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfadd.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfadd.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfadd.nxv2f64.f64( , double, - i32); + iXLen); -define 
@intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfadd.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfadd.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfadd.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ 
-1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfadd.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfadd.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfadd.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll deleted file mode 100644 index c86e1f334712a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfclass.nxv1i16( - , - i64); - -define @intrinsic_vfclass_v_nxv1i16_nxv1f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv1i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16( -; CHECK-LABEL: 
intrinsic_vfclass_mask_v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv1i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv2i16( - , - i64); - -define @intrinsic_vfclass_v_nxv2i16_nxv2f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv2i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv2i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv4i16( - , - i64); - -define @intrinsic_vfclass_v_nxv4i16_nxv4f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv4i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv4i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfclass.nxv8i16( - , - i64); - -define @intrinsic_vfclass_v_nxv8i16_nxv8f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv8i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv8i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfclass.v v8, v10, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv8i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv16i16( - , - i64); - -define @intrinsic_vfclass_v_nxv16i16_nxv16f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv16i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfclass.v v8, v12, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv16i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv32i16( - , - i64); - -define @intrinsic_vfclass_v_nxv32i16_nxv32f16( -; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call 
@llvm.riscv.vfclass.nxv32i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv32i16( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfclass.v v8, v16, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv32i16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv1i32( - , - i64); - -define @intrinsic_vfclass_v_nxv1i32_nxv1f32( -; CHECK-LABEL: intrinsic_vfclass_v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv1i32( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv1i32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv2i32( - , - i64); - -define @intrinsic_vfclass_v_nxv2i32_nxv2f32( -; CHECK-LABEL: intrinsic_vfclass_v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv2i32( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; 
CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv2i32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv4i32( - , - i64); - -define @intrinsic_vfclass_v_nxv4i32_nxv4f32( -; CHECK-LABEL: intrinsic_vfclass_v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv4i32( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfclass.v v8, v10, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv4i32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv8i32( - , - i64); - -define @intrinsic_vfclass_v_nxv8i32_nxv8f32( -; CHECK-LABEL: intrinsic_vfclass_v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv8i32( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfclass.v v8, v12, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv8i32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv16i32( - , - i64); - -define @intrinsic_vfclass_v_nxv16i32_nxv16f32( -; CHECK-LABEL: 
intrinsic_vfclass_v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv16i32( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfclass.v v8, v16, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv16i32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv1i64( - , - i64); - -define @intrinsic_vfclass_v_nxv1i64_nxv1f64( -; CHECK-LABEL: intrinsic_vfclass_v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv1i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv1i64( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfclass.v v8, v9, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv1i64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv2i64( - , - i64); - -define @intrinsic_vfclass_v_nxv2i64_nxv2f64( -; CHECK-LABEL: intrinsic_vfclass_v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv2i64( - , - , - 
, - i64); - -define @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfclass.v v8, v10, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv2i64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv4i64( - , - i64); - -define @intrinsic_vfclass_v_nxv4i64_nxv4f64( -; CHECK-LABEL: intrinsic_vfclass_v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv4i64( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfclass.v v8, v12, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call @llvm.riscv.vfclass.mask.nxv4i64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfclass.nxv8i64( - , - i64); - -define @intrinsic_vfclass_v_nxv8i64_nxv8f64( -; CHECK-LABEL: intrinsic_vfclass_v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfclass.v v8, v8 -; CHECK-NEXT: ret - %0, - i64 %1) nounwind { -entry: - %a = call @llvm.riscv.vfclass.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfclass.mask.nxv8i64( - , - , - , - i64); - -define @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64( -; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfclass.v v8, v16, v0.t -; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { -entry: - %a = call 
@llvm.riscv.vfclass.mask.nxv8i64( - %0, - %1, - %2, - i64 %3) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass.ll similarity index 91% rename from llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfclass.ll index ae9df2aefa4d9..e6aa398310956 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfclass.nxv1i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv1i16_nxv1f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1f16: @@ -12,11 +14,11 @@ define @intrinsic_vfclass_v_nxv1i16_nxv1f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -25,7 +27,7 @@ declare @llvm.riscv.vfclass.mask.nxv1i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1f16: @@ -36,20 +38,20 @@ define @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv1i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv2i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv2i16_nxv2f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2f16: @@ -58,11 +60,11 @@ define @intrinsic_vfclass_v_nxv2i16_nxv2f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind 
{ + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -71,7 +73,7 @@ declare @llvm.riscv.vfclass.mask.nxv2i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2f16: @@ -82,20 +84,20 @@ define @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv2i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv4i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv4i16_nxv4f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4f16: @@ -104,11 +106,11 @@ define @intrinsic_vfclass_v_nxv4i16_nxv4f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -117,7 +119,7 @@ declare @llvm.riscv.vfclass.mask.nxv4i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4f16: @@ -128,20 +130,20 @@ define @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv4i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv8i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv8i16_nxv8f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8f16: @@ -150,11 +152,11 @@ define @intrinsic_vfclass_v_nxv8i16_nxv8f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -163,7 +165,7 @@ declare @llvm.riscv.vfclass.mask.nxv8i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8f16: @@ -174,20 +176,20 @@ define @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16( %0, 
%1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv8i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv16i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv16i16_nxv16f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16f16: @@ -196,11 +198,11 @@ define @intrinsic_vfclass_v_nxv16i16_nxv16f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -209,7 +211,7 @@ declare @llvm.riscv.vfclass.mask.nxv16i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16f16: @@ -220,20 +222,20 @@ define @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv16i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv32i16( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv32i16_nxv32f16( ; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32f16: @@ -242,11 +244,11 @@ define @intrinsic_vfclass_v_nxv32i16_nxv32f16( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv32i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -255,7 +257,7 @@ declare @llvm.riscv.vfclass.mask.nxv32i16( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32f16: @@ -266,20 +268,20 @@ define @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv32i16( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv1i32( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv1i32_nxv1f32( ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i32_nxv1f32: @@ -288,11 +290,11 @@ define 
@intrinsic_vfclass_v_nxv1i32_nxv1f32( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -301,7 +303,7 @@ declare @llvm.riscv.vfclass.mask.nxv1i32( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i32_nxv1f32: @@ -312,20 +314,20 @@ define @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv1i32( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv2i32( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv2i32_nxv2f32( ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i32_nxv2f32: @@ -334,11 +336,11 @@ define @intrinsic_vfclass_v_nxv2i32_nxv2f32( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -347,7 +349,7 @@ declare @llvm.riscv.vfclass.mask.nxv2i32( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i32_nxv2f32: @@ -358,20 +360,20 @@ define @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv2i32( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv4i32( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv4i32_nxv4f32( ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i32_nxv4f32: @@ -380,11 +382,11 @@ define @intrinsic_vfclass_v_nxv4i32_nxv4f32( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -393,7 +395,7 @@ declare @llvm.riscv.vfclass.mask.nxv4i32( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32( ; CHECK-LABEL: 
intrinsic_vfclass_mask_v_nxv4i32_nxv4f32: @@ -404,20 +406,20 @@ define @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv4i32( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv8i32( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv8i32_nxv8f32( ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i32_nxv8f32: @@ -426,11 +428,11 @@ define @intrinsic_vfclass_v_nxv8i32_nxv8f32( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -439,7 +441,7 @@ declare @llvm.riscv.vfclass.mask.nxv8i32( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i32_nxv8f32: @@ -450,20 +452,20 @@ define @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv8i32( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv16i32( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv16i32_nxv16f32( ; CHECK-LABEL: intrinsic_vfclass_v_nxv16i32_nxv16f32: @@ -472,11 +474,11 @@ define @intrinsic_vfclass_v_nxv16i32_nxv16f32( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -485,7 +487,7 @@ declare @llvm.riscv.vfclass.mask.nxv16i32( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i32_nxv16f32: @@ -496,20 +498,20 @@ define @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv16i32( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv1i64( , - i32); + iXLen); define 
@intrinsic_vfclass_v_nxv1i64_nxv1f64( ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i64_nxv1f64: @@ -518,11 +520,11 @@ define @intrinsic_vfclass_v_nxv1i64_nxv1f64( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -531,7 +533,7 @@ declare @llvm.riscv.vfclass.mask.nxv1i64( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i64_nxv1f64: @@ -542,20 +544,20 @@ define @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv1i64( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv2i64( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv2i64_nxv2f64( ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i64_nxv2f64: @@ -564,11 +566,11 @@ define @intrinsic_vfclass_v_nxv2i64_nxv2f64( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -577,7 +579,7 @@ declare @llvm.riscv.vfclass.mask.nxv2i64( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i64_nxv2f64: @@ -588,20 +590,20 @@ define @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv2i64( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv4i64( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv4i64_nxv4f64( ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i64_nxv4f64: @@ -610,11 +612,11 @@ define @intrinsic_vfclass_v_nxv4i64_nxv4f64( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -623,7 +625,7 @@ declare 
@llvm.riscv.vfclass.mask.nxv4i64( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i64_nxv4f64: @@ -634,20 +636,20 @@ define @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv4i64( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } declare @llvm.riscv.vfclass.nxv8i64( , - i32); + iXLen); define @intrinsic_vfclass_v_nxv8i64_nxv8f64( ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i64_nxv8f64: @@ -656,11 +658,11 @@ define @intrinsic_vfclass_v_nxv8i64_nxv8f64( ; CHECK-NEXT: vfclass.v v8, v8 ; CHECK-NEXT: ret %0, - i32 %1) nounwind { + iXLen %1) nounwind { entry: %a = call @llvm.riscv.vfclass.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -669,7 +671,7 @@ declare @llvm.riscv.vfclass.mask.nxv8i64( , , , - i32); + iXLen); define @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64( ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i64_nxv8f64: @@ -680,13 +682,13 @@ define @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64( %0, %1, %2, - i32 %3) nounwind { + iXLen %3) nounwind { entry: %a = call @llvm.riscv.vfclass.mask.nxv8i64( %0, %1, %2, - i32 %3) + iXLen %3) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll deleted file mode 100644 index 65270dc06336d..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16( - , - , - , - i64, - i64); - -define 
@intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e16, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} 
- -declare @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32: -; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64( - %0, - i64 %1) - - 
ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64( - , - i64); - -define @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.f.x.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll index 5549960bb7736..e8d6257f1e652 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs 
-target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16( @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16( @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 
1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16( @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16( @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } 
declare @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16( @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16( @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + 
iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32( @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32( @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, 
iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32( @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32( @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret 
%a } declare @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32( @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64( @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 
1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64( @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64( @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } 
declare @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64( @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll deleted file mode 100644 index 6fc87d15dac18..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16( %0, %1, %2, i64 
%3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, 
v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64( - , - , - , - i64, - i64); - -define 
@intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64( - , - i64); - -define @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll index 1c8c2a80c90db..82ec8ca7cc74d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare 
@llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16( @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16( @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare 
@llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16( @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16( @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } 
declare @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16( @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16( @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, 
- i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32( @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32( @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, 
%1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32( @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32( @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ 
entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32( @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64( @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, 
ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64( @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64( @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, 
e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64( , - i32); + iXLen); -define @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64( @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll deleted file mode 100644 index 75c0a7ff62a47..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( - %0, - i32 %1) - - ret %a -} - -declare 
@llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16( - , - , - , - i32, - i32); - -define 
@intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare 
@llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64( %0, i32 %1) 
nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64( - , - i32); - -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64( - , - , - , - i32, - i32); - -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll index 
9c3e2a03f1141..0dbc0f221e7e6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ 
-55,7 +57,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16( %0, 
iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16( @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32( @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32( , - i64); + iXLen); -define 
@intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32( @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32( @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare 
@llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32( @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32( @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ 
-447,16 +449,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64( @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64( @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64( @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64( , - i64); + iXLen); -define @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64( %0, i64 %1) nounwind { +define @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64( @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll deleted file mode 100644 index 3a309eea35dbd..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16( - , - , - , - i64, - i64); - 
-define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: 
ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) 
- - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64( - , - i64); - -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll index 966a5d6f85a0f..457a93587ec2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16( %0, i32 %1) nounwind { +define 
@intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16( , - i32); + iXLen); 
-define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 
1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, iXLen %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, i32 %3) 
nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64( , , , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ 
-556,10 +558,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64( @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll deleted file mode 100644 index f5984a512e0fe..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s 
-declare @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16( - %0, - i64 %1) - - ret 
%a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64( - , - i64); - -define 
@intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: 
vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64( - , - i64); - -define @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll index 26632717dfa9f..88205bb75ce3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16( @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16( @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16( %0, 
%1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16( @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16( @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16( %0, %1, %2, i32 %3) 
nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16( @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16( @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32( @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32( @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32( @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32( @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32( @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64( @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); 
-define @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64( @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64( @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64( @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll deleted file mode 100644 index 8fb44d2d1ecf0..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16: 
-; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16( - , - , - , - i64, - i64); - -define 
@intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32( - %0, - %1, - 
%2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64( %0, i64 %1) nounwind { 
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64( - , - i64); - -define @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfcvt.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll index e76b0db05446e..fb8a4797f0ae8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 
-mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16( @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16( @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16( 
%0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16( @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16( @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16( @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16( @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16( , , , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32( @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32( @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare 
@llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32( @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32( @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ 
declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32( @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64( @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64( %0, - i32 %1) + iXLen %1) ret 
%a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64( @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64( @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64( %0, - i32 %1) + iXLen 
%1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64( @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll deleted file mode 100644 index 2d4a16e1bf4e8..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfdiv.nxv1f16( - 
, - , - i64); - -define @intrinsic_vfdiv_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f16( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f16( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - 
%a = call @llvm.riscv.vfdiv.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f16( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv16f16( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv32f16( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv1f32( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f32( 
- %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f32( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f32( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f32( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv16f32( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv1f64( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f64( - , - , - , - , - 
i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f64( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f64( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v12, v16, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f64( - , - , - i64); - -define @intrinsic_vfdiv_vv_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfdiv_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f16.f16( - , - half, - i64); - -define 
@intrinsic_vfdiv_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfdiv_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfdiv_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 
-; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfdiv_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfdiv_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define 
@intrinsic_vfdiv_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfdiv_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfdiv_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; 
CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfdiv_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfdiv_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfdiv.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfdiv_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfdiv_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfdiv_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_f64: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfdiv_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfdiv.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfdiv.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfdiv_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a 
-} - -declare @llvm.riscv.vfdiv.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfdiv_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfdiv.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfdiv.ll index 01bfb50ed9b49..0145f2ad764ec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfdiv.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfdiv.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, 
%2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfdiv.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfdiv.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); 
-define @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfdiv.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfdiv.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfdiv.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare 
@llvm.riscv.vfdiv.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfdiv.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f32.nxv4f32( %0, %1, - i32 %2) + 
iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfdiv.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call 
@llvm.riscv.vfdiv.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfdiv.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfdiv.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfdiv.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfdiv.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, 
ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f64.nxv8f64( , , - i32); + iXLen); -define @intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfdiv.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfdiv.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfdiv.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define 
@intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfdiv.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfdiv.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfdiv.nxv32f16.f16( , half, - i32); + iXLen); -define 
@intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfdiv.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfdiv.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfdiv.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: 
declare @llvm.riscv.vfdiv.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, 
%3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfdiv.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfdiv.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfdiv.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfdiv.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfdiv.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfdiv.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen 
%4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfdiv.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfdiv.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfdiv.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfdiv.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfdiv.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 
%4) nounwind { +define @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll deleted file mode 100644 index 16d305bad846c..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f16.nxv1f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f16.nxv2f16( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
mf2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f16.nxv2f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f16.nxv4f16( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f16.nxv4f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv8f16.nxv8f16( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv8f16.nxv8f16( - %0, - 
%1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv16f16.nxv16f16( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv16f16.nxv16f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv1f32.nxv1f32( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f32.nxv1f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( - , - , - , - , - i32); - 
-define @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f32.nxv2f32( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f32.nxv2f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f32.nxv4f32( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f32.nxv4f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv8f32.nxv8f32( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv8f32.nxv8f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv1f64.nxv1f64( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f64.nxv1f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; 
CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f64.nxv2f64( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f64.nxv2f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f64.nxv4f64( - , - , - , - i32); - -define @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f64.nxv4f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64( - , - , - , - , - i32); - -define @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64( - %0, - %1, 
- %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv1f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f16.f16( - , - half, - , - i32); - -define 
@intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv8f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv8f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv16f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: 
-; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv16f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv1f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv8f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv8f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare 
@llvm.riscv.vfmacc.mask.nxv8f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv1f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv1f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv1f64.f64( - , - double, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv2f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv2f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv2f64.f64( - , - double, - , - , - i32); - -define 
@intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfmacc.nxv4f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.nxv4f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmacc.mask.nxv4f64.f64( - , - double, - , - , - i32); - -define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmacc.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmacc.ll index c5809888ff17b..5115a7548e2ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: 
-verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmacc.nxv1f16.nxv1f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfmacc.nxv2f16.nxv2f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, 
mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfmacc.nxv4f16.nxv4f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfmacc.nxv8f16.nxv8f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ 
-189,9 +191,9 @@ declare @llvm.riscv.vfmacc.nxv16f16.nxv16f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfmacc.nxv1f32.nxv1f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare 
@llvm.riscv.vfmacc.nxv2f32.nxv2f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfmacc.nxv4f32.nxv4f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfmacc.nxv8f32.nxv8f32( , , , - i64); + iXLen); -define 
@intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfmacc.nxv1f64.nxv1f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfmacc.nxv2f64.nxv2f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { 
+define @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfmacc.nxv4f64.nxv4f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64( , , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfmacc.nxv1f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfmacc.nxv2f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfmacc.mask.nxv2f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfmacc.nxv4f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16: 
; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfmacc.nxv8f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfmacc.mask.nxv8f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfmacc.nxv16f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, 
m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfmacc.mask.nxv16f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfmacc.nxv1f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfmacc.nxv2f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, 
%2, - i64 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfmacc.mask.nxv2f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfmacc.nxv4f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfmacc.nxv8f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ 
declare @llvm.riscv.vfmacc.mask.nxv8f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfmacc.nxv1f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfmacc.mask.nxv1f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfmacc.nxv2f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare 
@llvm.riscv.vfmacc.mask.nxv2f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfmacc.nxv4f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfmacc.mask.nxv4f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll deleted file mode 100644 index afd41bd8a2122..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( - , - , - , - i64); - -define 
@intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv16f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; 
CHECK-NEXT: vfmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f32.nxv2f32( - %0, - 
%1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define 
@intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; 
CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f16.f16( - %0, - half %1, - %2, - 
%3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv1f32.f32( - , - float, - , - i64); - -define 
@intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv1f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; 
CHECK-NEXT: vfmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv2f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmadd.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.nxv4f64.f64( - %0, - 
double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmadd.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmadd.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmadd.ll index cfb32cfab4cdc..9313e440e500f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmadd.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { 
+define @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfmadd.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfmadd.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfmadd.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfmadd.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfmadd.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfmadd.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfmadd.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfmadd.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, 
%3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfmadd.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfmadd.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare 
@llvm.riscv.vfmadd.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfmadd.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfmadd.nxv2f16.f16( , half, , - i32); + iXLen); -define 
@intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfmadd.nxv4f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfmadd.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) 
nounwind { +define @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfmadd.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfmadd.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfmadd.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfmadd.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfmadd.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfmadd.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32( %0, float 
%1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfmadd.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfmadd.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfmadd.nxv1f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfmadd.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfmadd.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfmadd.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfmadd.nxv4f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfmadd.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll deleted file mode 100644 index 98b4cf71da14d..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f16.nxv2f16( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f16.nxv2f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f16.nxv4f16( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f16.nxv4f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f16.nxv8f16( - , - , - 
i32); - -define @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f16.nxv8f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv16f16.nxv16f16( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv16f16.nxv16f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv32f16.nxv32f16( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv32f16.nxv32f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv1f32.nxv1f32( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f32.nxv1f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f32.nxv2f32( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f32.nxv2f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f32.nxv4f32( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f32.nxv4f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f32.nxv8f32( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f32.nxv8f32( - %0, - %1, - i32 %2) - - 
ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv16f32.nxv16f32( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv16f32.nxv16f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv1f64.nxv1f64( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f64.nxv1f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64( - , - , - , - , - i32, - i32); - 
-define @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f64.nxv2f64( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f64.nxv2f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f64.nxv4f64( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f64.nxv4f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f64.nxv8f64( - , - , - i32); - -define @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f64.nxv8f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv1f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, 
ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i32 
%4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv16f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv16f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv16f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv32f16.f16( - , - half, - i32); - -define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half 
%1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv32f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv32f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv1f32.f32( - , - float, - i32); - -define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f32.f32( - , - float, - i32); - -define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f32.f32( - , - float, - i32); - -define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f32.f32( - , - float, - i32); - -define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f32.f32( - %0, - float %1, - i32 %2) 
- - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv16f32.f32( - , - float, - i32); - -define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv16f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv16f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv1f64.f64( - , - double, - i32); - -define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv1f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv1f64.f64( - , - , - double, - , - i32, - i32); - -define 
@intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv2f64.f64( - , - double, - i32); - -define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv2f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv2f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv4f64.f64( - , - double, - i32); - -define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv4f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv4f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmax.nxv8f64.f64( - , - double, - i32); - -define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.nxv8f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmax.mask.nxv8f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmax.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmax.ll index 4fc7319fb2b29..446981928b6cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh 
\ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmax.nxv1f16.nxv1f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f16.nxv1f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f16.nxv2f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f16.nxv2f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 
+88,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f16.nxv4f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f16.nxv4f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f16.nxv8f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f16.nxv8f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfmax.nxv16f16.nxv16f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv16f16.nxv16f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfmax.nxv32f16.nxv32f16( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv32f16.nxv32f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfmax.nxv1f32.nxv1f32( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f32.nxv1f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f32.nxv2f32( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f32.nxv2f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f32.nxv4f32( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f32.nxv4f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f32.nxv8f32( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f32.nxv8f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define 
@intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfmax.nxv16f32.nxv16f32( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv16f32.nxv16f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfmax.nxv1f64.nxv1f64( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f64.nxv1f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64( , , , - i64, - i64); + iXLen, + iXLen); -define 
@intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f64.nxv2f64( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f64.nxv2f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f64.nxv4f64( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f64.nxv4f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64( , , 
, - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f64.nxv8f64( , , - i64); + iXLen); -define @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f64.nxv8f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfmax.nxv1f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare 
@llvm.riscv.vfmax.mask.nxv1f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f16.f16( %0, half %1, - i64 %2) 
+ iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfmax.nxv16f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = 
call @llvm.riscv.vfmax.nxv16f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfmax.mask.nxv16f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfmax.nxv32f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv32f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfmax.mask.nxv32f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfmax.nxv1f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfmax.mask.nxv1f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfmax.nxv16f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { 
+define @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv16f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfmax.mask.nxv16f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfmax.nxv1f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv1f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfmax.mask.nxv1f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfmax.nxv2f64.f64( , 
double, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv2f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfmax.mask.nxv2f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfmax.nxv4f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv4f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfmax.mask.nxv4f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + 
iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfmax.nxv8f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfmax.nxv8f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfmax.mask.nxv8f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll deleted file mode 100644 index 3dc1240d0f7b6..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll +++ /dev/null @@ -1,902 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f16.nxv1f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare 
@llvm.riscv.vfmerge.nxv1f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv2f16.nxv2f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f16.nxv2f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv2f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv4f16.nxv4f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f16.nxv4f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv4f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f16.nxv8f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f16.nxv8f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv16f16.nxv16f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f16.nxv16f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv16f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, 
fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv32f16.nxv32f16( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv32f16.nxv32f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv32f16.f16( - , - half, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv32f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv1f32.nxv1f32( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f32.nxv1f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv1f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare 
@llvm.riscv.vfmerge.nxv2f32.nxv2f32( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f32.nxv2f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv2f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv4f32.nxv4f32( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f32.nxv4f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv4f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f32.nxv8f32( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f32.nxv8f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv16f32.nxv16f32( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f32.nxv16f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv16f32.f32( - , - float, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv1f64.nxv1f64( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmerge.vvm 
v8, v8, v9, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f64.nxv1f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv1f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv2f64.nxv2f64( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f64.nxv2f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv2f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv4f64.nxv4f64( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f64.nxv4f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare 
@llvm.riscv.vfmerge.nxv4f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f64.nxv8f64( - , - , - , - i32); - -define @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f64.nxv8f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfmerge.nxv8f64.f64( - , - double, - , - i32); - -define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv1f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: 
vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv32f16.f16( - %0, - half zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfmerge.nxv1f32.f32( - %0, - float zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f32.f32( - %0, - float zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f32.f32( - %0, - float zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f32.f32( - %0, - float zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv16f32.f32( - %0, - float zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfmerge.nxv1f64.f64( - %0, - double zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv2f64.f64( - %0, - double zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv4f64.f64( - %0, - double zeroinitializer, - %1, - i32 %2) - - ret %a -} - -define @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmerge.nxv8f64.f64( - %0, - double zeroinitializer, - %1, - i32 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmerge.ll index b23d908c7edda..eb3efd1fa0373 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 
's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmerge.nxv1f16.nxv1f16( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -27,9 +29,9 @@ declare @llvm.riscv.vfmerge.nxv1f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -40,7 +42,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -49,9 +51,9 @@ declare @llvm.riscv.vfmerge.nxv2f16.nxv2f16( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -62,7 +64,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -71,9 +73,9 @@ declare @llvm.riscv.vfmerge.nxv2f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -84,7 +86,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -93,9 
+95,9 @@ declare @llvm.riscv.vfmerge.nxv4f16.nxv4f16( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -115,9 +117,9 @@ declare @llvm.riscv.vfmerge.nxv4f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -128,7 +130,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -137,9 +139,9 @@ declare @llvm.riscv.vfmerge.nxv8f16.nxv8f16( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -150,7 +152,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -159,9 +161,9 @@ declare @llvm.riscv.vfmerge.nxv8f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -172,7 +174,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -181,9 +183,9 @@ declare @llvm.riscv.vfmerge.nxv16f16.nxv16f16( , , , - i64); + iXLen); -define 
@intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -194,7 +196,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -203,9 +205,9 @@ declare @llvm.riscv.vfmerge.nxv16f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -216,7 +218,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -225,9 +227,9 @@ declare @llvm.riscv.vfmerge.nxv32f16.nxv32f16( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -238,7 +240,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -247,9 +249,9 @@ declare @llvm.riscv.vfmerge.nxv32f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -260,7 +262,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -269,9 +271,9 @@ declare @llvm.riscv.vfmerge.nxv1f32.nxv1f32( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) 
nounwind { +define @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -282,7 +284,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -291,9 +293,9 @@ declare @llvm.riscv.vfmerge.nxv1f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -304,7 +306,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -313,9 +315,9 @@ declare @llvm.riscv.vfmerge.nxv2f32.nxv2f32( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -326,7 +328,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -335,9 +337,9 @@ declare @llvm.riscv.vfmerge.nxv2f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -348,7 +350,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -357,9 +359,9 @@ declare @llvm.riscv.vfmerge.nxv4f32.nxv4f32( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -370,7 +372,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -379,9 +381,9 @@ declare @llvm.riscv.vfmerge.nxv4f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -392,7 +394,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -401,9 +403,9 @@ declare @llvm.riscv.vfmerge.nxv8f32.nxv8f32( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -414,7 +416,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -423,9 +425,9 @@ declare @llvm.riscv.vfmerge.nxv8f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -436,7 +438,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -445,9 +447,9 @@ declare @llvm.riscv.vfmerge.nxv16f32.nxv16f32( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -458,7 +460,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -467,9 +469,9 @@ declare @llvm.riscv.vfmerge.nxv16f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -480,7 +482,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -489,9 +491,9 @@ declare @llvm.riscv.vfmerge.nxv1f64.nxv1f64( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -502,7 +504,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfmerge.nxv1f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -524,7 +526,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -533,9 +535,9 @@ declare @llvm.riscv.vfmerge.nxv2f64.nxv2f64( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -546,7 +548,7 @@ 
entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -555,9 +557,9 @@ declare @llvm.riscv.vfmerge.nxv2f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -568,7 +570,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -577,9 +579,9 @@ declare @llvm.riscv.vfmerge.nxv4f64.nxv4f64( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -590,7 +592,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -599,9 +601,9 @@ declare @llvm.riscv.vfmerge.nxv4f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -612,7 +614,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -621,9 +623,9 @@ declare @llvm.riscv.vfmerge.nxv8f64.nxv8f64( , , , - i64); + iXLen); -define @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -634,7 +636,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -643,9 +645,9 @@ declare 
@llvm.riscv.vfmerge.nxv8f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -656,12 +658,12 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } -define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -672,12 +674,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -688,12 +690,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -704,12 +706,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -720,12 +722,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define 
@intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -736,12 +738,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -752,12 +754,12 @@ entry: %0, half zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -768,12 +770,12 @@ entry: %0, float zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -784,12 +786,12 @@ entry: %0, float zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -800,12 +802,12 @@ entry: %0, float zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32( %0, %1, i64 %2) nounwind 
{ +define @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -816,12 +818,12 @@ entry: %0, float zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -832,12 +834,12 @@ entry: %0, float zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -848,12 +850,12 @@ entry: %0, double zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -864,12 +866,12 @@ entry: %0, double zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -880,12 +882,12 @@ entry: %0, double zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64( %0, %1, iXLen 
%2) nounwind { ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -896,7 +898,7 @@ entry: %0, double zeroinitializer, %1, - i64 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll deleted file mode 100644 index 0861a787440e4..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f16.nxv2f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv2f16.nxv2f16( - %0, - %1, 
- i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f16.nxv4f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f16.nxv4f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f16.nxv8f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f16.nxv8f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( - , - , - , - , - i32, - i32); - -define 
@intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv16f16.nxv16f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv16f16.nxv16f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv32f16.nxv32f16( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv32f16.nxv32f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv1f32.nxv1f32( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f32.nxv1f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f32.nxv2f32( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv2f32.nxv2f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, 
m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f32.nxv4f32( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f32.nxv4f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f32.nxv8f32( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f32.nxv8f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - 
%3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv16f32.nxv16f32( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv16f32.nxv16f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv1f64.nxv1f64( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f64.nxv1f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f64.nxv2f64( - , - , - i32); - 
-define @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv2f64.nxv2f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f64.nxv4f64( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f64.nxv4f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f64.nxv8f64( - , - , - i32); - -define @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f64.nxv8f64( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64( - , - , - , - , - i32, - i32); - -define @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv1f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfmin.nxv2f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f16.f16( - , - , - half, - , - 
i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv16f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv16f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv16f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv32f16.f16( - , - half, - i32); - -define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv32f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv32f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv1f32.f32( - , - float, - i32); - -define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f32.f32( - , - float, - i32); - -define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv2f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmin.vf 
v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f32.f32( - , - float, - i32); - -define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f32.f32( - , - float, - i32); - -define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret 
%a -} - -declare @llvm.riscv.vfmin.nxv16f32.f32( - , - float, - i32); - -define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv16f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv16f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv1f64.f64( - , - double, - i32); - -define @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv1f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv1f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv2f64.f64( - , - double, - i32); - -define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double 
%1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv2f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv2f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv4f64.f64( - , - double, - i32); - -define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv4f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv4f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfmin.nxv8f64.f64( - , - double, - i32); - -define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.nxv8f64.f64( - %0, - double %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfmin.mask.nxv8f64.f64( - , - , - double, - , - i32, - i32); - -define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmin.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i32 %4, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmin.ll index e647fe51ffb17..e151e9fb695de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmin.nxv1f16.nxv1f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f16.nxv1f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare 
@llvm.riscv.vfmin.mask.nxv1f16.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f16.nxv2f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f16.nxv2f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f16.nxv4f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f16.nxv4f16( %0, %1, - i64 %2) + iXLen %2) ret 
%a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfmin.nxv8f16.nxv8f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv8f16.nxv8f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfmin.nxv16f16.nxv16f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call 
@llvm.riscv.vfmin.nxv16f16.nxv16f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfmin.nxv32f16.nxv32f16( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv32f16.nxv32f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfmin.nxv1f32.nxv1f32( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f32.nxv1f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f32.nxv2f32( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f32.nxv2f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f32.nxv4f32( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f32.nxv4f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfmin.nxv8f32.nxv8f32( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv8f32.nxv8f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfmin.nxv16f32.nxv16f32( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { +define 
@intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv16f32.nxv16f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfmin.nxv1f64.nxv1f64( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f64.nxv1f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f64.nxv2f64( , , - i64); + iXLen); -define 
@intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f64.nxv2f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f64.nxv4f64( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f64.nxv4f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare 
@llvm.riscv.vfmin.nxv8f64.nxv8f64( , , - i64); + iXLen); -define @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { +define @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv8f64.nxv8f64( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfmin.nxv1f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfmin.mask.nxv1f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } 
@@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfmin.mask.nxv2f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, 
%3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfmin.nxv8f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv8f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfmin.nxv16f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv16f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfmin.mask.nxv16f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfmin.nxv32f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv32f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfmin.mask.nxv32f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfmin.nxv1f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfmin.mask.nxv1f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfmin.mask.nxv2f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define 
@intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfmin.nxv8f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv8f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfmin.nxv16f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv16f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfmin.mask.nxv16f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define 
@intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfmin.nxv1f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv1f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfmin.mask.nxv1f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfmin.nxv2f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv2f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 
@@ declare @llvm.riscv.vfmin.mask.nxv2f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfmin.nxv4f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfmin.nxv4f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfmin.mask.nxv4f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfmin.nxv8f64.f64( , double, - i64); + iXLen); -define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { +define @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call 
@llvm.riscv.vfmin.nxv8f64.f64( %0, double %1, - i64 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfmin.mask.nxv8f64.f64( , double, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { +define @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll deleted file mode 100644 index 2a5fb2896aa56..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare 
@llvm.riscv.vfmsac.nxv2f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) 
nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv16f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfmsac.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64( - 
, - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu 
-; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv8f16.f16( - %0, - half %1, - %2, 
- %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv1f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv2f32.f32( - , - float, - , - i64); - -define 
@intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv1f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv2f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; 
CHECK-NEXT: vfmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsac.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.nxv4f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsac.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsac.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmsac.ll index c8407dfe64730..cf9df7550fcc7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmsac.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfmsac.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, 
i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfmsac.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfmsac.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, 
%2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfmsac.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfmsac.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfmsac.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfmsac.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfmsac.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfmsac.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, 
%3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfmsac.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfmsac.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare 
@llvm.riscv.vfmsac.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfmsac.nxv2f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfmsac.nxv4f16.f16( , half, , - i32); + iXLen); -define 
@intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfmsac.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfmsac.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfmsac.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) 
nounwind { +define @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfmsac.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfmsac.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfmsac.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfmsac.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfmsac.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, 
%2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfmsac.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfmsac.nxv1f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfmsac.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfmsac.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfmsac.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfmsac.nxv4f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfmsac.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll deleted file mode 100644 index 70efc0da21f5a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 
%4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv16f16.nxv16f16( - , - , - , - i64); - -define 
@intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; 
CHECK-NEXT: vfmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 
%3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define 
@intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; 
CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv1f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f32.f32( - %0, - 
float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv1f64.f64( - , - double, - , - i64); - -define 
@intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv2f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfmsub.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.nxv4f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfmsub.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmsub.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmsub.ll index 620c3dcb1025a..d071893ceb085 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmsub.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ 
entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfmsub.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfmsub.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare 
@llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfmsub.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfmsub.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); 
-define @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfmsub.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfmsub.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, 
%3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfmsub.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfmsub.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( 
%0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfmsub.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfmsub.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfmsub.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfmsub.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfmsub.nxv2f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfmsub.nxv4f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfmsub.mask.nxv4f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ 
entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfmsub.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfmsub.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfmsub.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfmsub.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ 
-787,9 +789,9 @@ declare @llvm.riscv.vfmsub.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfmsub.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare 
@llvm.riscv.vfmsub.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfmsub.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfmsub.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfmsub.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfmsub.nxv1f64.f64( , double, , - i32); + 
iXLen); -define @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfmsub.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfmsub.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfmsub.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfmsub.nxv4f64.f64( , double, , - i32); + iXLen); -define 
@intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfmsub.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll deleted file mode 100644 index 08aa64b6de7fc..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfmul.nxv1f16( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f16( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f16( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f16( - , - , - i64); - -define 
@intrinsic_vfmul_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv16f16( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv32f16( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.vfmul.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv1f32( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f32( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f32( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f32( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 
1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv16f32( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv1f64( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f64( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f64( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f64( - , - , - i64); - -define @intrinsic_vfmul_vv_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f64( - , - , - , - , - i64, - i64); - -define 
@intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e16, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfmul.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfmul_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfmul_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfmul_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfmul_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare 
@llvm.riscv.vfmul.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfmul_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfmul_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfmul_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfmul_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfmul_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfmul.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfmul_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfmul.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfmul_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfmul.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfmul_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmul.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll 
similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmul.ll index 50ebccd92e64c..0f4c738025617 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmul.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfmul.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16( %0, 
%1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfmul.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfmul.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfmul.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfmul.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, 
i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfmul.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfmul.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f32.nxv2f32( , , - i32); + iXLen); -define 
@intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare 
@llvm.riscv.vfmul.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfmul.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfmul.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfmul.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen 
%4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfmul.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ 
entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfmul.nxv8f64.nxv8f64( , , - i32); + iXLen); -define @intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfmul.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v 
v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfmul.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfmul.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfmul.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfmul.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfmul.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfmul.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfmul.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfmul.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfmul.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare 
@llvm.riscv.vfmul.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfmul.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfmul.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfmul.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call 
@llvm.riscv.vfmul.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfmul.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfmul.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfmul.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfmul.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfmul.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfmul.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfmul.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfmul.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll deleted file mode 100644 index ee619309b9e3c..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll +++ /dev/null @@ -1,197 
+0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s - -declare @llvm.riscv.vfmv.s.f.nxv1f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv1f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv2f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv2f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv4f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv4f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv8f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv8f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv8f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv16f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv16f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: 
vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv16f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv32f16(, half, i64) - -define @intrinsic_vfmv.s.f_f_nxv32f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv32f16( %0, half %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv1f32(, float, i64) - -define @intrinsic_vfmv.s.f_f_nxv1f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f32( %0, float %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv2f32(, float, i64) - -define @intrinsic_vfmv.s.f_f_nxv2f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f32( %0, float %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv4f32(, float, i64) - -define @intrinsic_vfmv.s.f_f_nxv4f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f32( %0, float %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv8f32(, float, i64) - -define @intrinsic_vfmv.s.f_f_nxv8f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfmv.s.f.nxv8f32( %0, float %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv16f32(, float, i64) - -define @intrinsic_vfmv.s.f_f_nxv16f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv16f32( %0, float %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv1f64(, double, i64) - -define @intrinsic_vfmv.s.f_f_nxv1f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f64( %0, double %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv2f64(, double, i64) - -define @intrinsic_vfmv.s.f_f_nxv2f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f64( %0, double %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv4f64(, double, i64) - -define @intrinsic_vfmv.s.f_f_nxv4f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f64( %0, double %1, i64 %2) - ret %a -} - -declare @llvm.riscv.vfmv.s.f.nxv8f64(, double, i64) - -define @intrinsic_vfmv.s.f_f_nxv8f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfmv.s.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.s.f.nxv8f64( %0, double %1, i64 %2) - 
ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll similarity index 74% rename from llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll index 4d47c000788fb..8464dc2f62997 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll @@ -1,197 +1,200 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s -declare @llvm.riscv.vfmv.s.f.nxv1f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv1f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv1f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv1f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv1f16( %0, half %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv2f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv2f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv2f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv2f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv2f16( %0, half %1, iXLen %2) ret %a } -declare 
@llvm.riscv.vfmv.s.f.nxv4f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv4f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv4f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv4f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv4f16( %0, half %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv8f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv8f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv8f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv8f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv8f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv8f16( %0, half %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv16f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv16f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv16f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv16f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv16f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv16f16( %0, half %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv32f16(, half, i32) +declare @llvm.riscv.vfmv.s.f.nxv32f16(, half, iXLen) -define @intrinsic_vfmv.s.f_f_nxv32f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv32f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32f16: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv32f16( %0, half %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv32f16( %0, half %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv1f32(, float, i32) +declare @llvm.riscv.vfmv.s.f.nxv1f32(, float, iXLen) -define @intrinsic_vfmv.s.f_f_nxv1f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv1f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f32( %0, float %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv1f32( %0, float %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv2f32(, float, i32) +declare @llvm.riscv.vfmv.s.f.nxv2f32(, float, iXLen) -define @intrinsic_vfmv.s.f_f_nxv2f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv2f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f32( %0, float %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv2f32( %0, float %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv4f32(, float, i32) +declare @llvm.riscv.vfmv.s.f.nxv4f32(, float, iXLen) -define @intrinsic_vfmv.s.f_f_nxv4f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv4f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f32( %0, float %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv4f32( %0, float %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv8f32(, float, 
i32) +declare @llvm.riscv.vfmv.s.f.nxv8f32(, float, iXLen) -define @intrinsic_vfmv.s.f_f_nxv8f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv8f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv8f32( %0, float %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv8f32( %0, float %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv16f32(, float, i32) +declare @llvm.riscv.vfmv.s.f.nxv16f32(, float, iXLen) -define @intrinsic_vfmv.s.f_f_nxv16f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv16f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv16f32( %0, float %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv16f32( %0, float %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv1f64(, double, i32) +declare @llvm.riscv.vfmv.s.f.nxv1f64(, double, iXLen) -define @intrinsic_vfmv.s.f_f_nxv1f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv1f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv1f64( %0, double %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv1f64( %0, double %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv2f64(, double, i32) +declare @llvm.riscv.vfmv.s.f.nxv2f64(, double, iXLen) -define @intrinsic_vfmv.s.f_f_nxv2f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv2f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f64: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv2f64( %0, double %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv2f64( %0, double %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv4f64(, double, i32) +declare @llvm.riscv.vfmv.s.f.nxv4f64(, double, iXLen) -define @intrinsic_vfmv.s.f_f_nxv4f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv4f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv4f64( %0, double %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv4f64( %0, double %1, iXLen %2) ret %a } -declare @llvm.riscv.vfmv.s.f.nxv8f64(, double, i32) +declare @llvm.riscv.vfmv.s.f.nxv8f64(, double, iXLen) -define @intrinsic_vfmv.s.f_f_nxv8f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfmv.s.f_f_nxv8f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: - %a = call @llvm.riscv.vfmv.s.f.nxv8f64( %0, double %1, i32 %2) + %a = call @llvm.riscv.vfmv.s.f.nxv8f64( %0, double %1, iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll deleted file mode 100644 index 1c4d9f6e89dc4..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll +++ /dev/null @@ -1,482 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -target-abi lp64d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfmv.v.f.nxv1f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfmv.v.f_f_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv2f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv4f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv8f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv16f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv16f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv32f16( - half, - i64); - -define @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32f16: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv32f16( - half %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv1f32( - float, - i64); - -define @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f32( - float %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv2f32( - float, - i64); - -define @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f32( - float %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv4f32( - float, - i64); - -define @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f32( - float %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv8f32( - float, - i64); - -define @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f32( - float %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv16f32( - float, - i64); - -define @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: 
vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv16f32( - float %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv1f64( - double, - i64); - -define @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f64( - double %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv2f64( - double, - i64); - -define @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f64( - double %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv4f64( - double, - i64); - -define @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f64( - double %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfmv.v.f.nxv8f64( - double, - i64); - -define @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f64( - double %0, - i64 %1) - - ret %a -} - -define @intrinsic_vfmv.v.f_zero_nxv1f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f16( - half 0.0, - i64 %0) - - ret 
%a -} - -define @intrinsic_vmv.v.i_zero_nxv2f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f16( - half 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv4f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f16( - half 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv8f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f16( - half 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv16f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv16f16( - half 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv32f16(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv32f16( - half 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv1f32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f32( - float 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv2f32(i64 %0) nounwind { -; 
CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f32( - float 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv4f32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f32( - float 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv8f32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f32( - float 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv16f32(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv16f32( - float 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv1f64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv1f64( - double 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv2f64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv2f64( - double 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv4f64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f64: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv4f64( - double 0.0, - i64 %0) - - ret %a -} - -define @intrinsic_vmv.v.i_zero_nxv8f64(i64 %0) nounwind { -; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfmv.v.f.nxv8f64( - double 0.0, - i64 %0) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll similarity index 82% rename from llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll index b4acc57dcd81e..6e0613e3e49ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmv.v.f.nxv1f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv1f16(half %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,16 +16,16 @@ define @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i32 %1) nounwi entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv2f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i32 %1) nounwind { +define 
@intrinsic_vfmv.v.f_f_nxv2f16(half %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -32,16 +34,16 @@ define @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i32 %1) nounwi entry: %a = call @llvm.riscv.vfmv.v.f.nxv2f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv4f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv4f16(half %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -50,16 +52,16 @@ define @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i32 %1) nounwi entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv8f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv8f16(half %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -68,16 +70,16 @@ define @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i32 %1) nounwi entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv16f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv16f16(half %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -86,16 +88,16 @@ define @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv16f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv32f16( half, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv32f16(half %0, iXLen %1) 
nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -104,16 +106,16 @@ define @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv32f16( half %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv1f32( float, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv1f32(float %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -122,16 +124,16 @@ define @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f32( float %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv2f32( float, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv2f32(float %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -140,16 +142,16 @@ define @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv2f32( float %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv4f32( float, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv4f32(float %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -158,16 +160,16 @@ define @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f32( float %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv8f32( float, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv8f32(float %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vfmv.v.f_f_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -176,16 +178,16 @@ define @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i32 %1) noun entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f32( float %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv16f32( float, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv16f32(float %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -194,16 +196,16 @@ define @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i32 %1) no entry: %a = call @llvm.riscv.vfmv.v.f.nxv16f32( float %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv1f64( double, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv1f64(double %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -212,16 +214,16 @@ define @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i32 %1) no entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f64( double %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv2f64( double, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv2f64(double %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -230,16 +232,16 @@ define @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i32 %1) no entry: %a = call @llvm.riscv.vfmv.v.f.nxv2f64( double %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv4f64( double, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv4f64(double %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f64: 
; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -248,16 +250,16 @@ define @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i32 %1) no entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f64( double %0, - i32 %1) + iXLen %1) ret %a } declare @llvm.riscv.vfmv.v.f.nxv8f64( double, - i32); + iXLen); -define @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i32 %1) nounwind { +define @intrinsic_vfmv.v.f_f_nxv8f64(double %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -266,12 +268,12 @@ define @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i32 %1) no entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f64( double %0, - i32 %1) + iXLen %1) ret %a } -define @intrinsic_vfmv.v.f_zero_nxv1f16(i32 %0) nounwind { +define @intrinsic_vfmv.v.f_zero_nxv1f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -280,12 +282,12 @@ define @intrinsic_vfmv.v.f_zero_nxv1f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv2f16(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv2f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -294,12 +296,12 @@ define @intrinsic_vmv.v.i_zero_nxv2f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv2f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv4f16(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv4f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -308,12 +310,12 @@ define @intrinsic_vmv.v.i_zero_nxv4f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define 
@intrinsic_vmv.v.i_zero_nxv8f16(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv8f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -322,12 +324,12 @@ define @intrinsic_vmv.v.i_zero_nxv8f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv16f16(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv16f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -336,12 +338,12 @@ define @intrinsic_vmv.v.i_zero_nxv16f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv16f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv32f16(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv32f16(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -350,12 +352,12 @@ define @intrinsic_vmv.v.i_zero_nxv32f16(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv32f16( half 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv1f32(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv1f32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -364,12 +366,12 @@ define @intrinsic_vmv.v.i_zero_nxv1f32(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f32( float 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv2f32(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv2f32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -378,12 +380,12 @@ define @intrinsic_vmv.v.i_zero_nxv2f32(i32 %0) nounwind { entry: %a = call 
@llvm.riscv.vfmv.v.f.nxv2f32( float 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv4f32(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv4f32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -392,12 +394,12 @@ define @intrinsic_vmv.v.i_zero_nxv4f32(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f32( float 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv8f32(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv8f32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,12 +408,12 @@ define @intrinsic_vmv.v.i_zero_nxv8f32(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f32( float 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv16f32(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv16f32(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -420,12 +422,12 @@ define @intrinsic_vmv.v.i_zero_nxv16f32(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv16f32( float 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv1f64(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv1f64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -434,12 +436,12 @@ define @intrinsic_vmv.v.i_zero_nxv1f64(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv1f64( double 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv2f64(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv2f64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -448,12 +450,12 @@ define 
@intrinsic_vmv.v.i_zero_nxv2f64(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv2f64( double 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv4f64(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv4f64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -462,12 +464,12 @@ define @intrinsic_vmv.v.i_zero_nxv4f64(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv4f64( double 0.0, - i32 %0) + iXLen %0) ret %a } -define @intrinsic_vmv.v.i_zero_nxv8f64(i32 %0) nounwind { +define @intrinsic_vmv.v.i_zero_nxv8f64(iXLen %0) nounwind { ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -476,7 +478,7 @@ define @intrinsic_vmv.v.i_zero_nxv8f64(i32 %0) nounwind { entry: %a = call @llvm.riscv.vfmv.v.f.nxv8f64( double 0.0, - i32 %0) + iXLen %0) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll deleted file mode 100644 index 2e35f75eb89c0..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32( %0, %1, %2, i64 
%3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll similarity index 86% rename from 
llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll index 014ff81bada19..e757261e7363d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32( @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32( @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32( @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32( @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32( @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64( @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64( @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64( %0, iXLen %1) nounwind { ; 
CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64( @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64( @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll deleted file mode 100644 index d8d55df8e4589..0000000000000 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v9, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret 
%a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64( - , - i64); - -define 
@intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64( - , - i64); - -define @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll index ab5f66bf692fa..eedc7c1633999 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32( 
@llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32( @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32( 
@llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32( @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define 
@intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32( @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64( @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ 
-267,7 +269,7 @@ define @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64( @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64( @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, 
mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64( @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll deleted file mode 100644 index 32a23932b074b..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32( - %0, - %1, 
- %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32( - , - i64); - 
-define @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, 
m2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64( - , - i64); - -define @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.f.xu.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll index 4835d4e5c5916..e6842b749492c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32( @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32( @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32( %0, - i32 %1) + 
iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32( @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32( 
@llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32( @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ 
define @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64( @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64( @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64( @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64( , - i32); + iXLen); -define @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64( @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll deleted file mode 100644 index 4020c1d5d1a34..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll index b464fdde6db25..2a7c30939f108 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( , 
- i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32( @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32( @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen 
%3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32( @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32( @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32( @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32 , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64( @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; 
CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64( @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64( @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64( %0, %1, %2, i32 %3) nounwind { 
+define @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64( @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll deleted file mode 100644 index ad695704aae0a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; 
CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, 
v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64( 
- %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll index 227210e6f2f01..9a14df186dd51 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16( @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16( , , , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16( @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16( @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare 
@llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16( @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16( 
@llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16( @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32( @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32( @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32( @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -393,7 +395,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32( @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32( %0, i32 %1) nounwind { 
+define @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32( @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 +446,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64( @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare 
@llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64( @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -528,10 +530,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64( @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ 
-584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64( @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -612,10 +614,10 @@ declare @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll deleted file mode 100644 index f7f873dd05155..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16( - 
%0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16( - %0, - i64 %1) - - ret %a -} - 
-declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll index 4bfe331db7e00..24d75d6d09b19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16( , , , - i32, - i32); + iXLen, + 
iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare 
@llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16( 
@llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32( %0, 
iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -393,7 +395,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32( , - i32); + iXLen); -define 
@intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 +446,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32 , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - 
i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -528,10 +530,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64( @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -612,10 +614,10 @@ declare @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll deleted file mode 100644 index d78d695f3df20..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16( - , - , - , 
- i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, 
mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64( %0, i64 %1) nounwind 
{ -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll index 
6c97455f1992e..0b8f9c62e50ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16( @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16( 
@llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16( @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16( 
@llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16( @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define 
@intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16( @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32( @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 
+311,7 @@ define @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32( @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32( @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ 
-393,7 +395,7 @@ define @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32( @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32( @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 +446,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64( @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64( @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -528,10 +530,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64( @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64( @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -612,10 +614,10 @@ declare @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll deleted file mode 100644 index c7bb913f3797f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a 
-} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16( %0, 
i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v10, v8 -; 
CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v10, v8 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v12, v8 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64( - , - i64); - -define @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v16, v8 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64( - , - , - , - i64, - i64); - -define 
@intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll index 4981f8b16d741..7d802cabd3f77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16( @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16( %0, %1, %2, 
iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16( @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16( @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16( %0, %1, %2, 
iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16( @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16( @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16( @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32( @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32( 
%0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32( @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32( @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -393,7 +395,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32( @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32( @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 +446,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32( , , , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64( @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64( @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -528,10 +530,10 @@ declare 
@llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64( @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64( @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ 
-612,10 +614,10 @@ declare @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll deleted file mode 100644 index f8419e81f7d06..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( - , - , - , - i64); - -define 
@intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfnmacc.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define 
@intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, 
mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv1f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfnmacc.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare 
@llvm.riscv.vfnmacc.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv1f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv2f64.f64( - , - double, - , - i64); - -define 
@intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmacc.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.nxv4f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmacc.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmacc.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll similarity index 90% rename from llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll 
rename to llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll index fa1767202c126..d46c29f3be78b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfnmacc.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfnmacc.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, 
mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfnmacc.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfnmacc.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) 
ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfnmacc.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfnmacc.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare 
@llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfnmacc.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfnmacc.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); 
-define @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfnmacc.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfnmacc.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, 
%2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfnmacc.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfnmacc.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfnmacc.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfnmacc.nxv2f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16( %0, 
half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfnmacc.nxv4f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfnmacc.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfnmacc.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfnmacc.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfnmacc.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfnmacc.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfnmacc.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfnmacc.nxv1f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfnmacc.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfnmacc.nxv4f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfnmacc.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll deleted file mode 100644 index ab407427952ac..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( - 
%0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( - , - , - , - , - 
i64); - -define @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) 
nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - 
-declare @llvm.riscv.vfnmadd.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( 
%0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv1f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - 
ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv1f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv2f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define 
@intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmadd.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.nxv4f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmadd.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmadd.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll similarity index 90% rename from llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll index e0d33062322b8..44810af5ab31b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 
-mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfnmadd.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfnmadd.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry 
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfnmadd.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfnmadd.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ 
entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfnmadd.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfnmadd.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) 
ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfnmadd.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfnmadd.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare 
@llvm.riscv.vfnmadd.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfnmadd.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfnmadd.nxv2f64.nxv2f64( , , , - i32); + iXLen); 
-define @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfnmadd.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfnmadd.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, 
i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfnmadd.nxv2f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfnmadd.nxv4f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfnmadd.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfnmadd.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16( %0, 
half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfnmadd.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfnmadd.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { 
; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfnmadd.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfnmadd.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfnmadd.nxv1f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfnmadd.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfnmadd.nxv4f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfnmadd.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll deleted file mode 100644 index 58e489618bc4e..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( - %0, - %1, 
- %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( - , - , - , - i64); - 
-define @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfnmsac.nxv2f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv4f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv1f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfnmsac.mask.nxv1f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv2f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv4f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( - , - half, - , - , - i64); - -define 
@intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv8f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv8f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv16f16.f16( - , - half, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv16f16.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( - , - half, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv1f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv2f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv4f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv8f32.f32( - , - float, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv8f32.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( - , - float, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfnmsac.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv1f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv1f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv1f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsac.nxv2f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv2f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv2f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} - 
-declare @llvm.riscv.vfnmsac.nxv4f64.f64( - , - double, - , - i64); - -define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.nxv4f64.f64( - %0, - double %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsac.mask.nxv4f64.f64( - , - double, - , - , - i64); - -define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsac.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll similarity index 90% rename from llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll index 834938c7d6e74..ff1bcfa86d3a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfnmsac.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfnmsac.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfnmsac.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { 
; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfnmsac.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfnmsac.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfnmsac.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfnmsac.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfnmsac.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfnmsac.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, 
tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfnmsac.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfnmsac.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) 
ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfnmsac.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfnmsac.nxv1f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare 
@llvm.riscv.vfnmsac.mask.nxv1f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfnmsac.nxv2f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfnmsac.nxv4f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f16.f16( half, , , - 
i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfnmsac.nxv8f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f16.f16( half, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfnmsac.nxv16f16.f16( , half, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv16f16.f16( half, , , - i32); + iXLen); -define 
@intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfnmsac.nxv1f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfnmsac.nxv2f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f32.f32( float, , , - i32); + iXLen); -define 
@intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfnmsac.nxv4f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f32.f32( float, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfnmsac.nxv8f32.f32( , float, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv8f32.f32( float, , , - i32); + iXLen); -define 
@intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ declare @llvm.riscv.vfnmsac.nxv1f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv1f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfnmsac.nxv2f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv2f64.f64( double, , , - i32); + iXLen); -define 
@intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare @llvm.riscv.vfnmsac.nxv4f64.f64( , double, , - i32); + iXLen); -define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { +define @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfnmsac.mask.nxv4f64.f64( double, , , - i32); + iXLen); -define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll deleted file mode 100644 index 67dbb5a92dfa3..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll +++ /dev/null @@ -1,1106 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { 
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare 
@llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( - , - , - , - , - i32); - -define 
@intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv1f64.nxv1f64( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f64.nxv1f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f64.nxv2f64( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f64.nxv2f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f64.nxv4f64( - , - , - , - i32); - -define @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f64.nxv4f64( - %0, - %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64( - , - , - , - , - i32); - -define @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv1f16.f16( - , - half, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
mf4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f16.f16( - , - half, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f16.f16( - , - half, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfnmsub.mask.nxv4f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv8f16.f16( - , - half, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv8f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv8f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv16f16.f16( - , - half, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv16f16.f16( - %0, - half %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( - , - half, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv16f16.f16( - %0, - half %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare 
@llvm.riscv.vfnmsub.nxv1f32.f32( - , - float, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f32.f32( - , - float, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f32.f32( - , - float, - , - i32); - -define 
@intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv4f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv8f32.f32( - , - float, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv8f32.f32( - %0, - float %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( - , - float, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv8f32.f32( - %0, - float %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv1f64.f64( - , - double, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv1f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv1f64.f64( - , - double, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv1f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv2f64.f64( - , - double, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv2f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv2f64.f64( - , - double, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv2f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} - -declare @llvm.riscv.vfnmsub.nxv4f64.f64( - , - double, - , - i32); - -define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.nxv4f64.f64( - %0, - double %1, - %2, - i32 %3) - - ret %a -} - -declare @llvm.riscv.vfnmsub.mask.nxv4f64.f64( - , - double, - , - , - i32); - -define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfnmsub.mask.nxv4f64.f64( - %0, - double %1, - %2, - %3, - i32 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll similarity index 90% rename from llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll index 07b23dbfb066d..e6ca32f34752c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfnmsub.nxv1f16.nxv1f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare 
@llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfnmsub.nxv2f16.nxv2f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfnmsub.nxv4f16.nxv4f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16( , , , - i64); + iXLen); -define 
@intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfnmsub.nxv8f16.nxv8f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfnmsub.nxv16f16.nxv16f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( 
%0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfnmsub.nxv1f32.nxv1f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfnmsub.nxv2f32.nxv2f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define 
@intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfnmsub.nxv4f32.nxv4f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfnmsub.nxv8f32.nxv8f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfnmsub.nxv1f64.nxv1f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfnmsub.nxv2f64.nxv2f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfnmsub.nxv4f64.nxv4f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64( , , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfnmsub.nxv1f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfnmsub.nxv2f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfnmsub.nxv4f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -662,7 +664,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, 
m1, tu, mu @@ -686,7 +688,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfnmsub.nxv8f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -708,7 +710,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -732,7 +734,7 @@ entry: half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfnmsub.nxv16f16.f16( , half, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -754,7 +756,7 @@ entry: %0, half %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv16f16.f16( half, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -778,7 +780,7 @@ entry: 
half %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfnmsub.nxv1f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -833,9 +835,9 @@ declare @llvm.riscv.vfnmsub.nxv2f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -846,7 +848,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -856,9 +858,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -870,7 +872,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen 
%4) ret %a } @@ -879,9 +881,9 @@ declare @llvm.riscv.vfnmsub.nxv4f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -892,7 +894,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -902,9 +904,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -916,7 +918,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -925,9 +927,9 @@ declare @llvm.riscv.vfnmsub.nxv8f32.f32( , float, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -938,7 +940,7 @@ entry: %0, float %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -948,9 +950,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv8f32.f32( float, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -962,7 +964,7 @@ entry: float %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -971,9 +973,9 @@ 
declare @llvm.riscv.vfnmsub.nxv1f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -984,7 +986,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -994,9 +996,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv1f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -1008,7 +1010,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -1017,9 +1019,9 @@ declare @llvm.riscv.vfnmsub.nxv2f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1030,7 +1032,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -1040,9 +1042,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv2f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -1054,7 +1056,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } @@ -1063,9 +1065,9 @@ declare 
@llvm.riscv.vfnmsub.nxv4f64.f64( , double, , - i64); + iXLen); -define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, i64 %3) nounwind { +define @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1076,7 +1078,7 @@ entry: %0, double %1, %2, - i64 %3) + iXLen %3) ret %a } @@ -1086,9 +1088,9 @@ declare @llvm.riscv.vfnmsub.mask.nxv4f64.f64( double, , , - i64); + iXLen); -define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64( %0, double %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -1100,7 +1102,7 @@ entry: double %1, %2, %3, - i64 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll deleted file mode 100644 index ccdd6ad371896..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll +++ /dev/null @@ -1,677 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfrdiv.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfrdiv_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfrdiv_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfrdiv_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.vfrdiv.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfrdiv_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfrdiv_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv32f16.f16( - , - half, - i64); - -define 
@intrinsic_vfrdiv_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfrdiv_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfrdiv_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, 
mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfrdiv_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfrdiv_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare 
@llvm.riscv.vfrdiv.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfrdiv_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfrdiv_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfrdiv_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfrdiv_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfrdiv_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrdiv.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfrdiv_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrdiv.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrdiv_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrdiv.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll index 1d502c84b1981..58dc39b995055 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfrdiv.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 
%2) nounwind { +define @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv4f16.f16( , half, - i32); + iXLen); -define 
@intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare 
@llvm.riscv.vfrdiv.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -266,7 +268,7 @@ entry: 
%1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -274,9 +276,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -286,7 +288,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -296,10 +298,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -311,7 +313,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -319,9 +321,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -331,7 +333,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -341,10 +343,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -356,7 +358,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -364,9 +366,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -376,7 +378,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -386,10 +388,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -401,7 +403,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -409,9 +411,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -421,7 +423,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -431,10 +433,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -446,7 +448,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -454,9 +456,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -466,7 +468,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -476,10 +478,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -491,7 +493,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -499,9 +501,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -511,7 +513,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -521,10 +523,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, 
i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -536,7 +538,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -544,9 +546,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -556,7 +558,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -566,10 +568,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -581,7 +583,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -589,9 +591,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -601,7 +603,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -611,10 +613,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv4f64.f64( , double, , - i32, - 
i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -626,7 +628,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -634,9 +636,9 @@ entry: declare @llvm.riscv.vfrdiv.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -646,7 +648,7 @@ entry: %a = call @llvm.riscv.vfrdiv.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -656,10 +658,10 @@ declare @llvm.riscv.vfrdiv.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -671,7 +673,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll deleted file mode 100644 index 4e0fe7b9fc62f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfrec7.nxv1f16( - , - i64); - -define 
@intrinsic_vfrec7_v_nxv1f16_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv1f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv2f16( - , - i64); - -define @intrinsic_vfrec7_v_nxv2f16_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv2f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv4f16( - , - i64); - -define @intrinsic_vfrec7_v_nxv4f16_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfrec7.mask.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv4f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv8f16( - , - i64); - -define @intrinsic_vfrec7_v_nxv8f16_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv8f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv16f16( - , - i64); - -define @intrinsic_vfrec7_v_nxv16f16_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v12, v0.t -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfrec7.mask.nxv16f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv32f16( - , - i64); - -define @intrinsic_vfrec7_v_nxv32f16_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv32f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv1f32( - , - i64); - -define @intrinsic_vfrec7_v_nxv1f32_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv1f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv2f32( - , - i64); - -define @intrinsic_vfrec7_v_nxv2f32_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e32, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv2f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv4f32( - , - i64); - -define @intrinsic_vfrec7_v_nxv4f32_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv4f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv8f32( - , - i64); - -define @intrinsic_vfrec7_v_nxv8f32_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv8f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv16f32( - , - i64); - -define @intrinsic_vfrec7_v_nxv16f32_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv16f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv1f64( - , - i64); - -define @intrinsic_vfrec7_v_nxv1f64_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrec7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv1f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv2f64( - , - i64); - -define 
@intrinsic_vfrec7_v_nxv2f64_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrec7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv2f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv4f64( - , - i64); - -define @intrinsic_vfrec7_v_nxv4f64_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrec7.mask.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrec7.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv4f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrec7.nxv8f64( - , - i64); - -define @intrinsic_vfrec7_v_nxv8f64_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfrec7.mask.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrec7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrec7.mask.nxv8f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfrec7.ll index 30897b95deea2..3be9f912d7f2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfrec7.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv1f16_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv1f16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfrec7_v_nxv1f16_nxv1f16( @llvm.riscv.vfrec7.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfrec7.mask.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv2f16_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv2f16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfrec7_v_nxv2f16_nxv2f16( @llvm.riscv.vfrec7.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfrec7.mask.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv4f16_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv4f16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfrec7_v_nxv4f16_nxv4f16( @llvm.riscv.vfrec7.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfrec7.mask.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ 
entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv8f16_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv8f16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfrec7_v_nxv8f16_nxv8f16( @llvm.riscv.vfrec7.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfrec7.mask.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv16f16_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv16f16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfrec7_v_nxv16f16_nxv16f16( @llvm.riscv.vfrec7.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfrec7.mask.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv32f16( , - i32); + 
iXLen); -define @intrinsic_vfrec7_v_nxv32f16_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv32f16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfrec7_v_nxv32f16_nxv32f16( @llvm.riscv.vfrec7.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfrec7.mask.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv1f32_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv1f32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfrec7_v_nxv1f32_nxv1f32( @llvm.riscv.vfrec7.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfrec7.mask.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv2f32_nxv2f32( %0, i32 %1) nounwind { +define 
@intrinsic_vfrec7_v_nxv2f32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfrec7_v_nxv2f32_nxv2f32( @llvm.riscv.vfrec7.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfrec7.mask.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv4f32_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv4f32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfrec7_v_nxv4f32_nxv4f32( @llvm.riscv.vfrec7.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfrec7.mask.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv8f32_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv8f32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfrec7_v_nxv8f32_nxv8f32( @llvm.riscv.vfrec7.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfrec7.mask.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv16f32_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv16f32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define @intrinsic_vfrec7_v_nxv16f32_nxv16f32( @llvm.riscv.vfrec7.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfrec7.mask.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv1f64_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv1f64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define 
@intrinsic_vfrec7_v_nxv1f64_nxv1f64( @llvm.riscv.vfrec7.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfrec7.mask.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv2f64_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv2f64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfrec7_v_nxv2f64_nxv2f64( @llvm.riscv.vfrec7.nxv2f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfrec7.mask.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv4f64_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv4f64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfrec7_v_nxv4f64_nxv4f64( @llvm.riscv.vfrec7.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare 
@llvm.riscv.vfrec7.mask.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrec7.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfrec7_v_nxv8f64_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfrec7_v_nxv8f64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfrec7_v_nxv8f64_nxv8f64( @llvm.riscv.vfrec7.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfrec7.mask.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll deleted file mode 100644 index bb173e91ecf31..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfredmax.nxv4f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv4f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv4f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv4f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv4f16.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv4f16.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv2f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv2f32.nxv1f32( - %0, - %1, - %2, - i64 
%3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv2f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv2f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32( - , - , - , - , - 
i64); - -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv2f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv2f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv2f32.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv2f32.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i64 
%4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv1f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv1f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: 
# %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv1f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv1f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmax.nxv1f64.nxv8f64( - , - , - , - i64); - -define @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredmax.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.nxv1f64.nxv8f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64( - , - , - , - , - i64); - -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredmax.vs 
v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfredmax.ll index 25ed3f1ab3676..0a2d72bf382aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfredmax.nxv4f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfredmax.nxv4f16.nxv2f16( 
, , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfredmax.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfredmax.nxv4f16.nxv8f16( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfredmax.nxv4f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfredmax.nxv4f16.nxv32f16( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfredmax.nxv2f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfredmax.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfredmax.nxv2f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfredmax.nxv2f32.nxv8f32( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfredmax.nxv2f32.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfredmax.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfredmax.nxv1f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -594,7 +596,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfredmax.nxv1f64.nxv4f64( , , , - i32); + iXLen); -define 
@intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -640,7 +642,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfredmax.nxv1f64.nxv8f64( , , , - i32); + iXLen); -define @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -662,7 +664,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -686,7 +688,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll deleted file mode 100644 index 
d04ef7a6707de..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfredmin.nxv4f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv4f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv4f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - 
%a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv4f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv4f16.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv4f16.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16( - %0, - %1, - %2, - %3, - 
i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv2f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv2f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv2f32.nxv4f32( - , - , - , - i64); - 
-define @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv2f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv2f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv2f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv2f32.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv2f32.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv1f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv1f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv1f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv1f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredmin.nxv1f64.nxv8f64( - , - , - , - i64); - -define @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.nxv1f64.nxv8f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64( - , - , - , - , - i64); - -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredmin.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfredmin.ll index 9561be7b6fc09..4d0301d3485ce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfredmin.nxv4f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16.nxv1i1( , , , - i32); + iXLen); 
-define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfredmin.nxv4f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfredmin.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16.nxv4i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfredmin.nxv4f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfredmin.nxv4f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16.nxv16i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfredmin.nxv4f16.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfredmin.nxv2f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32.nxv1i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfredmin.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfredmin.nxv2f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32.nxv4i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfredmin.nxv2f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfredmin.nxv2f32.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32.nxv16i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfredmin.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfredmin.nxv1f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64.nxv2i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -594,7 +596,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfredmin.nxv1f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -640,7 +642,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfredmin.nxv1f64.nxv8f64( , , , - i32); + iXLen); -define @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -662,7 +664,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64.nxv8i1( , , , - i32); + iXLen); -define 
@intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -686,7 +688,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll deleted file mode 100644 index 8c42e43d9094d..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfredosum.nxv4f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv4f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv4f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv4f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv4f16.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; 
CHECK-NEXT: vfredosum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv4f16.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv2f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv2f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vfredosum.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv2f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv2f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv2f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv2f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - 
- ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv2f32.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv2f32.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64( 
- , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv1f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv1f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv1f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv1f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64( - , - , - , - , - i64); - -define 
@intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredosum.nxv1f64.nxv8f64( - , - , - , - i64); - -define @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.nxv1f64.nxv8f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64( - , - , - , - , - i64); - -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredosum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfredosum.ll index 1f1e68e0dbc9b..b814315d90cd1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 
-mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfredosum.nxv4f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfredosum.nxv4f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfredosum.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfredosum.nxv4f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfredosum.nxv4f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfredosum.nxv4f16.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfredosum.nxv2f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfredosum.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfredosum.nxv2f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfredosum.nxv2f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfredosum.nxv2f32.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32.nxv16i1 , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfredosum.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfredosum.nxv1f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -594,7 +596,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfredosum.nxv1f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -640,7 +642,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfredosum.nxv1f64.nxv8f64( , , , - i32); + iXLen); -define @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -662,7 +664,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -686,7 +688,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll deleted file mode 100644 index 9264397afa0e7..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfredusum.nxv4f16.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv4f16.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv4f16.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv4f16.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv4f16.nxv4f16( - %0, - %1, - %2, - i64 %3) - - 
ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv4f16.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv4f16.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv4f16.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv4f16.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16( - , - 
, - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv4f16.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv4f16.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv2f32.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32( - , - , - , - , - i64); - -define 
@intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv2f32.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv2f32.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i64 
%4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv2f32.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv2f32.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv1f64.nxv1f64( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv1f64.nxv1f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv1f64.nxv2f64( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv1f64.nxv2f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv1f64.nxv4f64( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv1f64.nxv4f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfredusum.nxv1f64.nxv8f64( - , - , - , - i64); - -define @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: vfredusum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.nxv1f64.nxv8f64( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64( - , - , - , - , - i64); - -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu -; CHECK-NEXT: 
vfredusum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfredusum.ll index 7b3691c74887c..e6ff649ab398c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare 
@llvm.riscv.vfredusum.nxv4f16.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfredusum.nxv4f16.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare 
@llvm.riscv.vfredusum.nxv4f16.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfredusum.nxv4f16.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16.nxv16i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare 
@llvm.riscv.vfredusum.nxv4f16.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16.nxv32i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfredusum.nxv2f32.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare 
@llvm.riscv.vfredusum.nxv2f32.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfredusum.nxv2f32.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare 
@llvm.riscv.vfredusum.nxv2f32.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfredusum.nxv2f32.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32.nxv16i1 , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare 
@llvm.riscv.vfredusum.nxv1f64.nxv1f64( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64.nxv1i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu @@ -548,7 +550,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfredusum.nxv1f64.nxv2f64( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64.nxv2i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu @@ -594,7 +596,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare 
@llvm.riscv.vfredusum.nxv1f64.nxv4f64( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64.nxv4i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu @@ -640,7 +642,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfredusum.nxv1f64.nxv8f64( , , , - i32); + iXLen); -define @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -662,7 +664,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64.nxv8i1( , , , - i32); + iXLen); -define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu @@ -686,7 +688,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll deleted file mode 100644 index cf9d7a4c0af5a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll +++ /dev/null @@ -1,617 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfrsqrt7.nxv1f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv1f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv2f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: 
ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv2f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv4f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv4f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv8f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv8f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv16f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv16f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv32f16( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv32f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv32f16( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv32f16( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv1f32( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv1f32( - , - , - , - i64, - i64); - -define 
@intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv1f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv2f32( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv2f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv4f32( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfrsqrt7.mask.nxv4f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv8f32( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv8f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv16f32( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv16f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv16f32( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv16f32( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv1f64( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv1f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv1f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv1f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv2f64( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv2f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv2f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv2f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv4f64( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv4f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv4f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64( %0, 
%1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv4f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.nxv8f64( - , - i64); - -define @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.nxv8f64( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfrsqrt7.mask.nxv8f64( - , - , - , - i64, - i64); - -define @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsqrt7.mask.nxv8f64( - %1, - %2, - %0, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll index d0198a85b0c52..a521b6c2f2b57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck 
%s declare @llvm.riscv.vfrsqrt7.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -14,7 +16,7 @@ define @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16( @llvm.riscv.vfrsqrt7.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -23,10 +25,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -37,16 +39,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -55,7 +57,7 @@ define @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16( @llvm.riscv.vfrsqrt7.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -64,10 +66,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -78,16 +80,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv4f16( , - i32); + iXLen); -define 
@intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -96,7 +98,7 @@ define @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16( @llvm.riscv.vfrsqrt7.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -105,10 +107,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -119,16 +121,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -137,7 +139,7 @@ define @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16( @llvm.riscv.vfrsqrt7.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -146,10 +148,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -160,16 +162,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16( %0, i32 %1) nounwind { +define 
@intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -178,7 +180,7 @@ define @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16( @llvm.riscv.vfrsqrt7.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -187,10 +189,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,16 +203,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv32f16( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -219,7 +221,7 @@ define @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16( @llvm.riscv.vfrsqrt7.nxv32f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -228,10 +230,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -242,16 +244,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32( %0, iXLen %1) nounwind 
{ ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -260,7 +262,7 @@ define @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32( @llvm.riscv.vfrsqrt7.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -269,10 +271,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -283,16 +285,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -301,7 +303,7 @@ define @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32( @llvm.riscv.vfrsqrt7.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -310,10 +312,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -324,16 +326,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -342,7 +344,7 @@ define @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32( @llvm.riscv.vfrsqrt7.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -351,10 +353,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -365,16 +367,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -383,7 +385,7 @@ define @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32( @llvm.riscv.vfrsqrt7.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -392,10 +394,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -406,16 +408,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv16f32( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -424,7 +426,7 @@ define 
@intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32( @llvm.riscv.vfrsqrt7.nxv16f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -447,16 +449,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv1f64( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -465,7 +467,7 @@ define @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64( @llvm.riscv.vfrsqrt7.nxv1f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -474,10 +476,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -488,16 +490,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv2f64( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -506,7 +508,7 @@ define @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64( @llvm.riscv.vfrsqrt7.nxv2f64( %0, - i32 %1) 
+ iXLen %1) ret %a } @@ -515,10 +517,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -529,16 +531,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv4f64( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -547,7 +549,7 @@ define @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64( @llvm.riscv.vfrsqrt7.nxv4f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -556,10 +558,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -570,16 +572,16 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfrsqrt7.nxv8f64( , - i32); + iXLen); -define @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64( %0, i32 %1) nounwind { +define @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -588,7 +590,7 @@ define @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64( @llvm.riscv.vfrsqrt7.nxv8f64( %0, - i32 %1) + iXLen %1) ret %a } @@ -597,10 +599,10 @@ declare @llvm.riscv.vfrsqrt7.mask.nxv8f64( , , , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -611,7 +613,7 @@ entry: %1, %2, %0, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll deleted file mode 100644 index 0477554c91419..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll +++ /dev/null @@ -1,678 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ -; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfrsub.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: 
vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv32f16.f16( - %0, - half %1, - i64 %2) - - 
ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define 
@intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, 
m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfrsub.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfrsub.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfrsub.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfrsub.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfrsub.ll index eab5b2a414e18..3fb281562088c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfrsub.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, 
half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfrsub.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfrsub.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfrsub.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfrsub.nxv4f16.f16( , half, - i32); + iXLen); 
-define @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfrsub.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfrsub.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfrsub.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: 
declare @llvm.riscv.vfrsub.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfrsub.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfrsub.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfrsub.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -266,7 +268,7 @@ 
entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -274,9 +276,9 @@ entry: declare @llvm.riscv.vfrsub.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -286,7 +288,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -296,10 +298,10 @@ declare @llvm.riscv.vfrsub.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -311,7 +313,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -319,9 +321,9 @@ entry: declare @llvm.riscv.vfrsub.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -331,7 +333,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -341,10 +343,10 @@ declare @llvm.riscv.vfrsub.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -356,7 +358,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -364,9 +366,9 @@ entry: declare @llvm.riscv.vfrsub.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -376,7 +378,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -386,10 +388,10 @@ declare @llvm.riscv.vfrsub.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -401,7 +403,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -409,9 +411,9 @@ entry: declare @llvm.riscv.vfrsub.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -421,7 +423,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -431,10 +433,10 @@ declare @llvm.riscv.vfrsub.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -446,7 +448,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -454,9 +456,9 @@ entry: declare @llvm.riscv.vfrsub.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -466,7 +468,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -476,10 +478,10 @@ declare @llvm.riscv.vfrsub.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -491,7 +493,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -499,9 +501,9 @@ entry: declare @llvm.riscv.vfrsub.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -511,7 +513,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -521,10 +523,10 @@ declare @llvm.riscv.vfrsub.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, 
i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -536,7 +538,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -544,9 +546,9 @@ entry: declare @llvm.riscv.vfrsub.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -556,7 +558,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -566,10 +568,10 @@ declare @llvm.riscv.vfrsub.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -581,7 +583,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -589,9 +591,9 @@ entry: declare @llvm.riscv.vfrsub.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -601,7 +603,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -611,10 +613,10 @@ declare @llvm.riscv.vfrsub.mask.nxv4f64.f64( , double, , - i32, - 
i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -626,7 +628,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -634,9 +636,9 @@ entry: declare @llvm.riscv.vfrsub.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -646,7 +648,7 @@ entry: %a = call @llvm.riscv.vfrsub.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -656,10 +658,10 @@ declare @llvm.riscv.vfrsub.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -671,7 +673,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll deleted file mode 100644 index d71fb8fb25352..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfsgnj.nxv1f16( - , - , - i64); - 
-define @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f16( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f16( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfsgnj.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f16( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv16f16( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv32f16( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv1f32( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.vfsgnj.mask.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f32( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f32( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f32( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfsgnj_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv16f32( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv1f64( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfsgnj.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f64( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f64( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f64( - , - , - i64); - -define @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfsgnj.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv8f16_f16( 
%0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnj_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 
-; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnj_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnj_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f32.f32( - , - , - float, - , - i64, - 
i64); - -define @intrinsic_vfsgnj_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnj_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnj_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnj_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnj_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv1f64.f64( - %0, - %1, - double %2, - 
%3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnj_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnj_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnj.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnj_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; 
CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnj.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnj_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnj.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll index c0e999a2433d0..65a5592775cb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = 
call @llvm.riscv.vfsgnj.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define 
@intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f32.nxv4f32( , , - i32); + iXLen); -define 
@intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare 
@llvm.riscv.vfsgnj.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, 
i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, 
e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f64.nxv8f64( , , - i32); + iXLen); -define @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ 
declare @llvm.riscv.vfsgnj.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = 
call @llvm.riscv.vfsgnj.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfsgnj.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 
%2) nounwind { +define @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfsgnj.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfsgnj.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll deleted file mode 100644 index f751fd7406389..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfsgnjn.nxv1f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv4f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfsgnjn.nxv8f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv16f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv32f16( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv1f32( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f32( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare 
@llvm.riscv.vfsgnjn.mask.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv4f32( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv8f32( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv16f32( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv1f64( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f64( - %0, - %1, - %2, - %3, - i64 
%4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f64( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv4f64( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv8f64( - , - , - i64); - -define @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f16.f16( - %0, - half 
%1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 
%4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjn_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: 
vfsgnjn.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjn_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjn_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - 
-declare @llvm.riscv.vfsgnjn.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjn_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjn_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjn_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfsgnjn_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjn_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjn_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjn_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjn_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( 
- , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjn_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll index 0287f9ea2cbf3..f16c8a6db1fb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ 
declare @llvm.riscv.vfsgnjn.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = 
call @llvm.riscv.vfsgnjn.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) 
nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) 
nounwind { +define @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f64.nxv8f64( , , - i32); 
+ iXLen); -define @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 
+729,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 
+811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { 
+define @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f32.f32( , float, , - i32, - i32); + 
iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv16f32.f32( %0, float %1, - 
i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfsgnjn.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define 
@intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfsgnjn.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfsgnjn.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll deleted file mode 100644 index 4ae69f0d4f613..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll +++ /dev/null @@ -1,1355 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfsgnjx.nxv1f16( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv2f16( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv4f16( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f16( - , 
- , - i64); - -define @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv16f16( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv32f16( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; 
CHECK-NEXT: vfsgnjx.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv1f32( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv2f32( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv2f32( - , - , - , - , 
- i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv4f32( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f32( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; 
CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv16f32( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv1f64( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfsgnjx.nxv2f64( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv4f64( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f64( - , - , - i64); - -define @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, 
m8, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - 
-declare @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfsgnjx_vf_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, 
v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjx_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjx_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare 
@llvm.riscv.vfsgnjx.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjx_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjx_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfsgnjx_vf_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfsgnjx_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjx_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjx_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjx_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfsgnjx_vf_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( 
- , - , - double, - , - i64, - i64); - -define @intrinsic_vfsgnjx_mask_vf_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll index fba4eceb23fb6..edfd578ce8aa0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ 
declare @llvm.riscv.vfsgnjx.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = 
call @llvm.riscv.vfsgnjx.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # 
%entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) 
nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f64.nxv2f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) 
nounwind { +define @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f64.nxv8f64( , , - i32); 
+ iXLen); -define @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -727,9 
+729,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 
+811,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { 
+define @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f32.f32( , float, , - i32, - i32); + 
iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv16f32.f32( %0, float %1, - 
i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfsgnjx.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define 
@intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call @llvm.riscv.vfsgnjx.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfsgnjx.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll deleted file mode 100644 index a0ba31ea26686..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll +++ /dev/null @@ -1,677 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfslide1down.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1down.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1down.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { 
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1down.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll similarity index 85% rename from llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll index 5baadc48d857e..6cbba483a8d9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfslide1down.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16( %0, 
%1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); 
-define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call 
@llvm.riscv.vfslide1down.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -266,7 +268,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -274,9 +276,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32( %0, 
float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -286,7 +288,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -296,10 +298,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -311,7 +313,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -319,9 +321,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -331,7 +333,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -341,10 +343,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -356,7 +358,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -364,9 +366,9 @@ entry: declare 
@llvm.riscv.vfslide1down.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -376,7 +378,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -386,10 +388,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -401,7 +403,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -409,9 +411,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -421,7 +423,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -431,10 +433,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -446,7 +448,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -454,9 +456,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -466,7 +468,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -476,10 +478,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -491,7 +493,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -499,9 +501,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -511,7 +513,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -521,10 +523,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, 
double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -536,7 +538,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -544,9 +546,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -556,7 +558,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -566,10 +568,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -581,7 +583,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -589,9 +591,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -601,7 +603,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv4f64.f64( %0, double %1, - i32 %2) + 
iXLen %2) ret %a } @@ -611,10 +613,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -626,7 +628,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -634,9 +636,9 @@ entry: declare @llvm.riscv.vfslide1down.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -646,7 +648,7 @@ entry: %a = call @llvm.riscv.vfslide1down.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -656,10 +658,10 @@ declare @llvm.riscv.vfslide1down.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -671,7 +673,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll deleted file mode 100644 index 4b7d1fe55e244..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll +++ /dev/null @@ -1,692 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfslide1up.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, 
fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, 
fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; 
CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, 
m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfslide1up.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 -; CHECK-NEXT: vmv.v.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfslide1up.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfslide1up.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll index 271bf70522bfa..695cf7aab3f6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; 
RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfslide1up.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -17,7 +19,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -27,10 +29,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -42,7 +44,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -50,9 +52,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -63,7 +65,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -73,10 +75,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -88,7 +90,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -96,9 +98,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -109,7 +111,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -119,10 +121,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -134,7 +136,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -142,9 +144,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -155,7 +157,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -165,10 +167,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f16.f16( , half, , - 
i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -180,7 +182,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -188,9 +190,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,7 +203,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -211,10 +213,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -226,7 +228,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -234,9 +236,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -247,7 +249,7 @@ entry: %a = call 
@llvm.riscv.vfslide1up.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -257,10 +259,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -272,7 +274,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -280,9 +282,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -293,7 +295,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -303,10 +305,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -318,7 +320,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -326,9 +328,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -339,7 +341,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -349,10 +351,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -364,7 +366,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -372,9 +374,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -385,7 +387,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -395,10 +397,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -410,7 +412,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -418,9 +420,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv8f32.f32( , float, - i32); + iXLen); -define 
@intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -431,7 +433,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -441,10 +443,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -456,7 +458,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -464,9 +466,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -477,7 +479,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -487,10 +489,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -502,7 +504,7 @@ entry: 
%1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -510,9 +512,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -523,7 +525,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -533,10 +535,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -548,7 +550,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -556,9 +558,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -569,7 +571,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -579,10 +581,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind 
{ ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -594,7 +596,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -602,9 +604,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -615,7 +617,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -625,10 +627,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -640,7 +642,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -648,9 +650,9 @@ entry: declare @llvm.riscv.vfslide1up.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -661,7 +663,7 @@ entry: %a = call @llvm.riscv.vfslide1up.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -671,10 +673,10 @@ declare @llvm.riscv.vfslide1up.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -686,7 +688,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll deleted file mode 100644 index 3b86fd763f3c9..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll +++ /dev/null @@ -1,548 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfsqrt.nxv1f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv1f16_nxv1f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv1f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv1f16( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv1f16( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv2f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv2f16_nxv2f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfsqrt.nxv2f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv2f16( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv2f16( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv4f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv4f16_nxv4f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv4f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv4f16( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv4f16( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv8f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv8f16_nxv8f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv8f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv8f16( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; 
CHECK-NEXT: vfsqrt.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv8f16( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv16f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv16f16_nxv16f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv16f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv16f16( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv16f16( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv32f16( - , - i32); - -define @intrinsic_vfsqrt_v_nxv32f16_nxv32f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv32f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv1f32( - , - i32); - -define @intrinsic_vfsqrt_v_nxv1f32_nxv1f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv1f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv1f32( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv1f32( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv2f32( - , - i32); - -define @intrinsic_vfsqrt_v_nxv2f32_nxv2f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv2f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv2f32( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv2f32( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv4f32( - , - i32); - -define @intrinsic_vfsqrt_v_nxv4f32_nxv4f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv4f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv4f32( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv4f32( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv8f32( - , - i32); - -define @intrinsic_vfsqrt_v_nxv8f32_nxv8f32( %0, i32 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfsqrt_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv8f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv8f32( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv8f32( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv16f32( - , - i32); - -define @intrinsic_vfsqrt_v_nxv16f32_nxv16f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv16f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv1f64( - , - i32); - -define @intrinsic_vfsqrt_v_nxv1f64_nxv1f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv1f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv1f64( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv1f64( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv2f64( - , - i32); - -define @intrinsic_vfsqrt_v_nxv2f64_nxv2f64( %0, 
i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv2f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv2f64( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv2f64( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv4f64( - , - i32); - -define @intrinsic_vfsqrt_v_nxv4f64_nxv4f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv4f64( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.mask.nxv4f64( - , - , - , - i32, - i32); - -define @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.mask.nxv4f64( - %1, - %2, - %0, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfsqrt.nxv8f64( - , - i32); - -define @intrinsic_vfsqrt_v_nxv8f64_nxv8f64( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsqrt.v v8, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsqrt.nxv8f64( - %0, - i32 %1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll similarity index 80% rename from llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll index c810a516f3b35..d944375d645cb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll @@ -1,22 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsqrt.nxv1f16( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv1f16_nxv1f16( +define @intrinsic_vfsqrt_v_nxv1f16_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv1f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -25,45 +25,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16( +define @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv1f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv2f16( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv2f16_nxv2f16( +define @intrinsic_vfsqrt_v_nxv2f16_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vfsqrt_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv2f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -72,45 +66,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16( +define @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv2f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv4f16( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv4f16_nxv4f16( +define @intrinsic_vfsqrt_v_nxv4f16_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv4f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -119,45 +107,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16( +define @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv4f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv8f16( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv8f16_nxv8f16( +define 
@intrinsic_vfsqrt_v_nxv8f16_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv8f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -166,45 +148,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16( +define @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v10, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv8f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv16f16( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv16f16_nxv16f16( +define @intrinsic_vfsqrt_v_nxv16f16_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv16f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -213,45 +189,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv16f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16( +define @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v12, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv16f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv32f16( , - i64); + 
iXLen); -define @intrinsic_vfsqrt_v_nxv32f16_nxv32f16( +define @intrinsic_vfsqrt_v_nxv32f16_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv32f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -260,45 +230,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv32f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16( +define @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v16, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv32f16( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv1f32( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv1f32_nxv1f32( +define @intrinsic_vfsqrt_v_nxv1f32_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv1f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -307,45 +271,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32( +define @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv1f32( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 
1) ret %a } declare @llvm.riscv.vfsqrt.nxv2f32( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv2f32_nxv2f32( +define @intrinsic_vfsqrt_v_nxv2f32_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv2f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -354,45 +312,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32( +define @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv2f32( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv4f32( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv4f32_nxv4f32( +define @intrinsic_vfsqrt_v_nxv4f32_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv4f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -401,45 +353,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32( +define @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v10, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv4f32( %0, 
%1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv8f32( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv8f32_nxv8f32( +define @intrinsic_vfsqrt_v_nxv8f32_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv8f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -448,45 +394,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32( +define @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v12, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv8f32( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv16f32( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv16f32_nxv16f32( +define @intrinsic_vfsqrt_v_nxv16f32_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv16f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -495,45 +435,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv16f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32( +define @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v16, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) 
nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv16f32( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv1f64( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv1f64_nxv1f64( +define @intrinsic_vfsqrt_v_nxv1f64_nxv1f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv1f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -542,45 +476,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv1f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64( +define @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv1f64( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv2f64( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv2f64_nxv2f64( +define @intrinsic_vfsqrt_v_nxv2f64_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv2f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -589,45 +517,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv2f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64( +define @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v10, v0.t ; 
CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv2f64( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv4f64( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv4f64_nxv4f64( +define @intrinsic_vfsqrt_v_nxv4f64_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv4f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -636,45 +558,39 @@ declare @llvm.riscv.vfsqrt.mask.nxv4f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64( +define @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v12, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv4f64( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfsqrt.nxv8f64( , - i64); + iXLen); -define @intrinsic_vfsqrt_v_nxv8f64_nxv8f64( +define @intrinsic_vfsqrt_v_nxv8f64_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %0, - i64 %1) nounwind { entry: %a = call @llvm.riscv.vfsqrt.nxv8f64( %0, - i64 %1) + iXLen %1) ret %a } @@ -683,25 +599,21 @@ declare @llvm.riscv.vfsqrt.mask.nxv8f64( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64( +define @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, 
ta, mu ; CHECK-NEXT: vfsqrt.v v8, v16, v0.t ; CHECK-NEXT: ret - %0, - %1, - %2, - i64 %3) nounwind { entry: %a = call @llvm.riscv.vfsqrt.mask.nxv8f64( %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll deleted file mode 100644 index 7445cfb806d43..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll +++ /dev/null @@ -1,1356 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \ -; RUN: -mattr=+d -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f16.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f16.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f16.nxv2f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv2f16.nxv2f16( - %0, - %1, - i64 
%2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f16.nxv4f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f16.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f16.nxv8f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f16.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16( - , - , - , - , - i64, - i64); - -define 
@intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv16f16.nxv16f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv16f16.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv32f16.nxv32f16( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv32f16.nxv32f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv1f32.nxv1f32( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f32.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f32.nxv2f32( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv2f32.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, 
m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f32.nxv4f32( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f32.nxv8f32( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f32.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32( - %0, - %1, - %2, - 
%3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv16f32.nxv16f32( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv16f32.nxv16f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv1f64.nxv1f64( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f64.nxv1f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f64.nxv2f64( - , - , - i64); - 
-define @intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv2f64.nxv2f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f64.nxv4f64( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f64.nxv4f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f64.nxv8f64( - , - , - i64); - -define @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f64.nxv8f64( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64( - , - , - , - , - i64, - i64); - -define @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret 
-entry: - %a = call @llvm.riscv.vfsub.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f16.f16( - , - , - half, - , - 
i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv32f16.f16( - , - half, - i64); - -define @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv32f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv32f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv32f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf 
v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret 
%a -} - -declare @llvm.riscv.vfsub.nxv16f32.f32( - , - float, - i64); - -define @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv16f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv16f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv16f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv1f64.f64( - , - double, - i64); - -define @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv1f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv1f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv1f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv2f64.f64( - , - double, - i64); - -define @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64( %0, double 
%1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv2f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv2f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv2f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv4f64.f64( - , - double, - i64); - -define @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv4f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv4f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv4f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfsub.nxv8f64.f64( - , - double, - i64); - -define @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, 
a0, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.nxv8f64.f64( - %0, - double %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfsub.mask.nxv8f64.f64( - , - , - double, - , - i64, - i64); - -define @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfsub.mask.nxv8f64.f64( - %0, - %1, - double %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfsub.ll index 86371c1685fc0..645fb340ffa30 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare 
@llvm.riscv.vfsub.mask.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret 
%a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfsub.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfsub.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call 
@llvm.riscv.vfsub.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -221,7 +223,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -229,9 +231,9 @@ entry: declare @llvm.riscv.vfsub.nxv32f16.nxv32f16( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -241,7 +243,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv32f16.nxv32f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -251,10 +253,10 @@ declare @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re16.v v24, (a0) @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfsub.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a0, e32, mf2, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -402,7 +404,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -410,9 +412,9 @@ entry: declare @llvm.riscv.vfsub.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -422,7 +424,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -432,10 +434,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -447,7 +449,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -455,9 +457,9 @@ entry: declare @llvm.riscv.vfsub.nxv16f32.nxv16f32( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { +define 
@intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -467,7 +469,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv16f32.nxv16f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -477,10 +479,10 @@ declare @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) @@ -493,7 +495,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfsub.nxv1f64.nxv1f64( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f64.nxv2f64( , , - i32); + iXLen); -define 
@intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f64.nxv2f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f64.nxv4f64( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f64.nxv4f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare 
@llvm.riscv.vfsub.nxv8f64.nxv8f64( , , - i32); + iXLen); -define @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { +define @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv8f64.nxv8f64( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) @@ -674,7 +676,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -682,9 +684,9 @@ entry: declare @llvm.riscv.vfsub.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -694,7 +696,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -704,10 +706,10 @@ declare @llvm.riscv.vfsub.mask.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -719,7 +721,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } 
@@ -727,9 +729,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -739,7 +741,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -749,10 +751,10 @@ declare @llvm.riscv.vfsub.mask.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -764,7 +766,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -772,9 +774,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -784,7 +786,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -794,10 +796,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -809,7 +811,7 @@ entry: %1, half %2, 
%3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -817,9 +819,9 @@ entry: declare @llvm.riscv.vfsub.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -829,7 +831,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -839,10 +841,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -854,7 +856,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -862,9 +864,9 @@ entry: declare @llvm.riscv.vfsub.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -874,7 +876,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -884,10 +886,10 @@ declare @llvm.riscv.vfsub.mask.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, ta, mu @@ -899,7 +901,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -907,9 +909,9 @@ entry: declare @llvm.riscv.vfsub.nxv32f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -919,7 +921,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv32f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -929,10 +931,10 @@ declare @llvm.riscv.vfsub.mask.nxv32f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu @@ -944,7 +946,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -952,9 +954,9 @@ entry: declare @llvm.riscv.vfsub.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -964,7 +966,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -974,10 +976,10 @@ declare @llvm.riscv.vfsub.mask.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -989,7 +991,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -997,9 +999,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1009,7 +1011,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1019,10 +1021,10 @@ declare @llvm.riscv.vfsub.mask.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1034,7 +1036,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1042,9 +1044,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1054,7 +1056,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1064,10 +1066,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1079,7 +1081,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1087,9 +1089,9 @@ entry: declare @llvm.riscv.vfsub.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1099,7 +1101,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1109,10 +1111,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1124,7 +1126,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1132,9 +1134,9 @@ entry: declare @llvm.riscv.vfsub.nxv16f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1144,7 +1146,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv16f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -1154,10 +1156,10 @@ declare @llvm.riscv.vfsub.mask.nxv16f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu @@ -1169,7 +1171,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1177,9 +1179,9 @@ entry: declare @llvm.riscv.vfsub.nxv1f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1189,7 +1191,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv1f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1199,10 +1201,10 @@ declare @llvm.riscv.vfsub.mask.nxv1f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu @@ -1214,7 +1216,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1222,9 +1224,9 @@ entry: declare @llvm.riscv.vfsub.nxv2f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1234,7 +1236,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv2f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1244,10 +1246,10 
@@ declare @llvm.riscv.vfsub.mask.nxv2f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu @@ -1259,7 +1261,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1267,9 +1269,9 @@ entry: declare @llvm.riscv.vfsub.nxv4f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1279,7 +1281,7 @@ entry: %a = call @llvm.riscv.vfsub.nxv4f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1289,10 +1291,10 @@ declare @llvm.riscv.vfsub.mask.nxv4f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu @@ -1304,7 +1306,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -1312,9 +1314,9 @@ entry: declare @llvm.riscv.vfsub.nxv8f64.f64( , double, - i32); + iXLen); -define @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, i32 %2) nounwind { +define @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1324,7 +1326,7 @@ entry: %a = call 
@llvm.riscv.vfsub.nxv8f64.f64( %0, double %1, - i32 %2) + iXLen %2) ret %a } @@ -1334,10 +1336,10 @@ declare @llvm.riscv.vfsub.mask.nxv8f64.f64( , double, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, i32 %4) nounwind { +define @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64( %0, %1, double %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu @@ -1349,7 +1351,7 @@ entry: %1, double %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll deleted file mode 100644 index a04b9a54b9306..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16( - %0, 
- %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, 
- %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32( - , - , - i64); - -define @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, fa0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a 
= call @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v10, v8, fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = 
call @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v16, v8, fa0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwadd.ll index a3bdcc4573287..541f2b8564f28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( 
, , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -17,7 +19,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -27,10 +29,10 @@ declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -50,9 +52,9 @@ entry: declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -63,7 +65,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -73,10 +75,10 @@ declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen 
%4, iXLen 1) ret %a } @@ -96,9 +98,9 @@ entry: declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -109,7 +111,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -119,10 +121,10 @@ declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -142,9 +144,9 @@ entry: declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -155,7 +157,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -165,10 +167,10 @@ declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -188,9 +190,9 @@ entry: declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,7 +203,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -211,10 +213,10 @@ declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16 , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -234,9 +236,9 @@ entry: declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -247,7 +249,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -257,10 +259,10 @@ declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define 
@intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -280,9 +282,9 @@ entry: declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -293,7 +295,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -303,10 +305,10 @@ declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -326,9 +328,9 @@ entry: declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -339,7 +341,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -349,10 +351,10 @@ declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, 
+ iXLen); -define @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -372,9 +374,9 @@ entry: declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -385,7 +387,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -395,10 +397,10 @@ declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -418,9 +420,9 @@ entry: declare @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -431,7 +433,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a 
} @@ -441,10 +443,10 @@ declare @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -456,7 +458,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -464,9 +466,9 @@ entry: declare @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -477,7 +479,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -487,10 +489,10 @@ declare @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -502,7 +504,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -510,9 +512,9 @@ entry: declare @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu 
@@ -523,7 +525,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -533,10 +535,10 @@ declare @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -548,7 +550,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -556,9 +558,9 @@ entry: declare @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -569,7 +571,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -579,10 +581,10 @@ declare @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -594,7 +596,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -602,9 +604,9 @@ entry: declare @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; 
CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -615,7 +617,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -625,10 +627,10 @@ declare @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -640,7 +642,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -648,9 +650,9 @@ entry: declare @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -661,7 +663,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -671,10 +673,10 @@ declare @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -686,7 +688,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -694,9 +696,9 @@ entry: declare @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( , float, - i32); + iXLen); -define 
@intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -707,7 +709,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -717,10 +719,10 @@ declare @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -732,7 +734,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -740,9 +742,9 @@ entry: declare @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -753,7 +755,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -763,10 +765,10 @@ declare @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -778,7 +780,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen 
%4, iXLen 1) ret %a } @@ -786,9 +788,9 @@ entry: declare @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -799,7 +801,7 @@ entry: %a = call @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -809,10 +811,10 @@ declare @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -824,7 +826,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll deleted file mode 100644 index 3586ec64e9dfe..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll +++ /dev/null @@ -1,1248 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare 
@llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16( - , - , - , - , - i64, - i64); - -define 
@intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind 
{ -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl4re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( - , - , - i64); - -define @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vl4re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv1f32.f16( - , - half, - i64); - -define @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f32.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv2f32.f16( - , - half, - i64); - -define @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f32.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, fa0, 
v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv4f32.f16( - , - half, - i64); - -define @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f32.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv8f32.f16( - , - half, - i64); - -define @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f32.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( - %0, - %1, - half %2, - 
%3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv16f32.f16( - , - half, - i64); - -define @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv16f32.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv1f64.f32( - , - float, - i64); - -define @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f64.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv2f64.f32( - , - 
float, - i64); - -define @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f64.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv4f64.f32( - , - float, - i64); - -define @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f64.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.nxv8f64.f32( - , - float, - i64); - -define @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i64 %2) 
nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f64.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32( - %0, - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( - %0, - %0, - half %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( - %0, - %0, - half %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( - %0, - %0, - half %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define 
@intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( - %0, - %0, - half %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( - %0, - %0, - half %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( - %0, - %0, - float %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( - %0, - %0, - float %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( - %0, - %0, - float %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( - %0, - %0, - float %1, - %2, - i64 %3, i64 1) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwadd.wv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwadd.wv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwadd.wv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwadd.wv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( - %1, - %0, - i64 %2) - - ret %a -} - -define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwadd.wv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( - %1, - %0, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll index 3d046d2ba8057..28cdfbf621b3d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { +define 
@intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( , , - i32); + 
iXLen); -define @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl4re16.v v24, (a0) @@ -222,7 +224,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen 
%4, iXLen 1) ret %a } @@ -230,9 +232,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -242,7 +244,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -252,10 +254,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,7 +334,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind 
{ ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl4re32.v v24, (a0) @@ -403,7 +405,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -411,9 +413,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv1f32.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -423,7 +425,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv1f32.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f32.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -448,7 +450,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -456,9 +458,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv2f32.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -468,7 +470,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -478,10 +480,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f32.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { 
+define @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -493,7 +495,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv4f32.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f32.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv8f32.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f32.f16( , half, , - i32, - i32); + iXLen, + 
iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv16f32.f16( , half, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv16f32.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv16f32.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv1f64.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.f32( %0, float %1, - i32 %2) + 
iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv1f64.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -673,7 +675,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -681,9 +683,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv2f64.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -693,7 +695,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -703,10 +705,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv2f64.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -718,7 +720,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -726,9 +728,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv4f64.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e32, m2, ta, mu @@ -738,7 +740,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -748,10 +750,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv4f64.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -763,7 +765,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -771,9 +773,9 @@ entry: declare @llvm.riscv.vfwadd.w.nxv8f64.f32( , float, - i32); + iXLen); -define @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -783,7 +785,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -793,10 +795,10 @@ declare @llvm.riscv.vfwadd.w.mask.nxv8f64.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -808,12 +810,12 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -825,12 +827,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -842,12 +844,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -859,12 +861,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -876,12 +878,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -893,12 +895,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define 
@intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -910,12 +912,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -927,12 +929,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -944,12 +946,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -961,12 +963,12 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -978,12 +980,12 @@ entry: %0, half %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -995,12 +997,12 @@ entry: %0, half %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -1012,12 +1014,12 @@ entry: %0, half %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -1029,12 +1031,12 @@ entry: %0, half %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -1046,12 +1048,12 @@ entry: %0, half %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -1063,12 +1065,12 @@ entry: %0, float %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1080,12 +1082,12 @@ entry: %0, float %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1097,12 +1099,12 @@ entry: %0, float %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1114,12 +1116,12 @@ entry: %0, float %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -1130,12 +1132,12 @@ entry: %a = 
call @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -1146,12 +1148,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -1162,12 +1164,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -1178,12 +1180,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -1194,12 +1196,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( 
%0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1210,12 +1212,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1226,12 +1228,12 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32( %1, %0, - i32 %2) + iXLen %2) ret %a } -define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1242,7 +1244,7 @@ entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( %1, %0, - i32 %2) + iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll deleted file mode 100644 index e050090b7761c..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16( - , - , - , - i64, - i64); - -define 
@intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32( - , - i64); - -define @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; 
CHECK-NEXT: vfwcvt.f.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll index 460488388b5ea..386fc9a4822ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16( @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, 
iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16( @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16( @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen 
%3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16( @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16( @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, 
%2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32( @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32( @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: 
%0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32( @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32( @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ 
entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll deleted file mode 100644 index e294cbe085f73..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8( - , - , - , - i64, - i64); - -define 
@intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8: -; CHECK: 
# %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret 
%a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16( 
%0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32( - , - i64); - -define @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll index 467b64c20fedd..ad4a3a4a5eb6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8( @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8( , , 
, - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8( @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8( @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8( , , , - i32, - i32); + 
iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8( @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8( @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8( , , , - i32, - i32); + 
iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8( @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16( @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16( , , , 
- i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16( @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16( @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare 
@llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -393,7 +395,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16( @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16( @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 
+446,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32( @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32( @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32( %0, - i32 %1) + iXLen %1) ret 
%a } @@ -528,10 +530,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32( @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32( @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32( %0, - i32 %1) + iXLen 
%1) ret %a } @@ -612,10 +614,10 @@ declare @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll deleted file mode 100644 index 107813b7879bd..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll +++ /dev/null @@ -1,632 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8( - , - i64); - -define 
@intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v 
v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16( - %0, - i64 %1) - - ret %a -} - -declare 
@llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32( - , - , - , - i64, - i64); - -define 
@intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32( - , - i64); - -define @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll index fc0af066c39ec..9eef34d4de1e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | 
llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8( @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8( @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, 
mf4, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8( @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8( @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, 
e8, m1, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8( @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8( @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16( @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16( @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16( @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -374,16 +376,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -393,7 +395,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16( @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -402,10 +404,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16( %0, %1, %2, iXLen 
%3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -416,16 +418,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -435,7 +437,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16( @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16( %0, - i32 %1) + iXLen %1) ret %a } @@ -444,10 +446,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -458,16 +460,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -477,7 +479,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32( @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -486,10 +488,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32( %0, %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -500,16 +502,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -519,7 +521,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32( @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -528,10 +530,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -542,16 +544,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -561,7 +563,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32( @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -570,10 +572,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32( %0, %1, %2, 
i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -584,16 +586,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32( , - i32); + iXLen); -define @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -603,7 +605,7 @@ define @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32( @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32( %0, - i32 %1) + iXLen %1) ret %a } @@ -612,10 +614,10 @@ declare @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -626,7 +628,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll deleted file mode 100644 index 4d551f62ec52f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; 
CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 
-; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32( - , - i64); - -define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: 
ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll index d6cf1b3563105..0f7a46aadfd15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 
+26,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16( 
@llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32( %0, iXLen %1) nounwind { ; 
CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32( %0, i32 %1) 
nounwind { +define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32( @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll deleted file mode 100644 index c419e08471ca1..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, i32 %3) 
nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32( - , - i32); - -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32( %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32( - %0, - i32 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32( - , - , - , - i32, - i32); - -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.rtz.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32( - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll index 6b881aba8f652..f3d786a37fbe4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, iXLen %3) 
nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, 
i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16( %0, - i64 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare 
@llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16 , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define 
@intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32( , - i64); + iXLen); -define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32( %0, i64 %1) nounwind { +define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: 
intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32( @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32( %0, - i64 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll deleted file mode 100644 index fd01c64df0d36..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; 
CHECK-NEXT: vfwcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16( - 
%0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32( - , - 
i64); - -define @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32( - , - i64); - -define @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll index b8d88e1c64e55..6c3c2d7702eb1 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16( @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16( 
@llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16( @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define 
@intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16( @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16( @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 
@@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32( @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32( @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32( @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32( @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll deleted file mode 100644 index dc461d60b0be9..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll +++ /dev/null @@ -1,380 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16( - %0, - %1, - %2, - i64 %3, 
i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16( - , - i64); - -define 
@intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32( %0, i64 %1) nounwind { -; CHECK-LABEL: 
intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v10, v8 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v12, v8 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32( - , - i64); - -define @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, 
e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v16, v8 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32( - %0, - i64 %1) - - ret %a -} - -declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32( - , - , - , - i64, - i64); - -define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwcvt.xu.f.v v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32( - %0, - %1, - %2, - i64 %3, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll similarity index 87% rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll index c2b0a222709e4..10bd22304ed83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -15,7 +17,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16( @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16( 
%0, - i32 %1) + iXLen %1) ret %a } @@ -24,10 +26,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -38,16 +40,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -57,7 +59,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16( @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -66,10 +68,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -80,16 +82,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -99,7 +101,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16( 
@llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -108,10 +110,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -122,16 +124,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -141,7 +143,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16( @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -150,10 +152,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -164,16 +166,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -183,7 +185,7 @@ define 
@intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16( @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16( %0, - i32 %1) + iXLen %1) ret %a } @@ -192,10 +194,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -206,16 +208,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -225,7 +227,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32( @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -234,10 +236,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -248,16 +250,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, 
e32, m1, ta, mu @@ -267,7 +269,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32( @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -276,10 +278,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -290,16 +292,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -309,7 +311,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32( @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -318,10 +320,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,16 +334,16 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } declare @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32( , - i32); + iXLen); -define @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32( %0, i32 %1) nounwind { +define @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -351,7 +353,7 @@ define @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32( @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32( %0, - i32 %1) + iXLen %1) ret %a } @@ -360,10 +362,10 @@ declare @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -374,7 +376,7 @@ entry: %0, %1, %2, - i32 %3, i32 1) + iXLen %3, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll deleted file mode 100644 index eb21c54c18e90..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare 
@llvm.riscv.vfwmacc.nxv8f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv1f64.nxv1f32( - , - , - , - i64); - -define 
@intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv2f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv2f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv4f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv4f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv8f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv8f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv1f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv1f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv1f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv2f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv2f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv2f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv4f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwmacc.nxv4f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv4f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv8f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv8f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv8f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv16f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv16f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfwmacc.mask.nxv16f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv16f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv1f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv1f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv1f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv1f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv2f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv2f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv2f64.f32( - , - float, - , - , - i64); - -define 
@intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv2f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv4f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv4f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv4f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv4f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmacc.nxv8f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.nxv8f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmacc.mask.nxv8f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmacc.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmacc.mask.nxv8f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll index 0de121cb3f002..f5db61b5e8c7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwmacc.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwmacc.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfwmacc.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen 
%4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwmacc.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfwmacc.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare 
@llvm.riscv.vfwmacc.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfwmacc.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwmacc.nxv4f64.nxv4f32( , , , - i32); + iXLen); 
-define @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfwmacc.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwmacc.nxv1f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, 
i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -432,7 +434,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -456,7 +458,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfwmacc.nxv2f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -478,7 +480,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -502,7 +504,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfwmacc.nxv4f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -548,7 +550,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfwmacc.nxv8f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfwmacc.nxv16f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16( %0, 
half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv16f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfwmacc.nxv1f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -662,7 +664,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv1f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -686,7 +688,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfwmacc.nxv2f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { 
; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -708,7 +710,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv2f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -732,7 +734,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfwmacc.nxv4f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -754,7 +756,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv4f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -778,7 +780,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfwmacc.nxv8f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfwmacc.mask.nxv8f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll deleted file mode 100644 index b2e1e235e9695..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare 
@llvm.riscv.vfwmsac.nxv8f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv1f64.nxv1f32( - , - , - , - i64); - -define 
@intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv2f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv2f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv4f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: 
intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv4f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv8f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv8f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv1f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv1f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv1f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv2f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv2f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv2f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv4f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwmsac.nxv4f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv4f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv8f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv8f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv8f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv16f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv16f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfwmsac.mask.nxv16f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv16f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv1f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv1f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv1f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv1f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv2f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv2f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv2f64.f32( - , - float, - , - , - i64); - -define 
@intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv2f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv4f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv4f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv4f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv4f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwmsac.nxv8f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.nxv8f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwmsac.mask.nxv8f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: 
intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmsac.mask.nxv8f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll index 82c4fad996e71..884ee36575b4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwmsac.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwmsac.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfwmsac.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen 
%4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwmsac.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfwmsac.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare 
@llvm.riscv.vfwmsac.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfwmsac.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwmsac.nxv4f64.nxv4f32( , , , - i32); + iXLen); 
-define @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfwmsac.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwmsac.nxv1f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, 
i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -432,7 +434,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -456,7 +458,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfwmsac.nxv2f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -478,7 +480,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -502,7 +504,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfwmsac.nxv4f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -548,7 +550,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfwmsac.nxv8f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfwmsac.nxv16f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16( %0, 
half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv16f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfwmsac.nxv1f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -662,7 +664,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv1f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -686,7 +688,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfwmsac.nxv2f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { 
; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -708,7 +710,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv2f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -732,7 +734,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfwmsac.nxv4f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -754,7 +756,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv4f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -778,7 +780,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfwmsac.nxv8f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfwmsac.mask.nxv8f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll deleted file mode 100644 index 670c79975a2e0..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v10, v11, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vv v8, 
v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vv v8, 
v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32( - , - , - i32); - -define @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vv v8, v16, 
v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( - , - half, - i32); - -define @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( - , - half, - i32); - -define @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( - , - half, - i32); - -define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( - , - half, - i32); - -define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t -; CHECK-NEXT: 
ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( - , - half, - i32); - -define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, fa0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( - , - float, - i32); - -define @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( - , - float, - i32); - -define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v10, v8, fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( - , - float, - i32); - -define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t -; 
CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( - , - float, - i32); - -define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v16, v8, fa0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwmul.ll index fc7d8dcb59e31..b1ec8464047e1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare 
@llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -17,7 +19,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -27,10 +29,10 @@ declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -50,9 +52,9 @@ entry: declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -63,7 +65,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -73,10 +75,10 @@ declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -88,7 +90,7 @@ 
entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -96,9 +98,9 @@ entry: declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -109,7 +111,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -119,10 +121,10 @@ declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -142,9 +144,9 @@ entry: declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -155,7 +157,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -165,10 +167,10 @@ declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -188,9 +190,9 @@ entry: declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,7 +203,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -211,10 +213,10 @@ declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16 , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -234,9 +236,9 @@ entry: declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -247,7 +249,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -257,10 +259,10 @@ declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, 
%2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -280,9 +282,9 @@ entry: declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -293,7 +295,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -303,10 +305,10 @@ declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -326,9 +328,9 @@ entry: declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -339,7 +341,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -349,10 +351,10 @@ declare 
@llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -372,9 +374,9 @@ entry: declare @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32( , , - i64); + iXLen); -define @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -385,7 +387,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -395,10 +397,10 @@ declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -418,9 +420,9 @@ entry: declare @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -431,7 +433,7 @@ entry: %a = call 
@llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -441,10 +443,10 @@ declare @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -456,7 +458,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -464,9 +466,9 @@ entry: declare @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -477,7 +479,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -487,10 +489,10 @@ declare @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -502,7 +504,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -510,9 +512,9 @@ entry: declare @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -523,7 +525,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -533,10 +535,10 @@ declare @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -548,7 +550,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -556,9 +558,9 @@ entry: declare @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -569,7 +571,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -579,10 +581,10 @@ declare @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -594,7 +596,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -602,9 +604,9 @@ entry: declare @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( , half, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, 
i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -615,7 +617,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -625,10 +627,10 @@ declare @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -640,7 +642,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -648,9 +650,9 @@ entry: declare @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -661,7 +663,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -671,10 +673,10 @@ declare @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -686,7 +688,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -694,9 +696,9 @@ 
entry: declare @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -707,7 +709,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -717,10 +719,10 @@ declare @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -732,7 +734,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -740,9 +742,9 @@ entry: declare @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -753,7 +755,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -763,10 +765,10 @@ declare @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e32, m2, ta, mu @@ -778,7 +780,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -786,9 +788,9 @@ entry: declare @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( , float, - i64); + iXLen); -define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -799,7 +801,7 @@ entry: %a = call @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -809,10 +811,10 @@ declare @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -824,7 +826,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll deleted file mode 100644 index ff2b40cfac2cd..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( - , - , - , - , - i64); - -define 
@intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) 
nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv1f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv1f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv2f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv2f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, 
fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv2f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv4f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv4f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv8f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv8f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( - %0, - 
half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv16f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv16f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv1f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv1f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare 
@llvm.riscv.vfwnmacc.nxv2f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv2f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv4f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv4f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.nxv8f64.f32( - , - float, - , - i64); - -define 
@intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.nxv8f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmacc.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll index 02842609f5685..4ccd0f8c55835 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 
+296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) 
ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwnmacc.nxv1f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -432,7 +434,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -456,7 +458,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfwnmacc.nxv2f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -478,7 +480,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare 
@llvm.riscv.vfwnmacc.mask.nxv2f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -502,7 +504,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfwnmacc.nxv4f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -548,7 +550,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfwnmacc.nxv8f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f32.f16( 
half, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfwnmacc.nxv16f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv16f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfwnmacc.nxv1f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -662,7 +664,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv1f64.f32( float, , , - i32); + 
iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -686,7 +688,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfwnmacc.nxv2f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -708,7 +710,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv2f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -732,7 +734,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfwnmacc.nxv4f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -754,7 +756,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv4f64.f32( float, , , - i32); + iXLen); -define 
@intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -778,7 +780,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfwnmacc.nxv8f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfwnmacc.mask.nxv8f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll deleted file mode 100644 index 2fe370bb1d82f..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i64 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v16, 
v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v10, v11 -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwnmsac.nxv2f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v12, v14 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v16, v20 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare 
@llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32( - , - , - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv1f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv1f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv2f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv2f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( - , - half, - , - , - i64); - -define 
@intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv4f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv4f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv8f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv8f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i64 %4) nounwind { -; 
CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv16f32.f16( - , - half, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv16f32.f16( - %0, - half %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( - , - half, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( - %0, - half %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv1f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv1f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32: -; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv2f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv2f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv4f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv4f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, 
fa0, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.nxv8f64.f32( - , - float, - , - i64); - -define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.nxv8f64.f32( - %0, - float %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( - , - float, - , - , - i64); - -define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwnmsac.vf v8, fa0, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( - %0, - float %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll similarity index 89% rename from llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll index fe9683ed15adb..26fcb06d89167 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16( 
, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define 
@intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define 
@intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define 
@intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, 
%1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwnmsac.nxv1f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -432,7 +434,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -456,7 +458,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfwnmsac.nxv2f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -478,7 +480,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -502,7 +504,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -511,9 +513,9 @@ declare @llvm.riscv.vfwnmsac.nxv4f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -524,7 +526,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -534,9 +536,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -548,7 +550,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -557,9 +559,9 @@ declare @llvm.riscv.vfwnmsac.nxv8f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -570,7 +572,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -580,9 +582,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -594,7 +596,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -603,9 +605,9 @@ declare @llvm.riscv.vfwnmsac.nxv16f32.f16( , half, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -616,7 +618,7 @@ entry: %0, half %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -626,9 +628,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv16f32.f16( half, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16( %0, half %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -640,7 +642,7 @@ entry: half %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -649,9 +651,9 @@ declare @llvm.riscv.vfwnmsac.nxv1f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -662,7 +664,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -672,9 +674,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv1f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -686,7 +688,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -695,9 +697,9 @@ declare @llvm.riscv.vfwnmsac.nxv2f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -708,7 +710,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -718,9 +720,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv2f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -732,7 +734,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -741,9 +743,9 @@ declare @llvm.riscv.vfwnmsac.nxv4f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, i32 %3) nounwind { +define 
@intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -754,7 +756,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -764,9 +766,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv4f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -778,7 +780,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -787,9 +789,9 @@ declare @llvm.riscv.vfwnmsac.nxv8f64.f32( , float, , - i32); + iXLen); -define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, i32 %3) nounwind { +define @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -800,7 +802,7 @@ entry: %0, float %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -810,9 +812,9 @@ declare @llvm.riscv.vfwnmsac.mask.nxv8f64.f32( float, , , - i32); + iXLen); -define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32( %0, float %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -824,7 +826,7 @@ entry: float %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll deleted file mode 100644 index 2282bd5fbc860..0000000000000 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll +++ /dev/null @@ -1,508 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwredosum.nxv2f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry 
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv2f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv2f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli 
zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv2f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, 
m8, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv1f64.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv1f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv1f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: 
vfwredosum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv1f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv1f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv1f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv1f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v12, 
v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredosum.nxv1f64.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.nxv1f64.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32( - , - , - , - , - i64); - -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfwredosum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll index 37240159bd908..7eb16dd4b8a80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare 
@llvm.riscv.vfwredosum.nxv2f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwredosum.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare 
@llvm.riscv.vfwredosum.nxv2f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwredosum.nxv2f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare 
@llvm.riscv.vfwredosum.nxv2f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f3 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ 
declare @llvm.riscv.vfwredosum.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwredosum.nxv1f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ 
declare @llvm.riscv.vfwredosum.nxv1f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwredosum.nxv1f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare 
@llvm.riscv.vfwredosum.nxv1f64.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv1f , , , - i32); + iXLen); -define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll deleted file mode 100644 index 52bde877eb7d1..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll +++ /dev/null @@ -1,508 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( - , - , - , - , - i64); - -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( - , - , - , - , - i64); - -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v10, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v12, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( - , - , - , - , - i64); - -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} - -declare @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( - , - , - , - i64); - -define @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v16, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( - %0, - %1, - %2, - i64 %3) - - ret %a -} - -declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32( - , - , - , - , - i64); - -define 
@intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu -; CHECK-NEXT: vfwredusum.vs v8, v16, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32( - %0, - %1, - %2, - %3, - i64 %4) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll index fe56d0c6bd0d0..897cd61fb437a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -18,7 +20,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -28,9 +30,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, 
%3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -51,9 +53,9 @@ declare @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -64,7 +66,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -74,9 +76,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -97,9 +99,9 @@ declare @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -110,7 +112,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -120,9 +122,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -143,9 +145,9 @@ declare @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -156,7 +158,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -166,9 +168,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -189,9 +191,9 @@ declare @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -202,7 +204,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -212,9 +214,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f3 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -235,9 +237,9 @@ declare @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -248,7 +250,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -258,9 +260,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -281,9 +283,9 @@ declare @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -294,7 +296,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -304,9 +306,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -327,9 +329,9 @@ declare @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -340,7 +342,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -350,9 +352,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -373,9 +375,9 @@ declare @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -386,7 +388,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -396,9 +398,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -419,9 +421,9 @@ declare @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -432,7 +434,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -442,9 +444,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f6 , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu @@ -456,7 +458,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } @@ -465,9 +467,9 @@ declare @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( , , , - i32); + iXLen); -define @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +define @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -478,7 +480,7 @@ entry: %0, %1, %2, - i32 %3) + iXLen %3) ret %a } @@ -488,9 +490,9 @@ declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32.nxv1f , , , - i32); + iXLen); -define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu @@ -502,7 +504,7 @@ entry: %1, %2, %3, - i32 %4) + iXLen %4) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll deleted file mode 100644 index d4b0780f03c57..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll +++ /dev/null @@ -1,830 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s -declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vv v10, v8, v9 -; 
CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vv v12, v8, v10 -; 
CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: 
vfwsub.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vv v10, v8, v9 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v10, v11, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: 
vfwsub.vv v12, v8, v10 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v12, v14, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32( - , - , - i64); - -define @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vv v16, v8, v12 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32( - , - , - , - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vv v8, v16, v20, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32( - %0, - %1, - %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( - , - half, - i64); - -define @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: 
vfwsub.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( - , - half, - i64); - -define @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( - , - half, - i64); - -define @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, 
v8, fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( - , - half, - i64); - -define @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( - , - half, - i64); - -define @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16( %0, half %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, fa0 
-; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( - %0, - half %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( - , - , - half, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( - %0, - %1, - half %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( - , - float, - i64); - -define @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v9, v8, fa0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( - , - float, - i64); - -define @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v10, v8, 
fa0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( - , - float, - i64); - -define @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v12, v8, fa0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( - , - float, - i64); - -define @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32( %0, float %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v16, v8, 
fa0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( - %0, - float %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( - , - , - float, - , - i64, - i64); - -define @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i64 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.vf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( - %0, - %1, - float %2, - %3, - i64 %4, i64 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll similarity index 86% rename from llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwsub.ll index e81121f848ddb..916abcae0de0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -17,7 +19,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16( %0, %1, - i32 %2) + iXLen %2) ret 
%a } @@ -27,10 +29,10 @@ declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -42,7 +44,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -50,9 +52,9 @@ entry: declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -63,7 +65,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -73,10 +75,10 @@ declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -88,7 +90,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -96,9 +98,9 @@ entry: declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -109,7 
+111,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -119,10 +121,10 @@ declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -134,7 +136,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -142,9 +144,9 @@ entry: declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -155,7 +157,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -165,10 +167,10 @@ declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -180,7 +182,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -188,9 +190,9 @@ entry: declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: 
intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -201,7 +203,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -211,10 +213,10 @@ declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16 , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -226,7 +228,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -234,9 +236,9 @@ entry: declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -247,7 +249,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -257,10 +259,10 @@ declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -272,7 +274,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -280,9 +282,9 @@ entry: declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32( , , - i32); + iXLen); -define 
@intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -293,7 +295,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -303,10 +305,10 @@ declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -318,7 +320,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -326,9 +328,9 @@ entry: declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -339,7 +341,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -349,10 +351,10 @@ declare @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -364,7 +366,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a 
} @@ -372,9 +374,9 @@ entry: declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32( , , - i32); + iXLen); -define @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -385,7 +387,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32( %0, %1, - i32 %2) + iXLen %2) ret %a } @@ -395,10 +397,10 @@ declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32( , , , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -410,7 +412,7 @@ entry: %1, %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -418,9 +420,9 @@ entry: declare @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -431,7 +433,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -441,10 +443,10 @@ declare @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli 
zero, a0, e16, mf4, ta, mu @@ -456,7 +458,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -464,9 +466,9 @@ entry: declare @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -477,7 +479,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -487,10 +489,10 @@ declare @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -502,7 +504,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -510,9 +512,9 @@ entry: declare @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -523,7 +525,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -533,10 +535,10 @@ declare @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16( %0, %1, half %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -548,7 +550,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -556,9 +558,9 @@ entry: declare @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -569,7 +571,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -579,10 +581,10 @@ declare @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -594,7 +596,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -602,9 +604,9 @@ entry: declare @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( , half, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16( %0, half %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -615,7 +617,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16( %0, half %1, - i32 %2) + iXLen %2) ret %a } @@ -625,10 +627,10 @@ declare @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16( , half, , - i32, - i32); + iXLen, + iXLen); -define 
@intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -640,7 +642,7 @@ entry: %1, half %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -648,9 +650,9 @@ entry: declare @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -661,7 +663,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -671,10 +673,10 @@ declare @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -686,7 +688,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -694,9 +696,9 @@ entry: declare @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -707,7 +709,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32( %0, float %1, - i32 %2) + iXLen %2) 
ret %a } @@ -717,10 +719,10 @@ declare @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -732,7 +734,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -740,9 +742,9 @@ entry: declare @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -753,7 +755,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -763,10 +765,10 @@ declare @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -778,7 +780,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } @@ -786,9 +788,9 @@ entry: declare @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( , float, - i32); + iXLen); -define @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32( %0, float %1, i32 %2) nounwind { +define @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e32, m4, ta, mu @@ -799,7 +801,7 @@ entry: %a = call @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32( %0, float %1, - i32 %2) + iXLen %2) ret %a } @@ -809,10 +811,10 @@ declare @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32( , float, , - i32, - i32); + iXLen, + iXLen); -define @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, i32 %4) nounwind { +define @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -824,7 +826,7 @@ entry: %1, float %2, %3, - i32 %4, i32 1) + iXLen %4, iXLen 1) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll deleted file mode 100644 index da2290be93d27..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll +++ /dev/null @@ -1,1248 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=ilp32d < %s | FileCheck %s -declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call 
@llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - 
-declare @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl4re16.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( 
- , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v9, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v10, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { 
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v12, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( - , - , - i32); - -define @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32( - , - , - , - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vl4re32.v v24, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32( - %0, - %1, - %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv1f32.f16( - , - half, - i32); - -define @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f32.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv2f32.f16( - , - half, - i32); - -define @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f32.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv4f32.f16( - , - half, - i32); - -define @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, 
fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f32.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv8f32.f16( - , - half, - i32); - -define @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f32.f16( - %0, - half %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv16f32.f16( - , - half, - i32); - -define @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv16f32.f16( - %0, - half %1, - i32 %2) 
- - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( - , - , - half, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( - %0, - %1, - half %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv1f64.f32( - , - float, - i32); - -define @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f64.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv2f64.f32( - , - float, - i32); - -define @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f64.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( - , - , - float, - , - i32, - 
i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv4f64.f32( - , - float, - i32); - -define @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f64.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i32 %4) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.nxv8f64.f32( - , - float, - i32); - -define @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f64.f32( - %0, - float %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( - , - , - float, - , - i32, - i32); - -define @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i32 %4) 
nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( - %0, - %1, - float %2, - %3, - i32 %4, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16( - %0, - %0, 
- %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, 
ta, mu -; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32( - %0, - %0, - %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( - %0, - %0, - half %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( - %0, - %0, - half %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( - %0, - %0, - half %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( - %0, - %0, - half %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i32 %3) nounwind { -; 
CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( - %0, - %0, - half %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( - %0, - %0, - float %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( - %0, - %0, - float %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( - %0, - %0, - float %1, - %2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i32 %3) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( - %0, - %0, - float %1, - 
%2, - i32 %3, i32 1) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfwsub.wv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfwsub.wv v10, v9, v8 -; 
CHECK-NEXT: vmv1r.v v8, v10 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwsub.wv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfwsub.wv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( - %1, - %0, - i32 %2) - - ret %a -} - -define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfwsub.wv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( - %1, - %0, - i32 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll similarity index 88% rename from llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll index ec0bd527dafe6..b5d008c3e1ed5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll @@ -1,12 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \ -; RUN: -target-abi=lp64d < %s | FileCheck %s +; RUN: sed 
's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -16,7 +18,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -26,10 +28,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -41,7 +43,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -49,9 +51,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -61,7 +63,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -71,10 +73,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, i64 %4) nounwind { 
+define @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -86,7 +88,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -94,9 +96,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -106,7 +108,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -116,10 +118,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -131,7 +133,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -139,9 +141,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -151,7 +153,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -161,10 +163,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16( , , , - i64, - i64); + iXLen, + iXLen); 
-define @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -176,7 +178,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -184,9 +186,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -196,7 +198,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -206,10 +208,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl4re16.v v24, (a0) @@ -222,7 +224,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -230,9 +232,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -242,7 +244,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ 
-252,10 +254,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -267,7 +269,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -275,9 +277,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -287,7 +289,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -297,10 +299,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -312,7 +314,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -320,9 +322,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -332,7 +334,7 
@@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -342,10 +344,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -357,7 +359,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -365,9 +367,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( , , - i64); + iXLen); -define @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -377,7 +379,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( %0, %1, - i64 %2) + iXLen %2) ret %a } @@ -387,10 +389,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32( , , , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl4re32.v v24, (a0) @@ -403,7 +405,7 @@ entry: %1, %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -411,9 +413,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv1f32.f16( , half, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16: ; CHECK: 
# %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -423,7 +425,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -433,10 +435,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f32.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -448,7 +450,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -456,9 +458,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv2f32.f16( , half, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -468,7 +470,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -478,10 +480,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f32.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -493,7 +495,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -501,9 +503,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv4f32.f16( , half, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, i64 %2) nounwind { +define 
@intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -513,7 +515,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -523,10 +525,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f32.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -538,7 +540,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -546,9 +548,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv8f32.f16( , half, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -558,7 +560,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -568,10 +570,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f32.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -583,7 +585,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -591,9 +593,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv16f32.f16( , half, - 
i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -603,7 +605,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv16f32.f16( %0, half %1, - i64 %2) + iXLen %2) ret %a } @@ -613,10 +615,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv16f32.f16( , half, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16( %0, %1, half %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -628,7 +630,7 @@ entry: %1, half %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -636,9 +638,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv1f64.f32( , float, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -648,7 +650,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -658,10 +660,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv1f64.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -673,7 +675,7 @@ entry: %1, float %2, %3, 
- i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -681,9 +683,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv2f64.f32( , float, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -693,7 +695,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -703,10 +705,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv2f64.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -718,7 +720,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -726,9 +728,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv4f64.f32( , float, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -738,7 +740,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -748,10 +750,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv4f64.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -763,7 +765,7 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } @@ -771,9 +773,9 @@ entry: declare @llvm.riscv.vfwsub.w.nxv8f64.f32( , float, - i64); + iXLen); -define @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -783,7 +785,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.f32( %0, float %1, - i64 %2) + iXLen %2) ret %a } @@ -793,10 +795,10 @@ declare @llvm.riscv.vfwsub.w.mask.nxv8f64.f32( , float, , - i64, - i64); + iXLen, + iXLen); -define @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, i64 %4) nounwind { +define @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32( %0, %1, float %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -808,12 +810,12 @@ entry: %1, float %2, %3, - i64 %4, i64 1) + iXLen %4, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -825,12 +827,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ 
-842,12 +844,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -859,12 +861,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -876,12 +878,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -893,12 +895,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -910,12 +912,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32( %0, %1, %2, iXLen %3) nounwind 
{ ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -927,12 +929,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -944,12 +946,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -961,12 +963,12 @@ entry: %0, %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -978,12 +980,12 @@ entry: %0, half %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -995,12 +997,12 @@ entry: %0, half %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define 
@intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -1012,12 +1014,12 @@ entry: %0, half %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -1029,12 +1031,12 @@ entry: %0, half %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16( %0, half %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu @@ -1046,12 +1048,12 @@ entry: %0, half %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -1063,12 +1065,12 @@ entry: %0, float %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32: 
; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1080,12 +1082,12 @@ entry: %0, float %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu @@ -1097,12 +1099,12 @@ entry: %0, float %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, i64 %3) nounwind { +define @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32( %0, float %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1114,12 +1116,12 @@ entry: %0, float %1, %2, - i64 %3, i64 1) + iXLen %3, iXLen 1) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu @@ -1130,12 +1132,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu @@ -1146,12 +1148,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, 
i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu @@ -1162,12 +1164,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu @@ -1178,12 +1180,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu @@ -1194,12 +1196,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu @@ -1210,12 +1212,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, 
a0, e32, m2, ta, mu @@ -1226,12 +1228,12 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32( %1, %0, - i64 %2) + iXLen %2) ret %a } -define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, i64 %2) nounwind { +define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu @@ -1242,7 +1244,7 @@ entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( %1, %0, - i64 %2) + iXLen %2) ret %a } From f69379d0a43bbe14e58e45286de3ae1cf8a58147 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 18:03:12 +0000 Subject: [PATCH 295/946] [InstCombine] Add test coverage for PR48683 D108992 added self-multiply handling to KnownBits::mul but we don't use it yet.. --- .../Transforms/InstCombine/mul-masked-bits.ll | 47 +++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll index fcff725cdf6f1..4886cd581a284 100644 --- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll +++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll @@ -1,10 +1,10 @@ -; NOTE: Assertions have been autogenerated by update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define i32 @foo(i32 %x, i32 %y) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[A:%.*]] = and i32 %x, 7 -; CHECK-NEXT: [[B:%.*]] = and i32 %y, 7 +; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], 7 +; CHECK-NEXT: [[B:%.*]] = and i32 [[Y:%.*]], 7 ; CHECK-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], [[B]] ; CHECK-NEXT: [[D:%.*]] = shl nuw i32 [[C]], 26 ; CHECK-NEXT: [[E:%.*]] = ashr exact i32 [[D]], 26 @@ -17,3 +17,44 @@ define i32 @foo(i32 %x, i32 %y) { %e = ashr i32 %d, 26 ret i32 %e } + +; PR48683 'Quadratic Reciprocity' - and(mul(x,x),2) -> 0 
+ +define i1 @PR48683(i32 %x) { +; CHECK-LABEL: @PR48683( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], [[X]] +; CHECK-NEXT: [[B:%.*]] = and i32 [[A]], 2 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[B]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %a = mul i32 %x, %x + %b = and i32 %a, 2 + %c = icmp ne i32 %b, 0 + ret i1 %c +} + +define <4 x i1> @PR48683_vec(<4 x i32> %x) { +; CHECK-LABEL: @PR48683_vec( +; CHECK-NEXT: [[A:%.*]] = mul <4 x i32> [[X:%.*]], [[X]] +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <4 x i32> [[B]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[C]] +; + %a = mul <4 x i32> %x, %x + %b = and <4 x i32> %a, + %c = icmp ne <4 x i32> %b, zeroinitializer + ret <4 x i1> %c +} + +define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) { +; CHECK-LABEL: @PR48683_vec_undef( +; CHECK-NEXT: [[A:%.*]] = mul <4 x i32> [[X:%.*]], [[X]] +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> [[A]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <4 x i32> [[B]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[C]] +; + %a = mul <4 x i32> %x, %x + %b = and <4 x i32> %a, + %c = icmp ne <4 x i32> %b, zeroinitializer + ret <4 x i1> %c +} From 818cfb10c57487cd9c8b57d8136d9e42b31a50aa Mon Sep 17 00:00:00 2001 From: Casey Carter Date: Wed, 29 Dec 2021 16:02:00 -0800 Subject: [PATCH 296/946] [libcxx][test] Make MSVC `` test compile when testing MSVC How many layers of irony are you on? 
Differential Revision: https://reviews.llvm.org/D117967 --- .../test/std/utilities/charconv/charconv.msvc/test.pass.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp index 66854205937a2..1c4cdb9558b38 100644 --- a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp +++ b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp @@ -31,6 +31,10 @@ # define sprintf_s snprintf #endif +#ifdef _MSVC_STL_VERSION +#include +using std::_Bit_cast; +#else // FUNCTION TEMPLATE _Bit_cast template && @@ -39,6 +43,7 @@ template Date: Sun, 23 Jan 2022 10:35:44 -0800 Subject: [PATCH 297/946] [Support] Simplify parallelForEach{,N} * Merge parallel_for_each into parallelForEach (this removes 1 `Fn(...)` call) * Change parallelForEach to use parallelForEachN * Move parallelForEachN into Parallel.cpp My x86-64 `lld` executable is 100KiB smaller. No noticeable difference in performance. Reviewed By: lattner Differential Revision: https://reviews.llvm.org/D117510 --- llvm/include/llvm/Support/Parallel.h | 80 ++-------------------------- llvm/lib/Support/Parallel.cpp | 32 +++++++++++ 2 files changed, 35 insertions(+), 77 deletions(-) diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h index 5c3b26d5754c2..04caf5eac961d 100644 --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -130,64 +130,6 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, // improving to take the number of available cores into account.) enum { MaxTasksPerGroup = 1024 }; -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. 
They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. - auto NumItems = std::distance(Begin, End); - if (NumItems <= 1) { - if (NumItems) - Fn(*Begin); - return; - } - - // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling - // overhead on large inputs. - ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup TG; - while (TaskSize < std::distance(Begin, End)) { - TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); - Begin += TaskSize; - } - std::for_each(Begin, End, Fn); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. - auto NumItems = End - Begin; - if (NumItems <= 1) { - if (NumItems) - Fn(Begin); - return; - } - - // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling - // overhead on large inputs. 
- ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup TG; - IndexTy I = Begin; - for (; I + TaskSize < End; I += TaskSize) { - TG.spawn([=, &Fn] { - for (IndexTy J = I, E = I + TaskSize; J != E; ++J) - Fn(J); - }); - } - for (IndexTy J = I; J < End; ++J) - Fn(J); -} - template ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init, @@ -251,27 +193,11 @@ void parallelSort(RandomAccessIterator Start, RandomAccessIterator End, llvm::sort(Start, End, Comp); } +void parallelForEachN(size_t Begin, size_t End, function_ref Fn); + template void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) { -#if LLVM_ENABLE_THREADS - if (parallel::strategy.ThreadsRequested != 1) { - parallel::detail::parallel_for_each(Begin, End, Fn); - return; - } -#endif - std::for_each(Begin, End, Fn); -} - -template -void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) { -#if LLVM_ENABLE_THREADS - if (parallel::strategy.ThreadsRequested != 1) { - parallel::detail::parallel_for_each_n(Begin, End, Fn); - return; - } -#endif - for (size_t I = Begin; I != End; ++I) - Fn(I); + parallelForEachN(0, End - Begin, [&](size_t I) { Fn(Begin[I]); }); } template F) { } // namespace parallel } // namespace llvm #endif // LLVM_ENABLE_THREADS + +void llvm::parallelForEachN(size_t Begin, size_t End, + llvm::function_ref Fn) { + // If we have zero or one items, then do not incur the overhead of spinning up + // a task group. They are surprisingly expensive, and because they do not + // support nested parallelism, a single entry task group can block parallel + // execution underneath them. +#if LLVM_ENABLE_THREADS + auto NumItems = End - Begin; + if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) { + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. 
+ auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup; + if (TaskSize == 0) + TaskSize = 1; + + parallel::detail::TaskGroup TG; + for (; Begin + TaskSize < End; Begin += TaskSize) { + TG.spawn([=, &Fn] { + for (size_t I = Begin, E = Begin + TaskSize; I != E; ++I) + Fn(I); + }); + } + for (; Begin != End; ++Begin) + Fn(Begin); + return; + } +#endif + + for (; Begin != End; ++Begin) + Fn(Begin); +} From 1a5dea9e2b97a74a277e82bfe010d521f1690eea Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Sun, 23 Jan 2022 19:06:21 +0000 Subject: [PATCH 298/946] [NewGVN][NFC] precommit tests for PR53277 --- .../Transforms/NewGVN/phi-of-ops-loads.ll | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll diff --git a/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll new file mode 100644 index 0000000000000..63a16d60dfc78 --- /dev/null +++ b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=newgvn -enable-phi-of-ops=true -S -o - %s | FileCheck %s + +define void @store-in-loop(i8* %p, i8* %q) { +; CHECK-LABEL: @store-in-loop( +; CHECK-NEXT: bb56: +; CHECK-NEXT: br label [[BB57:%.*]] +; CHECK: bb57: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ] +; CHECK-NEXT: [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ] +; CHECK-NEXT: [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1 +; CHECK-NEXT: [[N62]] = icmp ne i8 [[N60]], 2 +; CHECK-NEXT: br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]] +; CHECK: bb229: +; CHECK-NEXT: [[INC]] = add i8 [[IDX]], 1 +; CHECK-NEXT: store i8 [[INC]], i8* [[Q:%.*]], align 1 +; CHECK-NEXT: br label [[BB57]] +; CHECK: bb237: +; CHECK-NEXT: ret void +; +bb56: + br label %bb57 + +bb57: + %n59 
= phi i1 [ false, %bb229 ], [ true, %bb56 ] + %idx = phi i8 [0, %bb56], [%inc, %bb229] + %n60 = load i8, i8* %p + %n62 = icmp ne i8 %n60, 2 + %n63 = or i1 %n59, %n62 + br i1 %n63, label %bb229, label %bb237 + +bb229: + %inc = add i8 %idx, 1 + store i8 %inc, i8* %q + br label %bb57 + +bb237: + ret void +} + +define void @no-alias-store-in-loop(i8* noalias %p, i8* noalias %q) { +; CHECK-LABEL: @no-alias-store-in-loop( +; CHECK-NEXT: bb56: +; CHECK-NEXT: br label [[BB57:%.*]] +; CHECK: bb57: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ] +; CHECK-NEXT: [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ] +; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ] +; CHECK-NEXT: [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1 +; CHECK-NEXT: [[N62]] = icmp ne i8 [[N60]], 2 +; CHECK-NEXT: br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]] +; CHECK: bb229: +; CHECK-NEXT: [[INC]] = add i8 [[IDX]], 1 +; CHECK-NEXT: store i8 [[INC]], i8* [[Q:%.*]], align 1 +; CHECK-NEXT: br label [[BB57]] +; CHECK: bb237: +; CHECK-NEXT: ret void +; +bb56: + br label %bb57 + +bb57: + %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ] + %idx = phi i8 [0, %bb56], [%inc, %bb229] + %n60 = load i8, i8* %p + %n62 = icmp ne i8 %n60, 2 + %n63 = or i1 %n59, %n62 + br i1 %n63, label %bb229, label %bb237 + +bb229: + %inc = add i8 %idx, 1 + store i8 %inc, i8* %q + br label %bb57 + +bb237: + ret void +} + +define void @function-in-loop(i8* %p) { +; CHECK-LABEL: @function-in-loop( +; CHECK-NEXT: bb56: +; CHECK-NEXT: br label [[BB57:%.*]] +; CHECK: bb57: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ] +; CHECK-NEXT: [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ] +; CHECK-NEXT: [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1 +; CHECK-NEXT: [[N62]] = icmp ne i8 [[N60]], 2 +; CHECK-NEXT: br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]] +; CHECK: bb229: +; 
CHECK-NEXT: call void @f() +; CHECK-NEXT: br label [[BB57]] +; CHECK: bb237: +; CHECK-NEXT: ret void +; +bb56: + br label %bb57 + +bb57: + %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ] + %n60 = load i8, i8* %p + %n62 = icmp ne i8 %n60, 2 + %n63 = or i1 %n59, %n62 + br i1 %n63, label %bb229, label %bb237 + +bb229: + call void @f() + br label %bb57 + +bb237: + ret void +} + +define void @nowrite-function-in-loop(i8* %p) { +; CHECK-LABEL: @nowrite-function-in-loop( +; CHECK-NEXT: bb56: +; CHECK-NEXT: br label [[BB57:%.*]] +; CHECK: bb57: +; CHECK-NEXT: [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ] +; CHECK-NEXT: [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ] +; CHECK-NEXT: [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1 +; CHECK-NEXT: [[N62]] = icmp ne i8 [[N60]], 2 +; CHECK-NEXT: br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]] +; CHECK: bb229: +; CHECK-NEXT: call void @f() #[[ATTR0:[0-9]+]] +; CHECK-NEXT: br label [[BB57]] +; CHECK: bb237: +; CHECK-NEXT: ret void +; +bb56: + br label %bb57 + +bb57: + %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ] + %n60 = load i8, i8* %p + %n62 = icmp ne i8 %n60, 2 + %n63 = or i1 %n59, %n62 + br i1 %n63, label %bb229, label %bb237 + +bb229: + call void @f() inaccessiblememonly + br label %bb57 + +bb237: + ret void +} + +declare void @f() From 7a29b0b58383e8ceb751144fe638c46cacc6fe40 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 11:07:12 -0800 Subject: [PATCH 299/946] [llvm] Fix header guards (NFC) Identified with llvm-header-guard. 
--- llvm/include/llvm/Analysis/NoInferenceModelRunner.h | 2 +- llvm/include/llvm/Analysis/ReleaseModeModelRunner.h | 6 ++++++ llvm/include/llvm/Demangle/ItaniumDemangle.h | 6 +++--- llvm/include/llvm/Demangle/StringView.h | 4 ++-- llvm/include/llvm/Demangle/Utility.h | 4 ++-- llvm/include/llvm/ProfileData/MemProfData.inc | 4 ++-- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h index 1e8cb26cc3c14..5bcedf98865ca 100644 --- a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h +++ b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h @@ -40,4 +40,4 @@ class NoInferenceModelRunner : public MLModelRunner { }; } // namespace llvm #endif // defined(LLVM_HAVE_TF_API) -#endif // defined(LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H) +#endif // LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h index bb19d2c7d9260..1bf2e853980c7 100644 --- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h +++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h @@ -10,6 +10,10 @@ // Only inference is supported. 
// //===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H +#define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H + #include "llvm/Analysis/MLModelRunner.h" #include @@ -70,3 +74,5 @@ class ReleaseModeModelRunner final : public MLModelRunner { std::unique_ptr CompiledModel; }; } // namespace llvm + +#endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 01f414a7257bf..4df092c70ee1f 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_ITANIUMDEMANGLE_H -#define DEMANGLE_ITANIUMDEMANGLE_H +#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H +#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H // FIXME: (possibly) incomplete list of features that clang mangles that this // file does not yet support: @@ -5749,4 +5749,4 @@ struct ManglingParser : AbstractManglingParser, Alloc> { DEMANGLE_NAMESPACE_END -#endif // DEMANGLE_ITANIUMDEMANGLE_H +#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h index 7c8cb482ae1c1..6b5d01c09fe5f 100644 --- a/llvm/include/llvm/Demangle/StringView.h +++ b/llvm/include/llvm/Demangle/StringView.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_STRINGVIEW_H -#define DEMANGLE_STRINGVIEW_H +#ifndef LLVM_DEMANGLE_STRINGVIEW_H +#define LLVM_DEMANGLE_STRINGVIEW_H #include "DemangleConfig.h" #include diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 587c0e4bec36d..dcd12b0daa88b 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -10,8 +10,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef DEMANGLE_UTILITY_H -#define DEMANGLE_UTILITY_H +#ifndef LLVM_DEMANGLE_UTILITY_H +#define LLVM_DEMANGLE_UTILITY_H #include "StringView.h" #include diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc index d64227e4ba31d..f2cb3738f0531 100644 --- a/llvm/include/llvm/ProfileData/MemProfData.inc +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -1,5 +1,5 @@ -#ifndef MEMPROF_DATA_INC -#define MEMPROF_DATA_INC +#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC +#define LLVM_PROFILEDATA_MEMPROFDATA_INC /*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ |* |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. From abb0ed44957cb4ba1bc94d19202860f10369cea1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 11:07:14 -0800 Subject: [PATCH 300/946] [Commands] Remove redundant member initialization (NFC) Identified with readability-redundant-member-init. 
--- lldb/source/Commands/CommandCompletions.cpp | 2 +- .../Commands/CommandObjectBreakpoint.cpp | 56 +++++++------------ .../CommandObjectBreakpointCommand.cpp | 9 ++- .../source/Commands/CommandObjectCommands.cpp | 29 ++++------ .../Commands/CommandObjectDisassemble.cpp | 3 +- .../Commands/CommandObjectExpression.cpp | 6 +- lldb/source/Commands/CommandObjectFrame.cpp | 18 +++--- lldb/source/Commands/CommandObjectHelp.cpp | 3 +- lldb/source/Commands/CommandObjectHelp.h | 2 +- lldb/source/Commands/CommandObjectLog.cpp | 5 +- lldb/source/Commands/CommandObjectMemory.cpp | 25 ++++----- .../Commands/CommandObjectMemoryTag.cpp | 5 +- .../source/Commands/CommandObjectPlatform.cpp | 36 +++++------- lldb/source/Commands/CommandObjectProcess.cpp | 45 ++++++--------- .../Commands/CommandObjectRegexCommand.cpp | 2 +- .../source/Commands/CommandObjectRegister.cpp | 6 +- .../Commands/CommandObjectReproducer.cpp | 6 +- lldb/source/Commands/CommandObjectScript.h | 2 +- lldb/source/Commands/CommandObjectSession.cpp | 6 +- .../source/Commands/CommandObjectSettings.cpp | 15 ++--- lldb/source/Commands/CommandObjectSource.cpp | 12 ++-- lldb/source/Commands/CommandObjectStats.cpp | 2 +- lldb/source/Commands/CommandObjectTarget.cpp | 50 +++++++---------- lldb/source/Commands/CommandObjectThread.cpp | 43 ++++++-------- lldb/source/Commands/CommandObjectTrace.cpp | 15 ++--- lldb/source/Commands/CommandObjectType.cpp | 44 +++++++-------- .../Commands/CommandObjectWatchpoint.cpp | 26 ++++----- .../CommandObjectWatchpointCommand.cpp | 5 +- .../Commands/CommandOptionsProcessLaunch.h | 2 +- 29 files changed, 193 insertions(+), 287 deletions(-) diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index ff825cce813ec..ae1ee1fdd30b8 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -129,7 +129,7 @@ class SourceFileCompleter : public Completer { public: 
SourceFileCompleter(CommandInterpreter &interpreter, CompletionRequest &request) - : Completer(interpreter, request), m_matching_files() { + : Completer(interpreter, request) { FileSpec partial_spec(m_request.GetCursorArgumentPrefix()); m_file_name = partial_spec.GetFilename().GetCString(); m_dir_name = partial_spec.GetDirectory().GetCString(); diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index 3f88a2fa63780..c4e55fdb3b9c0 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -49,7 +49,7 @@ static void AddBreakpointDescription(Stream *s, Breakpoint *bp, class lldb_private::BreakpointOptionGroup : public OptionGroup { public: - BreakpointOptionGroup() : OptionGroup(), m_bp_opts(false) {} + BreakpointOptionGroup() : m_bp_opts(false) {} ~BreakpointOptionGroup() override = default; @@ -179,7 +179,7 @@ class lldb_private::BreakpointOptionGroup : public OptionGroup { class BreakpointDummyOptionGroup : public OptionGroup { public: - BreakpointDummyOptionGroup() : OptionGroup() {} + BreakpointDummyOptionGroup() {} ~BreakpointDummyOptionGroup() override = default; @@ -234,8 +234,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { interpreter, "breakpoint set", "Sets a breakpoint or set of breakpoints in the executable.", "breakpoint set "), - m_bp_opts(), m_python_class_options("scripted breakpoint", true, 'P'), - m_options() { + m_python_class_options("scripted breakpoint", true, 'P') { // We're picking up all the normal options, commands and disable. 
m_all_options.Append(&m_python_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2, LLDB_OPT_SET_11); @@ -253,9 +252,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { class CommandOptions : public OptionGroup { public: - CommandOptions() - : OptionGroup(), m_condition(), m_filenames(), m_func_names(), - m_func_regexp(), m_source_text_regexp(), m_modules() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -809,8 +806,7 @@ class CommandObjectBreakpointModify : public CommandObjectParsed { "created breakpoint. " "With the exception of -e, -d and -i, passing an " "empty argument clears the modification.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID, eArgTypeBreakpointIDRange); @@ -1100,8 +1096,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed { : CommandObjectParsed( interpreter, "breakpoint list", "List some or all breakpoints at configurable levels of detail.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandArgumentData bp_id_arg; @@ -1123,7 +1118,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1246,8 +1241,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { : CommandObjectParsed(interpreter, "breakpoint clear", "Delete or disable breakpoints matching the " "specified source file and line.", - "breakpoint clear "), - m_options() {} + "breakpoint clear ") {} ~CommandObjectBreakpointClear() override = default; @@ -1255,7 +1249,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), m_filename() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1384,8 +1378,7 @@ class CommandObjectBreakpointDelete : public 
CommandObjectParsed { : CommandObjectParsed(interpreter, "breakpoint delete", "Delete the specified breakpoint(s). If no " "breakpoints are specified, delete them all.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID, eArgTypeBreakpointIDRange); @@ -1408,7 +1401,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1565,8 +1558,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed { class BreakpointNameOptionGroup : public OptionGroup { public: BreakpointNameOptionGroup() - : OptionGroup(), m_breakpoint(LLDB_INVALID_BREAK_ID), m_use_dummy(false) { - } + : m_breakpoint(LLDB_INVALID_BREAK_ID), m_use_dummy(false) {} ~BreakpointNameOptionGroup() override = default; @@ -1626,7 +1618,7 @@ class BreakpointNameOptionGroup : public OptionGroup { class BreakpointAccessOptionGroup : public OptionGroup { public: - BreakpointAccessOptionGroup() : OptionGroup() {} + BreakpointAccessOptionGroup() {} ~BreakpointAccessOptionGroup() override = default; @@ -1696,8 +1688,7 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed { "the breakpoint, otherwise only the options specified will be set " "on the name.", "breakpoint name configure " - ""), - m_bp_opts(), m_option_group() { + "") { // Create the first variant for the first (and only) argument for this // command. CommandArgumentEntry arg1; @@ -1787,8 +1778,7 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed { CommandObjectBreakpointNameAdd(CommandInterpreter &interpreter) : CommandObjectParsed( interpreter, "add", "Add a name to the breakpoints provided.", - "breakpoint name add "), - m_name_options(), m_option_group() { + "breakpoint name add ") { // Create the first variant for the first (and only) argument for this // command. 
CommandArgumentEntry arg1; @@ -1872,8 +1862,7 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed { : CommandObjectParsed( interpreter, "delete", "Delete a name from the breakpoints provided.", - "breakpoint name delete "), - m_name_options(), m_option_group() { + "breakpoint name delete ") { // Create the first variant for the first (and only) argument for this // command. CommandArgumentEntry arg1; @@ -1956,8 +1945,7 @@ class CommandObjectBreakpointNameList : public CommandObjectParsed { "List either the names for a breakpoint or info " "about a given name. With no arguments, lists all " "names", - "breakpoint name list "), - m_name_options(), m_option_group() { + "breakpoint name list ") { m_option_group.Append(&m_name_options, LLDB_OPT_SET_3, LLDB_OPT_SET_ALL); m_option_group.Finalize(); } @@ -2063,8 +2051,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed { : CommandObjectParsed(interpreter, "breakpoint read", "Read and set the breakpoints previously saved to " "a file with \"breakpoint write\". ", - nullptr), - m_options() {} + nullptr) {} ~CommandObjectBreakpointRead() override = default; @@ -2072,7 +2059,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2245,8 +2232,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed { "Write the breakpoints listed to a file that can " "be read in with \"breakpoint read\". 
" "If given no arguments, writes all breakpoints.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID, eArgTypeBreakpointIDRange); @@ -2269,7 +2255,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index 26d35c82f57d0..637e8b8bd5783 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -69,7 +69,7 @@ class CommandObjectBreakpointCommandAdd : public CommandObjectParsed, nullptr), IOHandlerDelegateMultiline("DONE", IOHandlerDelegate::Completion::LLDBCommand), - m_options(), m_func_options("breakpoint command", false, 'F') { + m_func_options("breakpoint command", false, 'F') { SetHelpLong( R"( General information about entering breakpoint commands @@ -281,7 +281,7 @@ are no syntax errors may indicate that a function was declared but never called. 
class CommandOptions : public OptionGroup { public: - CommandOptions() : OptionGroup(), m_one_liner() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -479,8 +479,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { CommandObjectBreakpointCommandDelete(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "delete", "Delete the set of commands from a breakpoint.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandArgumentData bp_id_arg; @@ -502,7 +501,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index defa21af7c170..4b4932dd367ba 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -38,8 +38,7 @@ class CommandObjectCommandsSource : public CommandObjectParsed { : CommandObjectParsed( interpreter, "command source", "Read and execute LLDB commands from the file .", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg; CommandArgumentData file_arg; @@ -76,8 +75,8 @@ class CommandObjectCommandsSource : public CommandObjectParsed { class CommandOptions : public Options { public: CommandOptions() - : Options(), m_stop_on_error(true), m_silent_run(false), - m_stop_on_continue(true), m_cmd_relative_to_command_file(false) {} + : m_stop_on_error(true), m_silent_run(false), m_stop_on_continue(true), + m_cmd_relative_to_command_file(false) {} ~CommandOptions() override = default; @@ -207,7 +206,7 @@ class CommandObjectCommandsAlias : public CommandObjectRaw { protected: class CommandOptions : public OptionGroup { public: - CommandOptions() : OptionGroup(), m_help(), m_long_help() {} + CommandOptions() {} ~CommandOptions() override = default; @@ 
-258,8 +257,7 @@ class CommandObjectCommandsAlias : public CommandObjectRaw { CommandObjectCommandsAlias(CommandInterpreter &interpreter) : CommandObjectRaw( interpreter, "command alias", - "Define a custom command in terms of an existing command."), - m_option_group(), m_command_options() { + "Define a custom command in terms of an existing command.") { m_option_group.Append(&m_command_options); m_option_group.Finalize(); @@ -793,8 +791,7 @@ class CommandObjectCommandsAddRegex : public CommandObjectParsed, "regular expressions.", "command regex [s/// ...]"), IOHandlerDelegateMultiline("", - IOHandlerDelegate::Completion::LLDBCommand), - m_options() { + IOHandlerDelegate::Completion::LLDBCommand) { SetHelpLong( R"( )" @@ -1025,7 +1022,7 @@ a number follows 'f':" class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1239,8 +1236,7 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { public: CommandObjectCommandsScriptImport(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "command script import", - "Import a scripting module in LLDB.", nullptr), - m_options() { + "Import a scripting module in LLDB.", nullptr) { CommandArgumentEntry arg1; CommandArgumentData cmd_arg; @@ -1271,7 +1267,7 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed { protected: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1395,7 +1391,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, "must be a path to a user-added container " "command, and the last element will be the new " "command name."), - IOHandlerDelegateMultiline("DONE"), m_options() { + IOHandlerDelegateMultiline("DONE") { CommandArgumentEntry arg1; CommandArgumentData cmd_arg; @@ -1426,8 +1422,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed, 
protected: class CommandOptions : public Options { public: - CommandOptions() - : Options(), m_class_name(), m_funct_name(), m_short_help() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1884,7 +1879,7 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed { protected: class CommandOptions : public Options { public: - CommandOptions() : Options(), m_short_help(), m_long_help() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp index 02a16622c76b9..e3c40ed73cf63 100644 --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -30,8 +30,7 @@ using namespace lldb_private; #define LLDB_OPTIONS_disassemble #include "CommandOptions.inc" -CommandObjectDisassemble::CommandOptions::CommandOptions() - : Options(), func_name(), plugin_name(), flavor_string(), arch() { +CommandObjectDisassemble::CommandOptions::CommandOptions() { OptionParsingStarting(nullptr); } diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 9d13ccab6d3ec..e1a289b219c3c 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -24,7 +24,7 @@ using namespace lldb; using namespace lldb_private; -CommandObjectExpression::CommandOptions::CommandOptions() : OptionGroup() {} +CommandObjectExpression::CommandOptions::CommandOptions() {} CommandObjectExpression::CommandOptions::~CommandOptions() = default; @@ -200,10 +200,10 @@ CommandObjectExpression::CommandObjectExpression( "", eCommandProcessMustBePaused | eCommandTryTargetAPILock), IOHandlerDelegate(IOHandlerDelegate::Completion::Expression), - m_option_group(), m_format_options(eFormatDefault), + m_format_options(eFormatDefault), m_repl_option(LLDB_OPT_SET_1, false, "repl", 'r', "Drop into REPL", false, true), - 
m_command_options(), m_expr_line_count(0), m_expr_lines() { + m_command_options(), m_expr_line_count(0) { SetHelpLong( R"( Single and multi-line expressions: diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp index 8dd1a79d38959..70881f2d00612 100644 --- a/lldb/source/Commands/CommandObjectFrame.cpp +++ b/lldb/source/Commands/CommandObjectFrame.cpp @@ -49,7 +49,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -110,8 +110,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed { nullptr, eCommandRequiresThread | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - eCommandProcessMustBePaused), - m_options() { + eCommandProcessMustBePaused) { CommandArgumentEntry arg; CommandArgumentData index_arg; @@ -222,7 +221,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -267,8 +266,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed { nullptr, eCommandRequiresThread | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - eCommandProcessMustBePaused), - m_options() { + eCommandProcessMustBePaused) { CommandArgumentEntry arg; CommandArgumentData index_arg; @@ -399,10 +397,9 @@ class CommandObjectFrameVariable : public CommandObjectParsed { eCommandRequiresFrame | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | eCommandRequiresProcess), - m_option_group(), m_option_variable( true), // Include the frame specific options by passing "true" - m_option_format(eFormatDefault), 
m_varobj_options() { + m_option_format(eFormatDefault) { SetHelpLong(R"( Children of aggregate variables can be specified such as 'var->child.x'. In 'frame variable', the operators -> and [] do not invoke operator overloads if @@ -729,7 +726,7 @@ class CommandObjectFrameRecognizerAdd : public CommandObjectParsed { private: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, @@ -798,8 +795,7 @@ class CommandObjectFrameRecognizerAdd : public CommandObjectParsed { public: CommandObjectFrameRecognizerAdd(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "frame recognizer add", - "Add a new frame recognizer.", nullptr), - m_options() { + "Add a new frame recognizer.", nullptr) { SetHelpLong(R"( Frame recognizers allow for retrieving information about special frames based on ABI, arguments or other special properties of that frame, even without source diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp index 8c24efaa08ee3..a2f682049ae03 100644 --- a/lldb/source/Commands/CommandObjectHelp.cpp +++ b/lldb/source/Commands/CommandObjectHelp.cpp @@ -46,8 +46,7 @@ CommandObjectHelp::CommandObjectHelp(CommandInterpreter &interpreter) "Show a list of all debugger " "commands, or give details " "about a specific command.", - "help []"), - m_options() { + "help []") { CommandArgumentEntry arg; CommandArgumentData command_arg; diff --git a/lldb/source/Commands/CommandObjectHelp.h b/lldb/source/Commands/CommandObjectHelp.h index c924dda7c6d41..71799ebb31217 100644 --- a/lldb/source/Commands/CommandObjectHelp.h +++ b/lldb/source/Commands/CommandObjectHelp.h @@ -32,7 +32,7 @@ class CommandObjectHelp : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = 
default; diff --git a/lldb/source/Commands/CommandObjectLog.cpp b/lldb/source/Commands/CommandObjectLog.cpp index 05ffba27e65fd..d432ab244805f 100644 --- a/lldb/source/Commands/CommandObjectLog.cpp +++ b/lldb/source/Commands/CommandObjectLog.cpp @@ -45,8 +45,7 @@ class CommandObjectLogEnable : public CommandObjectParsed { CommandObjectLogEnable(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "log enable", "Enable logging for a single log channel.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg1; CommandArgumentEntry arg2; CommandArgumentData channel_arg; @@ -76,7 +75,7 @@ class CommandObjectLogEnable : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), log_file() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index e59cd8028998a..0b5f39bc7a8f4 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -48,7 +48,7 @@ using namespace lldb_private; class OptionGroupReadMemory : public OptionGroup { public: OptionGroupReadMemory() - : m_num_per_line(1, 1), m_view_as_type(), m_offset(0, 0), + : m_num_per_line(1, 1), m_offset(0, 0), m_language_for_type(eLanguageTypeUnknown) {} ~OptionGroupReadMemory() override = default; @@ -287,12 +287,10 @@ class CommandObjectMemoryRead : public CommandObjectParsed { interpreter, "memory read", "Read from the memory of the current target process.", nullptr, eCommandRequiresTarget | eCommandProcessMustBePaused), - m_option_group(), m_format_options(eFormatBytesWithASCII, 1, 8), - m_memory_options(), m_outfile_options(), m_varobj_options(), + m_format_options(eFormatBytesWithASCII, 1, 8), + m_next_addr(LLDB_INVALID_ADDRESS), m_prev_byte_size(0), - m_prev_format_options(eFormatBytesWithASCII, 1, 8), - m_prev_memory_options(), m_prev_outfile_options(), - 
m_prev_varobj_options() { + m_prev_format_options(eFormatBytesWithASCII, 1, 8) { CommandArgumentEntry arg1; CommandArgumentEntry arg2; CommandArgumentData start_addr_arg; @@ -890,7 +888,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed { public: class OptionGroupFindMemory : public OptionGroup { public: - OptionGroupFindMemory() : OptionGroup(), m_count(1), m_offset(0) {} + OptionGroupFindMemory() : m_count(1), m_offset(0) {} ~OptionGroupFindMemory() override = default; @@ -944,8 +942,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed { : CommandObjectParsed( interpreter, "memory find", "Find a value in the memory of the current target process.", - nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched), - m_option_group(), m_memory_options() { + nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched) { CommandArgumentEntry arg1; CommandArgumentEntry arg2; CommandArgumentData addr_arg; @@ -1178,7 +1175,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed { public: class OptionGroupWriteMemory : public OptionGroup { public: - OptionGroupWriteMemory() : OptionGroup() {} + OptionGroupWriteMemory() {} ~OptionGroupWriteMemory() override = default; @@ -1230,16 +1227,14 @@ class CommandObjectMemoryWrite : public CommandObjectParsed { interpreter, "memory write", "Write to the memory of the current target process.", nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched), - m_option_group(), m_format_options( eFormatBytes, 1, UINT64_MAX, {std::make_tuple( eArgTypeFormat, "The format to use for each of the value to be written."), - std::make_tuple( - eArgTypeByteSize, - "The size in bytes to write from input file or each value.")}), - m_memory_options() { + std::make_tuple(eArgTypeByteSize, + "The size in bytes to write from input file or " + "each value.")}) { CommandArgumentEntry arg1; CommandArgumentEntry arg2; CommandArgumentData addr_arg; diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp 
b/lldb/source/Commands/CommandObjectMemoryTag.cpp index 666d5c21b8714..d108cf58b18c0 100644 --- a/lldb/source/Commands/CommandObjectMemoryTag.cpp +++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp @@ -138,7 +138,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed { public: class OptionGroupTagWrite : public OptionGroup { public: - OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {} + OptionGroupTagWrite() : m_end_addr(LLDB_INVALID_ADDRESS) {} ~OptionGroupTagWrite() override = default; @@ -177,8 +177,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed { "contains the given address.", nullptr, eCommandRequiresTarget | eCommandRequiresProcess | - eCommandProcessMustBePaused), - m_option_group(), m_tag_write_options() { + eCommandProcessMustBePaused) { // Address m_arguments.push_back( CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)}); diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp index 10dd878249116..4c18465c868a0 100644 --- a/lldb/source/Commands/CommandObjectPlatform.cpp +++ b/lldb/source/Commands/CommandObjectPlatform.cpp @@ -145,7 +145,6 @@ class CommandObjectPlatformSelect : public CommandObjectParsed { "Create a platform if needed and select it as the " "current platform.", "platform select ", 0), - m_option_group(), m_platform_options( false) // Don't include the "--platform" option by passing false { @@ -377,7 +376,6 @@ class CommandObjectPlatformSettings : public CommandObjectParsed { "Set settings for the current target's platform, " "or for a platform by name.", "platform settings", 0), - m_options(), m_option_working_dir(LLDB_OPT_SET_1, false, "working-dir", 'w', CommandCompletions::eRemoteDiskDirectoryCompletion, eArgTypePath, @@ -417,8 +415,7 @@ class CommandObjectPlatformMkDir : public CommandObjectParsed { CommandObjectPlatformMkDir(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform 
mkdir", "Make a new directory on the remote end.", nullptr, - 0), - m_options() {} + 0) {} ~CommandObjectPlatformMkDir() override = default; @@ -464,8 +461,7 @@ class CommandObjectPlatformFOpen : public CommandObjectParsed { public: CommandObjectPlatformFOpen(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform file open", - "Open a file on the remote end.", nullptr, 0), - m_options() {} + "Open a file on the remote end.", nullptr, 0) {} ~CommandObjectPlatformFOpen() override = default; @@ -566,8 +562,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed { CommandObjectPlatformFRead(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform file read", "Read data from a file on the remote end.", nullptr, - 0), - m_options() {} + 0) {} ~CommandObjectPlatformFRead() override = default; @@ -605,7 +600,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed { protected: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -660,8 +655,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed { CommandObjectPlatformFWrite(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform file write", "Write data to a file on the remote end.", nullptr, - 0), - m_options() {} + 0) {} ~CommandObjectPlatformFWrite() override = default; @@ -698,7 +692,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed { protected: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1124,8 +1118,7 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed { : CommandObjectParsed(interpreter, "platform process launch", "Launch a new process on a remote platform.", "platform process launch program", - eCommandRequiresTarget | eCommandTryTargetAPILock), - m_options(), m_all_options() { + 
eCommandRequiresTarget | eCommandTryTargetAPILock) { m_all_options.Append(&m_options); m_all_options.Finalize(); } @@ -1217,8 +1210,7 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed { : CommandObjectParsed(interpreter, "platform process list", "List processes on a remote platform by name, pid, " "or many other matching attributes.", - "platform process list", 0), - m_options() {} + "platform process list", 0) {} ~CommandObjectPlatformProcessList() override = default; @@ -1324,7 +1316,7 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), match_info() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1560,7 +1552,7 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -1622,8 +1614,7 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed { CommandObjectPlatformProcessAttach(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "platform process attach", "Attach to a process.", - "platform process attach "), - m_options() {} + "platform process attach ") {} ~CommandObjectPlatformProcessAttach() override = default; @@ -1689,7 +1680,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw { public: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1747,8 +1738,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw { CommandObjectPlatformShell(CommandInterpreter &interpreter) : CommandObjectRaw(interpreter, "platform shell", "Run a shell command on the current platform.", - "platform shell ", 0), - m_options() {} + "platform shell ", 0) {} 
~CommandObjectPlatformShell() override = default; diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 5fd1718e84840..c73f0df0aaf25 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -110,9 +110,8 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach { interpreter, "process launch", "Launch the executable in the debugger.", nullptr, eCommandRequiresTarget, "restart"), - m_options(), - m_class_options("scripted process", true, 'C', 'k', 'v', 0), - m_all_options() { + + m_class_options("scripted process", true, 'C', 'k', 'v', 0) { m_all_options.Append(&m_options); m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2, LLDB_OPT_SET_ALL); @@ -300,7 +299,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -364,8 +363,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach { CommandObjectProcessAttach(CommandInterpreter &interpreter) : CommandObjectProcessLaunchOrAttach( interpreter, "process attach", "Attach to a process.", - "process attach ", 0, "attach"), - m_options() {} + "process attach ", 0, "attach") {} ~CommandObjectProcessAttach() override = default; @@ -502,15 +500,14 @@ class CommandObjectProcessContinue : public CommandObjectParsed { "Continue execution of all threads in the current process.", "process continue", eCommandRequiresProcess | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {} ~CommandObjectProcessContinue() override = default; protected: class CommandOptions : public Options { 
public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -651,7 +648,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -698,8 +695,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed { "Detach from the current target process.", "process detach", eCommandRequiresProcess | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched), - m_options() {} + eCommandProcessMustBeLaunched) {} ~CommandObjectProcessDetach() override = default; @@ -741,7 +737,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -781,8 +777,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed { CommandObjectProcessConnect(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "process connect", "Connect to a remote debug service.", - "process connect ", 0), - m_options() {} + "process connect ", 0) {} ~CommandObjectProcessConnect() override = default; @@ -863,7 +858,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -907,8 +902,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed { "process load [ ...]", eCommandRequiresProcess | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - 
eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBePaused) {} ~CommandObjectProcessLoad() override = default; @@ -1220,8 +1214,7 @@ class CommandObjectProcessSaveCore : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() - : Options(), m_requested_save_core_style(eSaveCoreUnspecified) {} + CommandOptions() : m_requested_save_core_style(eSaveCoreUnspecified) {} ~CommandOptions() override = default; @@ -1316,8 +1309,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed { interpreter, "process status", "Show status and stop location for the current target process.", "process status", - eCommandRequiresProcess | eCommandTryTargetAPILock), - m_options() {} + eCommandRequiresProcess | eCommandTryTargetAPILock) {} ~CommandObjectProcessStatus() override = default; @@ -1325,7 +1317,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1430,7 +1422,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -1477,8 +1469,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed { "Manage LLDB handling of OS signals for the " "current target process. Defaults to showing " "current policy.", - nullptr, eCommandRequiresTarget), - m_options() { + nullptr, eCommandRequiresTarget) { SetHelpLong("\nIf no signals are specified, update them all. 
If no update " "option is specified, list the current values."); CommandArgumentEntry arg; @@ -1687,7 +1678,7 @@ class CommandObjectProcessTraceSave : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) override { diff --git a/lldb/source/Commands/CommandObjectRegexCommand.cpp b/lldb/source/Commands/CommandObjectRegexCommand.cpp index 46295421834a8..7ddc5c0c7e083 100644 --- a/lldb/source/Commands/CommandObjectRegexCommand.cpp +++ b/lldb/source/Commands/CommandObjectRegexCommand.cpp @@ -20,7 +20,7 @@ CommandObjectRegexCommand::CommandObjectRegexCommand( bool is_removable) : CommandObjectRaw(interpreter, name, help, syntax), m_max_matches(max_matches), m_completion_type_mask(completion_type_mask), - m_entries(), m_is_removable(is_removable) {} + m_is_removable(is_removable) {} // Destructor CommandObjectRegexCommand::~CommandObjectRegexCommand() = default; diff --git a/lldb/source/Commands/CommandObjectRegister.cpp b/lldb/source/Commands/CommandObjectRegister.cpp index 6fd71c90c327f..933c243dedd51 100644 --- a/lldb/source/Commands/CommandObjectRegister.cpp +++ b/lldb/source/Commands/CommandObjectRegister.cpp @@ -43,8 +43,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed { nullptr, eCommandRequiresFrame | eCommandRequiresRegContext | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_option_group(), m_format_options(eFormatDefault), - m_command_options() { + m_format_options(eFormatDefault) { CommandArgumentEntry arg; CommandArgumentData register_arg; @@ -232,8 +231,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed { class CommandOptions : public OptionGroup { public: CommandOptions() - : OptionGroup(), - 
set_indexes(OptionValue::ConvertTypeToMask(OptionValue::eTypeUInt64)), + : set_indexes(OptionValue::ConvertTypeToMask(OptionValue::eTypeUInt64)), dump_all_sets(false, false), // Initial and default values are false alternate_name(false, false) {} diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp index 4db3e070df3c1..7e0ea65e148ee 100644 --- a/lldb/source/Commands/CommandObjectReproducer.cpp +++ b/lldb/source/Commands/CommandObjectReproducer.cpp @@ -227,7 +227,7 @@ class CommandObjectReproducerXCrash : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -355,7 +355,7 @@ class CommandObjectReproducerDump : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), file() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -602,7 +602,7 @@ class CommandObjectReproducerVerify : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), file() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectScript.h b/lldb/source/Commands/CommandObjectScript.h index b9fee7124818a..97fc05421bd06 100644 --- a/lldb/source/Commands/CommandObjectScript.h +++ b/lldb/source/Commands/CommandObjectScript.h @@ -21,7 +21,7 @@ class CommandObjectScript : public CommandObjectRaw { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) override; diff --git a/lldb/source/Commands/CommandObjectSession.cpp b/lldb/source/Commands/CommandObjectSession.cpp index c2cdfa29a3f64..c11839a48de0a 100644 --- a/lldb/source/Commands/CommandObjectSession.cpp +++ 
b/lldb/source/Commands/CommandObjectSession.cpp @@ -62,8 +62,7 @@ class CommandObjectSessionHistory : public CommandObjectParsed { "using \"!\". \"!-\" will re-run " "the command that is commands from the end" " of the list (counting the current command).", - nullptr), - m_options() {} + nullptr) {} ~CommandObjectSessionHistory() override = default; @@ -73,8 +72,7 @@ class CommandObjectSessionHistory : public CommandObjectParsed { class CommandOptions : public Options { public: CommandOptions() - : Options(), m_start_idx(0), m_stop_idx(0), m_count(0), m_clear(false) { - } + : m_start_idx(0), m_stop_idx(0), m_count(0), m_clear(false) {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp index 13ff27c78deaa..391e728d9d8a4 100644 --- a/lldb/source/Commands/CommandObjectSettings.cpp +++ b/lldb/source/Commands/CommandObjectSettings.cpp @@ -27,8 +27,7 @@ class CommandObjectSettingsSet : public CommandObjectRaw { public: CommandObjectSettingsSet(CommandInterpreter &interpreter) : CommandObjectRaw(interpreter, "settings set", - "Set the value of the specified debugger setting."), - m_options() { + "Set the value of the specified debugger setting.") { CommandArgumentEntry arg1; CommandArgumentEntry arg2; CommandArgumentData var_name_arg; @@ -87,7 +86,7 @@ insert-before or insert-after."); class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -304,8 +303,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { "Write matching debugger settings and their " "current values to a file that can be read in with " "\"settings read\". 
Defaults to writing all settings.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry arg1; CommandArgumentData var_name_arg; @@ -327,7 +325,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -417,8 +415,7 @@ class CommandObjectSettingsRead : public CommandObjectParsed { : CommandObjectParsed( interpreter, "settings read", "Read settings previously saved to a file with \"settings write\".", - nullptr), - m_options() {} + nullptr) {} ~CommandObjectSettingsRead() override = default; @@ -426,7 +423,7 @@ class CommandObjectSettingsRead : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index fb33f41b8ef96..6c6706f4a98b0 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -36,7 +36,7 @@ using namespace lldb_private; class CommandObjectSourceInfo : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -118,8 +118,7 @@ class CommandObjectSourceInfo : public CommandObjectParsed { "Display source line information for the current target " "process. 
Defaults to instruction pointer in current stack " "frame.", - nullptr, eCommandRequiresTarget), - m_options() {} + nullptr, eCommandRequiresTarget) {} ~CommandObjectSourceInfo() override = default; @@ -624,7 +623,7 @@ class CommandObjectSourceInfo : public CommandObjectParsed { class CommandObjectSourceList : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -723,8 +722,7 @@ class CommandObjectSourceList : public CommandObjectParsed { : CommandObjectParsed(interpreter, "source list", "Display source code for the current target " "process as specified by options.", - nullptr, eCommandRequiresTarget), - m_options() {} + nullptr, eCommandRequiresTarget) {} ~CommandObjectSourceList() override = default; @@ -757,7 +755,7 @@ class CommandObjectSourceList : public CommandObjectParsed { SourceInfo(ConstString name, const LineEntry &line_entry) : function(name), line_entry(line_entry) {} - SourceInfo() : function(), line_entry() {} + SourceInfo() {} bool IsValid() const { return (bool)function && line_entry.IsValid(); } diff --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp index f32d559ca039f..63aa36b39f4d3 100644 --- a/lldb/source/Commands/CommandObjectStats.cpp +++ b/lldb/source/Commands/CommandObjectStats.cpp @@ -65,7 +65,7 @@ class CommandObjectStatsDisable : public CommandObjectParsed { class CommandObjectStatsDump : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) override { diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index bd19ac513d017..157065bde10e6 100644 --- 
a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -216,7 +216,6 @@ class CommandObjectTargetCreate : public CommandObjectParsed { interpreter, "target create", "Create a target using the argument as the main executable.", nullptr), - m_option_group(), m_arch_option(), m_platform_options(true), // Include the --platform option. m_core_file(LLDB_OPT_SET_1, false, "core", 'c', 0, eArgTypeFilename, "Fullpath to a core file to use for this target."), @@ -227,8 +226,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed { "are not in the executable."), m_remote_file( LLDB_OPT_SET_1, false, "remote-file", 'r', 0, eArgTypeFilename, - "Fullpath to the file on the remote host if debugging remotely."), - m_add_dependents() { + "Fullpath to the file on the remote host if debugging remotely.") { CommandArgumentEntry arg; CommandArgumentData file_arg; @@ -534,8 +532,8 @@ class CommandObjectTargetDelete : public CommandObjectParsed { : CommandObjectParsed(interpreter, "target delete", "Delete one or more targets by target index.", nullptr), - m_option_group(), m_all_option(LLDB_OPT_SET_1, false, "all", 'a', - "Delete all targets.", false, true), + m_all_option(LLDB_OPT_SET_1, false, "all", 'a', "Delete all targets.", + false, true), m_cleanup_option( LLDB_OPT_SET_1, false, "clean", 'c', "Perform extra cleanup to minimize memory consumption after " @@ -678,7 +676,6 @@ class CommandObjectTargetVariable : public CommandObjectParsed { "Read global variables for the current target, " "before or while running a process.", nullptr, eCommandRequiresTarget), - m_option_group(), m_option_variable(false), // Don't include frame options m_option_format(eFormatDefault), m_option_compile_units(LLDB_OPT_SET_1, false, "file", SHORT_OPTION_FILE, @@ -691,8 +688,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed { eArgTypeFilename, "A basename or fullpath to a shared library to use in the search " "for global " - 
"variables. This option can be specified multiple times."), - m_varobj_options() { + "variables. This option can be specified multiple times.") { CommandArgumentEntry arg; CommandArgumentData var_name_arg; @@ -1928,8 +1924,7 @@ class CommandObjectTargetModulesDumpSymtab : CommandObjectTargetModulesModuleAutoComplete( interpreter, "target modules dump symtab", "Dump the symbol table from one or more target modules.", nullptr, - eCommandRequiresTarget), - m_options() {} + eCommandRequiresTarget) {} ~CommandObjectTargetModulesDumpSymtab() override = default; @@ -1937,7 +1932,7 @@ class CommandObjectTargetModulesDumpSymtab class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2354,7 +2349,7 @@ class CommandObjectTargetModulesDumpLineTable class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) override { @@ -2423,11 +2418,11 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed { "Add a new module to the current target's modules.", "target modules add []", eCommandRequiresTarget), - m_option_group(), m_symbol_file(LLDB_OPT_SET_1, false, "symfile", 's', - 0, eArgTypeFilename, - "Fullpath to a stand alone debug " - "symbols file for when debug symbols " - "are not in the executable.") { + m_symbol_file(LLDB_OPT_SET_1, false, "symfile", 's', 0, + eArgTypeFilename, + "Fullpath to a stand alone debug " + "symbols file for when debug symbols " + "are not in the executable.") { m_option_group.Append(&m_uuid_option_group, LLDB_OPT_SET_ALL, LLDB_OPT_SET_1); m_option_group.Append(&m_symbol_file, LLDB_OPT_SET_ALL, LLDB_OPT_SET_1); @@ -2575,7 +2570,6 @@ class CommandObjectTargetModulesLoad "target modules load [--file --uuid ] " "
[
....]", eCommandRequiresTarget), - m_option_group(), m_file_option(LLDB_OPT_SET_1, false, "file", 'f', 0, eArgTypeName, "Fullpath or basename for module to load.", ""), m_load_option(LLDB_OPT_SET_1, false, "load", 'l', @@ -2843,7 +2837,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options(), m_format_array() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2886,8 +2880,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed { : CommandObjectParsed( interpreter, "target modules list", "List current executable and dependent shared library images.", - "target modules list []"), - m_options() {} + "target modules list []") {} ~CommandObjectTargetModulesList() override = default; @@ -3186,7 +3179,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), m_str() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -3243,8 +3236,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed { interpreter, "target modules show-unwind", "Show synthesized unwind instructions for a function.", nullptr, eCommandRequiresTarget | eCommandRequiresProcess | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {} ~CommandObjectTargetModulesShowUnwind() override = default; @@ -3533,7 +3525,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -3648,8 +3640,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed { : CommandObjectParsed(interpreter, "target modules lookup", "Look up 
information within executable and " "dependent shared library images.", - nullptr, eCommandRequiresTarget), - m_options() { + nullptr, eCommandRequiresTarget) { CommandArgumentEntry arg; CommandArgumentData file_arg; @@ -3955,7 +3946,6 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed { "to specify a module.", "target symbols add []", eCommandRequiresTarget), - m_option_group(), m_file_option( LLDB_OPT_SET_1, false, "shlib", 's', CommandCompletions::eModuleCompletion, eArgTypeShlibName, @@ -4442,7 +4432,7 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, public: class CommandOptions : public OptionGroup { public: - CommandOptions() : OptionGroup(), m_line_end(UINT_MAX), m_one_liner() {} + CommandOptions() : m_line_end(UINT_MAX) {} ~CommandOptions() override = default; @@ -4599,7 +4589,7 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed, "target stop-hook add"), IOHandlerDelegateMultiline("DONE", IOHandlerDelegate::Completion::LLDBCommand), - m_options(), m_python_class_options("scripted stop-hook", true, 'P') { + m_python_class_options("scripted stop-hook", true, 'P') { SetHelpLong( R"( Command Based stop-hooks: diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index 71e67f6ba2087..137aaa81c61a0 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -47,7 +47,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -119,8 +119,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads { nullptr, eCommandRequiresProcess | eCommandRequiresThread | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - 
eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBePaused) {} ~CommandObjectThreadBacktrace() override = default; @@ -203,7 +202,7 @@ static constexpr OptionEnumValues TriRunningModes() { class ThreadStepScopeOptionGroup : public OptionGroup { public: - ThreadStepScopeOptionGroup() : OptionGroup() { + ThreadStepScopeOptionGroup() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -327,7 +326,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed { eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_step_type(step_type), m_step_scope(step_scope), m_options(), + m_step_type(step_type), m_step_scope(step_scope), m_class_options("scripted step") { CommandArgumentEntry arg; CommandArgumentData thread_id_arg; @@ -780,7 +779,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed { uint32_t m_thread_idx = LLDB_INVALID_THREAD_ID; uint32_t m_frame_idx = LLDB_INVALID_FRAME_ID; - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -861,8 +860,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed { " is provided, stepping will stop when the first one is hit.", nullptr, eCommandRequiresThread | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_options() { + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) { CommandArgumentEntry arg; CommandArgumentData line_num_arg; @@ -1186,7 +1184,7 @@ class CommandObjectThreadInfo : public CommandObjectIterateOverThreads { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -1231,8 +1229,7 @@ class CommandObjectThreadInfo : public 
CommandObjectIterateOverThreads { "current thread.", "thread info", eCommandRequiresProcess | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_options() { + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) { m_add_return = false; } @@ -1331,7 +1328,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -1386,8 +1383,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw { "thread return", eCommandRequiresFrame | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - eCommandProcessMustBePaused), - m_options() { + eCommandProcessMustBePaused) { CommandArgumentEntry arg; CommandArgumentData expression_arg; @@ -1496,7 +1492,7 @@ class CommandObjectThreadJump : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -1556,8 +1552,7 @@ class CommandObjectThreadJump : public CommandObjectParsed { interpreter, "thread jump", "Sets the program counter to a new address.", "thread jump", eCommandRequiresFrame | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {} ~CommandObjectThreadJump() override = default; @@ -1633,7 +1628,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { + CommandOptions() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); @@ -1695,8 +1690,7 @@ class 
CommandObjectThreadPlanList : public CommandObjectIterateOverThreads { nullptr, eCommandRequiresProcess | eCommandRequiresThread | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | - eCommandProcessMustBePaused), - m_options() {} + eCommandProcessMustBePaused) {} ~CommandObjectThreadPlanList() override = default; @@ -2004,7 +1998,7 @@ class CommandObjectTraceDumpInstructions public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -2085,7 +2079,7 @@ class CommandObjectTraceDumpInstructions eCommandRequiresProcess | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | eCommandProcessMustBeTraced), - m_options(), m_create_repeat_command_just_invoked(false) {} + m_create_repeat_command_just_invoked(false) {} ~CommandObjectTraceDumpInstructions() override = default; @@ -2165,7 +2159,7 @@ class CommandObjectTraceDumpInfo : public CommandObjectIterateOverThreads { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -2213,8 +2207,7 @@ class CommandObjectTraceDumpInfo : public CommandObjectIterateOverThreads { nullptr, eCommandRequiresProcess | eCommandTryTargetAPILock | eCommandProcessMustBeLaunched | eCommandProcessMustBePaused | - eCommandProcessMustBeTraced), - m_options() {} + eCommandProcessMustBeTraced) {} ~CommandObjectTraceDumpInfo() override = default; diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp index 62ee48ca05469..53f1b0a32e607 100644 --- a/lldb/source/Commands/CommandObjectTrace.cpp +++ b/lldb/source/Commands/CommandObjectTrace.cpp @@ -40,7 +40,7 @@ class CommandObjectTraceLoad : public CommandObjectParsed { public: class 
CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -74,8 +74,7 @@ class CommandObjectTraceLoad : public CommandObjectParsed { CommandObjectTraceLoad(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "trace load", "Load a processor trace session from a JSON file.", - "trace load"), - m_options() {} + "trace load") {} ~CommandObjectTraceLoad() override = default; @@ -139,7 +138,7 @@ class CommandObjectTraceDump : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -173,8 +172,7 @@ class CommandObjectTraceDump : public CommandObjectParsed { CommandObjectTraceDump(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "trace dump", "Dump the loaded processor trace data.", - "trace dump"), - m_options() {} + "trace dump") {} ~CommandObjectTraceDump() override = default; @@ -205,7 +203,7 @@ class CommandObjectTraceSchema : public CommandObjectParsed { public: class CommandOptions : public Options { public: - CommandOptions() : Options() { OptionParsingStarting(nullptr); } + CommandOptions() { OptionParsingStarting(nullptr); } ~CommandOptions() override = default; @@ -240,8 +238,7 @@ class CommandObjectTraceSchema : public CommandObjectParsed { : CommandObjectParsed(interpreter, "trace schema", "Show the schema of the given trace plugin.", "trace schema . 
Use the plug-in name " - "\"all\" to see all schemas.\n"), - m_options() {} + "\"all\" to see all schemas.\n") {} ~CommandObjectTraceSchema() override = default; diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index 0562b6be3cb5e..f9e1d0f91fb76 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -69,7 +69,7 @@ class SynthAddOptions { SynthAddOptions(bool sptr, bool sref, bool casc, bool regx, std::string catg) : m_skip_pointers(sptr), m_skip_references(sref), m_cascade(casc), - m_regex(regx), m_target_types(), m_category(catg) {} + m_regex(regx), m_category(catg) {} typedef std::shared_ptr SharedPointer; }; @@ -103,7 +103,7 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed, private: class CommandOptions : public Options { public: - CommandOptions(CommandInterpreter &interpreter) : Options() {} + CommandOptions(CommandInterpreter &interpreter) {} ~CommandOptions() override = default; @@ -286,7 +286,7 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed, private: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -498,7 +498,7 @@ class CommandObjectTypeFormatAdd : public CommandObjectParsed { private: class CommandOptions : public OptionGroup { public: - CommandOptions() : OptionGroup() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -571,8 +571,7 @@ class CommandObjectTypeFormatAdd : public CommandObjectParsed { CommandObjectTypeFormatAdd(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "type format add", "Add a new formatting style for a type.", nullptr), - m_option_group(), m_format_options(eFormatInvalid), - m_command_options() { + m_format_options(eFormatInvalid) { CommandArgumentEntry type_arg; CommandArgumentData type_style_arg; @@ -708,7 +707,7 @@ class CommandObjectTypeFormatterDelete : public 
CommandObjectParsed { protected: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -760,7 +759,7 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed { CommandObjectTypeFormatterDelete(CommandInterpreter &interpreter, uint32_t formatter_kind_mask, const char *name, const char *help) - : CommandObjectParsed(interpreter, name, help, nullptr), m_options(), + : CommandObjectParsed(interpreter, name, help, nullptr), m_formatter_kind_mask(formatter_kind_mask) { CommandArgumentEntry type_arg; CommandArgumentData type_style_arg; @@ -873,7 +872,7 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed { private: class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -914,7 +913,7 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed { CommandObjectTypeFormatterClear(CommandInterpreter &interpreter, uint32_t formatter_kind_mask, const char *name, const char *help) - : CommandObjectParsed(interpreter, name, help, nullptr), m_options(), + : CommandObjectParsed(interpreter, name, help, nullptr), m_formatter_kind_mask(formatter_kind_mask) {} ~CommandObjectTypeFormatterClear() override = default; @@ -1713,7 +1712,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed { class CommandOptions : public Options { public: CommandOptions() - : Options(), m_define_enabled(false, false), + : m_define_enabled(false, false), m_cate_language(eLanguageTypeUnknown, eLanguageTypeUnknown) {} ~CommandOptions() override = default; @@ -1760,8 +1759,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed { CommandObjectTypeCategoryDefine(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "type category define", "Define a new category as a source of formatters.", - nullptr), - m_options() { + nullptr) { 
CommandArgumentEntry type_arg; CommandArgumentData type_style_arg; @@ -1817,7 +1815,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed { class CommandObjectTypeCategoryEnable : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -1863,8 +1861,7 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed { CommandObjectTypeCategoryEnable(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "type category enable", "Enable a category as a source of formatters.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry type_arg; CommandArgumentData type_style_arg; @@ -1995,7 +1992,7 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed { class CommandObjectTypeCategoryDisable : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2041,8 +2038,7 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed { CommandObjectTypeCategoryDisable(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "type category disable", "Disable a category as a source of formatters.", - nullptr), - m_options() { + nullptr) { CommandArgumentEntry type_arg; CommandArgumentData type_style_arg; @@ -2409,7 +2405,7 @@ class CommandObjectTypeFilterAdd : public CommandObjectParsed { typedef std::vector option_vector; public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2528,8 +2524,7 @@ class CommandObjectTypeFilterAdd : public CommandObjectParsed { public: CommandObjectTypeFilterAdd(CommandInterpreter &interpreter) : CommandObjectParsed(interpreter, "type filter add", - "Add a new filter for a type.", nullptr), - m_options() { + "Add a new filter for a type.", nullptr) { CommandArgumentEntry type_arg; 
CommandArgumentData type_style_arg; @@ -2666,7 +2661,7 @@ class CommandObjectTypeLookup : public CommandObjectRaw { class CommandOptions : public OptionGroup { public: - CommandOptions() : OptionGroup() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -2716,8 +2711,7 @@ class CommandObjectTypeLookup : public CommandObjectRaw { "Lookup types and declarations in the current target, " "following language-specific naming conventions.", "type lookup ", - eCommandRequiresTarget), - m_option_group(), m_command_options() { + eCommandRequiresTarget) { m_option_group.Append(&m_command_options); m_option_group.Finalize(); } diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp index 9fbf036a19d1a..9701553bdda9f 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.cpp +++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp @@ -149,8 +149,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed { : CommandObjectParsed( interpreter, "watchpoint list", "List all watchpoints at configurable levels of detail.", nullptr, - eCommandRequiresTarget), - m_options() { + eCommandRequiresTarget) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID, eArgTypeWatchpointIDRange); @@ -165,7 +164,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -432,8 +431,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { : CommandObjectParsed(interpreter, "watchpoint delete", "Delete the specified watchpoint(s). 
If no " "watchpoints are specified, delete them all.", - nullptr, eCommandRequiresTarget), - m_options() { + nullptr, eCommandRequiresTarget) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID, eArgTypeWatchpointIDRange); @@ -456,7 +454,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -556,8 +554,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { : CommandObjectParsed(interpreter, "watchpoint ignore", "Set ignore count on the specified watchpoint(s). " "If no watchpoints are specified, set them all.", - nullptr, eCommandRequiresTarget), - m_options() { + nullptr, eCommandRequiresTarget) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID, eArgTypeWatchpointIDRange); @@ -580,7 +577,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -682,8 +679,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed { "If no watchpoint is specified, act on the last created " "watchpoint. 
" "Passing an empty argument clears the modification.", - nullptr, eCommandRequiresTarget), - m_options() { + nullptr, eCommandRequiresTarget) { CommandArgumentEntry arg; CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID, eArgTypeWatchpointIDRange); @@ -706,7 +702,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed { class CommandOptions : public Options { public: - CommandOptions() : Options(), m_condition() {} + CommandOptions() {} ~CommandOptions() override = default; @@ -813,8 +809,7 @@ class CommandObjectWatchpointSetVariable : public CommandObjectParsed { "to free up resources.", nullptr, eCommandRequiresFrame | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_option_group(), m_option_watchpoint() { + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) { SetHelpLong( R"( Examples: @@ -1006,8 +1001,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw { "to free up resources.", "", eCommandRequiresFrame | eCommandTryTargetAPILock | - eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), - m_option_group(), m_option_watchpoint() { + eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) { SetHelpLong( R"( Examples: diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 1f4e953663857..a429e568c61ae 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -66,8 +66,7 @@ class CommandObjectWatchpointCommandAdd : public CommandObjectParsed, "commands previously added to it.", nullptr, eCommandRequiresTarget), IOHandlerDelegateMultiline("DONE", - IOHandlerDelegate::Completion::LLDBCommand), - m_options() { + IOHandlerDelegate::Completion::LLDBCommand) { SetHelpLong( R"( General information about entering watchpoint commands @@ -314,7 +313,7 @@ are no syntax errors may indicate that a function was 
declared but never called. class CommandOptions : public Options { public: - CommandOptions() : Options(), m_one_liner(), m_function_name() {} + CommandOptions() {} ~CommandOptions() override = default; diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.h b/lldb/source/Commands/CommandOptionsProcessLaunch.h index d18a23245080d..7ab7fabe10503 100644 --- a/lldb/source/Commands/CommandOptionsProcessLaunch.h +++ b/lldb/source/Commands/CommandOptionsProcessLaunch.h @@ -18,7 +18,7 @@ namespace lldb_private { class CommandOptionsProcessLaunch : public lldb_private::OptionGroup { public: - CommandOptionsProcessLaunch() : lldb_private::OptionGroup() { + CommandOptionsProcessLaunch() { // Keep default values of all options in one place: OptionParsingStarting // () OptionParsingStarting(nullptr); From f8ddcb4131256fcb1777cba617d3c5277024f9ec Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 11:07:16 -0800 Subject: [PATCH 301/946] [Object] Remove a redundant return statement (NFC) Identified with readability-redundant-control-flow. --- llvm/lib/Object/Archive.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp index 91d8a0bf69aec..9a4ef055faa4d 100644 --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -1174,5 +1174,4 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) } setFirstRegular(*I); Err = Error::success(); - return; } From ad36f37ce2b4ab6b1aadf318456fb2b8bb141d71 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Mon, 24 Jan 2022 01:08:54 +0530 Subject: [PATCH 302/946] [MLIR][Presburger] Clean PresburgerSet identifier interface to match IntegerPolyhedron's interface This patch changes names of identifiers and their corresponding getters in PresburgerSet to match those of IntegerPolyhedron. 
Reviewed By: arjunp Differential Revision: https://reviews.llvm.org/D117998 --- .../mlir/Analysis/Presburger/PresburgerSet.h | 18 +++++----- .../lib/Analysis/Presburger/PresburgerSet.cpp | 35 ++++++++++--------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h b/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h index 5963e60071cca..e6be939027596 100644 --- a/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h +++ b/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h @@ -36,10 +36,10 @@ class PresburgerSet { unsigned getNumPolys() const; /// Return the number of real dimensions. - unsigned getNumDims() const; + unsigned getNumDimIds() const; /// Return the number of symbolic dimensions. - unsigned getNumSyms() const; + unsigned getNumSymbolIds() const; /// Return a reference to the list of IntegerPolyhedrons. ArrayRef getAllIntegerPolyhedron() const; @@ -82,9 +82,11 @@ class PresburgerSet { bool isEqual(const PresburgerSet &set) const; /// Return a universe set of the specified type that contains all points. - static PresburgerSet getUniverse(unsigned nDim = 0, unsigned nSym = 0); + static PresburgerSet getUniverse(unsigned numDims = 0, + unsigned numSymbols = 0); /// Return an empty set of the specified type that contains no points. - static PresburgerSet getEmptySet(unsigned nDim = 0, unsigned nSym = 0); + static PresburgerSet getEmptySet(unsigned numDims = 0, + unsigned numSymbols = 0); /// Return true if all the sets in the union are known to be integer empty /// false otherwise. @@ -102,19 +104,19 @@ class PresburgerSet { private: /// Construct an empty PresburgerSet. - PresburgerSet(unsigned nDim = 0, unsigned nSym = 0) - : nDim(nDim), nSym(nSym) {} + PresburgerSet(unsigned numDims = 0, unsigned numSymbols = 0) + : numDims(numDims), numSymbols(numSymbols) {} /// Return the set difference poly \ set. 
static PresburgerSet getSetDifference(IntegerPolyhedron poly, const PresburgerSet &set); /// Number of identifiers corresponding to real dimensions. - unsigned nDim; + unsigned numDims; /// Number of symbolic dimensions, unknown but constant for analysis, as in /// IntegerPolyhedron. - unsigned nSym; + unsigned numSymbols; /// The list of integerPolyhedrons that this set is the union of. SmallVector integerPolyhedrons; diff --git a/mlir/lib/Analysis/Presburger/PresburgerSet.cpp b/mlir/lib/Analysis/Presburger/PresburgerSet.cpp index 829d786c8883d..08d15bc88c74e 100644 --- a/mlir/lib/Analysis/Presburger/PresburgerSet.cpp +++ b/mlir/lib/Analysis/Presburger/PresburgerSet.cpp @@ -16,7 +16,7 @@ using namespace mlir; using namespace presburger_utils; PresburgerSet::PresburgerSet(const IntegerPolyhedron &poly) - : nDim(poly.getNumDimIds()), nSym(poly.getNumSymbolIds()) { + : numDims(poly.getNumDimIds()), numSymbols(poly.getNumSymbolIds()) { unionPolyInPlace(poly); } @@ -24,9 +24,9 @@ unsigned PresburgerSet::getNumPolys() const { return integerPolyhedrons.size(); } -unsigned PresburgerSet::getNumDims() const { return nDim; } +unsigned PresburgerSet::getNumDimIds() const { return numDims; } -unsigned PresburgerSet::getNumSyms() const { return nSym; } +unsigned PresburgerSet::getNumSymbolIds() const { return numSymbols; } ArrayRef PresburgerSet::getAllIntegerPolyhedron() const { return integerPolyhedrons; @@ -42,10 +42,10 @@ PresburgerSet::getIntegerPolyhedron(unsigned index) const { /// compatible spaces. 
static void assertDimensionsCompatible(const IntegerPolyhedron &poly, const PresburgerSet &set) { - assert(poly.getNumDimIds() == set.getNumDims() && + assert(poly.getNumDimIds() == set.getNumDimIds() && "Number of dimensions of the IntegerPolyhedron and PresburgerSet" "do not match!"); - assert(poly.getNumSymbolIds() == set.getNumSyms() && + assert(poly.getNumSymbolIds() == set.getNumSymbolIds() && "Number of symbols of the IntegerPolyhedron and PresburgerSet" "do not match!"); } @@ -53,9 +53,9 @@ static void assertDimensionsCompatible(const IntegerPolyhedron &poly, /// Assert that the two PresburgerSets live in compatible spaces. static void assertDimensionsCompatible(const PresburgerSet &setA, const PresburgerSet &setB) { - assert(setA.getNumDims() == setB.getNumDims() && + assert(setA.getNumDimIds() == setB.getNumDimIds() && "Number of dimensions of the PresburgerSets do not match!"); - assert(setA.getNumSyms() == setB.getNumSyms() && + assert(setA.getNumSymbolIds() == setB.getNumSymbolIds() && "Number of symbols of the PresburgerSets do not match!"); } @@ -91,14 +91,16 @@ bool PresburgerSet::containsPoint(ArrayRef point) const { }); } -PresburgerSet PresburgerSet::getUniverse(unsigned nDim, unsigned nSym) { - PresburgerSet result(nDim, nSym); - result.unionPolyInPlace(IntegerPolyhedron::getUniverse(nDim, nSym)); +PresburgerSet PresburgerSet::getUniverse(unsigned numDims, + unsigned numSymbols) { + PresburgerSet result(numDims, numSymbols); + result.unionPolyInPlace(IntegerPolyhedron::getUniverse(numDims, numSymbols)); return result; } -PresburgerSet PresburgerSet::getEmptySet(unsigned nDim, unsigned nSym) { - return PresburgerSet(nDim, nSym); +PresburgerSet PresburgerSet::getEmptySet(unsigned numDims, + unsigned numSymbols) { + return PresburgerSet(numDims, numSymbols); } // Return the intersection of this set with the given set. 
@@ -111,7 +113,7 @@ PresburgerSet PresburgerSet::getEmptySet(unsigned nDim, unsigned nSym) { PresburgerSet PresburgerSet::intersect(const PresburgerSet &set) const { assertDimensionsCompatible(set, *this); - PresburgerSet result(nDim, nSym); + PresburgerSet result(getNumDimIds(), getNumSymbolIds()); for (const IntegerPolyhedron &csA : integerPolyhedrons) { for (const IntegerPolyhedron &csB : set.integerPolyhedrons) { IntegerPolyhedron csACopy = csA, csBCopy = csB; @@ -336,14 +338,14 @@ PresburgerSet PresburgerSet::getSetDifference(IntegerPolyhedron poly, /// Return the complement of this set. PresburgerSet PresburgerSet::complement() const { return getSetDifference( - IntegerPolyhedron::getUniverse(getNumDims(), getNumSyms()), *this); + IntegerPolyhedron::getUniverse(getNumDimIds(), getNumSymbolIds()), *this); } /// Return the result of subtract the given set from this set, i.e., /// return `this \ set`. PresburgerSet PresburgerSet::subtract(const PresburgerSet &set) const { assertDimensionsCompatible(set, *this); - PresburgerSet result(nDim, nSym); + PresburgerSet result(getNumDimIds(), getNumSymbolIds()); // We compute (U_i t_i) \ (U_i set_i) as U_i (t_i \ V_i set_i). for (const IntegerPolyhedron &poly : integerPolyhedrons) result.unionSetInPlace(getSetDifference(poly, set)); @@ -386,7 +388,8 @@ bool PresburgerSet::findIntegerSample(SmallVectorImpl &sample) { } PresburgerSet PresburgerSet::coalesce() const { - PresburgerSet newSet = PresburgerSet::getEmptySet(getNumDims(), getNumSyms()); + PresburgerSet newSet = + PresburgerSet::getEmptySet(getNumDimIds(), getNumSymbolIds()); llvm::SmallBitVector isRedundant(getNumPolys()); for (unsigned i = 0, e = integerPolyhedrons.size(); i < e; ++i) { From 413684313d9dd7c83ab0c40830cccbd31a94bd7c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Jan 2022 09:42:18 -0800 Subject: [PATCH 303/946] [RISCV] Adjust the header comment in RISCVInstrInfoZb.td to better integrate Zbk* extensions. 
The Zbk* extensions have some overlap with Zb so have been placed in this file. Reviewed By: VincentWu Differential Revision: https://reviews.llvm.org/D117958 --- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 560ebb4eb08c6..25c5c35c03509 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -12,16 +12,21 @@ // Zbb - 1.0 // Zbc - 1.0 // Zbs - 1.0 -// Zbe - 0.93 -// Zbf - 0.93 -// Zbm - 0.93 -// Zbp - 0.93 -// Zbr - 0.93 -// Zbt - 0.93 +// Zbe - 0.93 *experimental +// Zbf - 0.93 *experimental +// Zbm - 0.93 *experimental +// Zbp - 0.93 *experimental +// Zbr - 0.93 *experimental +// Zbt - 0.93 *experimental // -// Zba, Zbb, Zbc, and Zbs have been ratified and are considered stable. The -// other extensions are experimental as they have not yet been ratiied and are -// subject to change. +// The experimental extensions appeared in an earlier draft of the Bitmanip +// extensions. They are not ratified and subject to change. 
+// +// This file also describes RISC-V instructions from the Zbk* extensions in +// Cryptography Extensions Volume I: Scalar & Entropy Source Instructions, +// versions: +// Zbkb - 1.0 +// Zbkc - 1.0 // //===----------------------------------------------------------------------===// From 32dc14f876c4b196dccb5b8db56510e401fa91ab Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 21:13:58 +0000 Subject: [PATCH 304/946] [X86] LowerFunnelShift - use supportedVectorShiftWithBaseAmnt to check for supported scalar shifts Allows us to reuse the ISD shift opcode instead of a mixture of ISD/X86ISD variants --- llvm/lib/Target/X86/X86ISelLowering.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17079116a6ae1..b3672abfef957 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29832,6 +29832,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); + unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL; unsigned NumElts = VT.getVectorNumElements(); MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2); @@ -29848,20 +29849,19 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, } // Attempt to fold scalar shift as unpack(y,x) << zext(splat(z)) - if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) { - unsigned ShiftX86Opc = IsFSHR ? 
X86ISD::VSRLI : X86ISD::VSHLI; - SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); - SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); - ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32); - Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, ScalarAmt, Subtarget, - DAG); - Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, ScalarAmt, Subtarget, - DAG); - return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR); + if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) { + if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) { + SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); + SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); + ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32); + Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, Subtarget, + DAG); + Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, Subtarget, + DAG); + return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR); + } } - unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL; - MVT WideSVT = MVT::getIntegerVT( std::min(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32)); MVT WideVT = MVT::getVectorVT(WideSVT, NumElts); From ab1add6adc444371268ddbcb169a509559abd9dc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 13:28:02 -0800 Subject: [PATCH 305/946] [clang] Move the definition of ASTDiff (NFC) This patch moves the definition of ASTDiff later within the header file. Without this patch, the header depends on the forward declarations of SyntaxTree and ComparisonOptions from another header file, which is not desirable. Since SyntaxTree and ComparisonOptions are defined in ASTDiff.h, we can move the definition of ASTDiff later and stop relying on the forward declarations from another header file.
--- clang/include/clang/Tooling/ASTDiff/ASTDiff.h | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Tooling/ASTDiff/ASTDiff.h b/clang/include/clang/Tooling/ASTDiff/ASTDiff.h index c772ad84c1390..5fe6db6cb133b 100644 --- a/clang/include/clang/Tooling/ASTDiff/ASTDiff.h +++ b/clang/include/clang/Tooling/ASTDiff/ASTDiff.h @@ -48,20 +48,6 @@ struct Node { llvm::Optional getQualifiedIdentifier() const; }; -class ASTDiff { -public: - ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options); - ~ASTDiff(); - - // Returns the ID of the node that is mapped to the given node in SourceTree. - NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const; - - class Impl; - -private: - std::unique_ptr DiffImpl; -}; - /// SyntaxTree objects represent subtrees of the AST. /// They can be constructed from any Decl or Stmt. class SyntaxTree { @@ -120,6 +106,20 @@ struct ComparisonOptions { } }; +class ASTDiff { +public: + ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options); + ~ASTDiff(); + + // Returns the ID of the node that is mapped to the given node in SourceTree. + NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const; + + class Impl; + +private: + std::unique_ptr DiffImpl; +}; + } // end namespace diff } // end namespace clang From ee591a64a795995fad96d8c16484baa7cacce99f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 13:28:04 -0800 Subject: [PATCH 306/946] [clang] Forward-declare DynTypedNode (NFC) This patch adds a forward declaration of DynTypedNode. DumpAST.h is relying on the forward declaration of DynTypedNode in ASTContext.h, which is undesirable.
--- clang-tools-extra/clangd/DumpAST.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/DumpAST.h b/clang-tools-extra/clangd/DumpAST.h index 424025aeca796..c72fe59179fd8 100644 --- a/clang-tools-extra/clangd/DumpAST.h +++ b/clang-tools-extra/clangd/DumpAST.h @@ -34,6 +34,7 @@ #include "clang/AST/ASTContext.h" namespace clang { +class DynTypedNode; namespace syntax { class TokenBuffer; } // namespace syntax From e59964b67e026cde7a1438a8e91ca077a90810e0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 13:28:06 -0800 Subject: [PATCH 307/946] [clang] Remove unused forward declarations (NFC) --- clang/include/clang/AST/ASTContext.h | 1 - clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index c5946b662cb29..ed35e73ce4cf9 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -99,7 +99,6 @@ class CXXMethodDecl; class CXXRecordDecl; class DiagnosticsEngine; class ParentMapContext; -class DynTypedNode; class DynTypedNodeList; class Expr; enum class FloatModeKind; diff --git a/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h b/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h index fb7bd4e8afa23..b74af5e8f24f5 100644 --- a/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h +++ b/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h @@ -17,9 +17,6 @@ namespace diff { using DynTypedNode = DynTypedNode; -class SyntaxTree; -struct ComparisonOptions; - /// Within a tree, this identifies a node by its preorder offset. 
struct NodeId { private: From 4762c077e7102326306c7788494e3ea16e0f4cec Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 21:34:55 +0000 Subject: [PATCH 308/946] [X86] LowerFunnelShift - always lower vXi8 fshl by constant amounts as unpack(y,x) << zext(z) This can always be lowered as PMULLW+PSRLWI+PACKUSWB --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +- llvm/test/CodeGen/X86/vector-fshl-128.ll | 165 ++++++----------------- llvm/test/CodeGen/X86/vector-fshl-256.ll | 117 ++++------------ llvm/test/CodeGen/X86/vector-fshl-512.ll | 130 ++++++------------ 4 files changed, 112 insertions(+), 306 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b3672abfef957..be85c116bb037 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29831,6 +29831,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); + bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode()); unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL; unsigned NumElts = VT.getVectorNumElements(); @@ -29867,7 +29868,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, MVT WideVT = MVT::getVectorVT(WideSVT, NumElts); // If per-element shifts are legal, fallback to generic expansion. 
- if (supportedVectorVarShift(VT, Subtarget, ShiftOpc)) + if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP()) return SDValue(); // Attempt to fold as: @@ -29889,7 +29890,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, } // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z) - if (supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) { + if ((IsCst && !IsFSHR && EltSizeInBits == 8) || + supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) { SDValue Z = DAG.getConstant(0, DL, VT); SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 7dda02a147341..b5d8c4f21a219 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -2182,119 +2182,50 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { -; SSE2-LABEL: constant_funnnel_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: packuswb %xmm2, %xmm0 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: movdqa %xmm1, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] 
-; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; SSE2-NEXT: psrlw $8, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: packuswb %xmm3, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: constant_funnnel_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: pand %xmm3, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE41-NEXT: pand %xmm3, %xmm2 -; SSE41-NEXT: packuswb %xmm0, %xmm2 -; SSE41-NEXT: psrlw $1, %xmm1 -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: packuswb %xmm1, %xmm0 -; SSE41-NEXT: por %xmm2, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: constant_funnnel_v16i8: -; AVX1: # %bb.0: -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = 
[255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq +; SSE-LABEL: constant_funnnel_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm1, %xmm2 +; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: psrlw $8, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: packuswb %xmm2, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq ; -; AVX2-LABEL: constant_funnnel_v16i8: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX-LABEL: constant_funnnel_v16i8: +; AVX: # %bb.0: +; AVX-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 +; AVX-NEXT: retq ; ; AVX512F-LABEL: constant_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v16i8: @@ -2360,27 +2291,15 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; ; X86-SSE2-LABEL: 
constant_funnnel_v16i8: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; X86-SSE2-NEXT: pand %xmm3, %xmm2 -; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-SSE2-NEXT: pand %xmm3, %xmm0 -; X86-SSE2-NEXT: packuswb %xmm2, %xmm0 -; X86-SSE2-NEXT: psrlw $1, %xmm1 -; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-SSE2-NEXT: pxor %xmm2, %xmm2 -; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 -; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 -; X86-SSE2-NEXT: psrlw $8, %xmm3 -; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; X86-SSE2-NEXT: psrlw $8, %xmm2 +; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: psrlw $8, %xmm1 -; X86-SSE2-NEXT: packuswb %xmm3, %xmm1 -; X86-SSE2-NEXT: por %xmm1, %xmm0 +; X86-SSE2-NEXT: packuswb %xmm2, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 ; X86-SSE2-NEXT: retl %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> ) ret <16 x i8> %res diff --git 
a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 8f9c88792353d..41f980886d000 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -1935,118 +1935,57 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX1-LABEL: constant_funnnel_v32i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [1,128,64,32,16,8,4,2] -; AVX1-NEXT: vpmullw %xmm3, %xmm8, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX1-NEXT: vmovdqa {{.*#+}} xmm9 = [1,2,4,8,16,32,64,128] -; AVX1-NEXT: vpmullw %xmm2, %xmm9, %xmm2 -; AVX1-NEXT: vpand %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm3 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm10 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpand %xmm3, %xmm10, %xmm3 -; AVX1-NEXT: vpxor %xmm11, %xmm11, %xmm11 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm3[8],xmm11[8],xmm3[9],xmm11[9],xmm3[10],xmm11[10],xmm3[11],xmm11[11],xmm3[12],xmm11[12],xmm3[13],xmm11[13],xmm3[14],xmm11[14],xmm3[15],xmm11[15] -; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [2,256,128,64,32,16,8,4] -; AVX1-NEXT: vpmullw %xmm7, %xmm6, %xmm6 -; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256] -; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3 -; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpackuswb %xmm6, %xmm3, %xmm3 -; 
AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw %xmm3, %xmm8, %xmm3 -; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw %xmm0, %xmm9, %xmm0 -; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0 -; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm10, %xmm1 -; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm11[8],xmm1[9],xmm11[9],xmm1[10],xmm11[10],xmm1[11],xmm11[11],xmm1[12],xmm11[12],xmm1[13],xmm11[13],xmm1[14],xmm11[14],xmm1[15],xmm11[15] -; AVX1-NEXT: vpmullw %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmullw %xmm4, %xmm1, %xmm1 -; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,128,64,32,16,8,4,2] +; AVX1-NEXT: vpmullw %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128] +; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; 
AVX1-NEXT: vpmullw %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX1-NEXT: vpmullw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_funnnel_v32i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] +; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] +; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] ; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: 
vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: constant_funnnel_v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] +; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] +; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] ; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v32i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] +; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] +; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] ; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = 
ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v32i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 8ba01c9ae3144..9954447f6836e 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -911,105 +911,51 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512F-LABEL: constant_funnnel_v64i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] -; AVX512F-NEXT: # ymm4 = mem[0,1,0,1] -; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] -; AVX512F-NEXT: # ymm6 = mem[0,1,0,1] -; AVX512F-NEXT: vpmullw %ymm6, %ymm2, %ymm2 -; AVX512F-NEXT: vpand %ymm5, %ymm2, %ymm2 -; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; 
AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpand %ymm5, %ymm3, %ymm3 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw %ymm6, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpsrlw $1, %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31] -; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [2,256,128,64,32,16,8,4,2,256,128,64,32,16,8,4] -; AVX512F-NEXT: # ymm6 = mem[0,1,0,1] -; AVX512F-NEXT: vpmullw %ymm6, %ymm5, %ymm5 -; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23] -; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] -; AVX512F-NEXT: # ymm7 = mem[0,1,0,1] -; AVX512F-NEXT: vpmullw %ymm7, %ymm2, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = 
ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15],ymm3[24],ymm2[24],ymm3[25],ymm2[25],ymm3[26],ymm2[26],ymm3[27],ymm2[27],ymm3[28],ymm2[28],ymm3[29],ymm2[29],ymm3[30],ymm2[30],ymm3[31],ymm2[31] +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] +; AVX512F-NEXT: # ymm5 = mem[0,1,0,1] +; AVX512F-NEXT: vpmullw %ymm5, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[16],ymm2[16],ymm3[17],ymm2[17],ymm3[18],ymm2[18],ymm3[19],ymm2[19],ymm3[20],ymm2[20],ymm3[21],ymm2[21],ymm3[22],ymm2[22],ymm3[23],ymm2[23] +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] +; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vpackuswb %ymm5, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15],ymm1[24],ymm4[24],ymm1[25],ymm4[25],ymm1[26],ymm4[26],ymm1[27],ymm4[27],ymm1[28],ymm4[28],ymm1[29],ymm4[29],ymm1[30],ymm4[30],ymm1[31],ymm4[31] -; AVX512F-NEXT: vpmullw %ymm6, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[16],ymm4[16],ymm1[17],ymm4[17],ymm1[18],ymm4[18],ymm1[19],ymm4[19],ymm1[20],ymm4[20],ymm1[21],ymm4[21],ymm1[22],ymm4[22],ymm1[23],ymm4[23] -; AVX512F-NEXT: vpmullw %ymm7, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512F-NEXT: 
vpackuswb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2 +; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] +; AVX512F-NEXT: vpmullw %ymm5, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] +; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v64i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] -; AVX512VL-NEXT: # ymm4 = mem[0,1,0,1] -; AVX512VL-NEXT: vpmullw %ymm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm3 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] -; AVX512VL-NEXT: # ymm6 = mem[0,1,0,1] -; AVX512VL-NEXT: vpmullw %ymm6, 
%ymm2, %ymm2 -; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2 -; AVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vpmullw %ymm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm3 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vpmullw %ymm6, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512VL-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-NEXT: vpsrlw $1, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31] -; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [2,256,128,64,32,16,8,4,2,256,128,64,32,16,8,4] -; AVX512VL-NEXT: # ymm6 = mem[0,1,0,1] -; AVX512VL-NEXT: vpmullw %ymm6, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23] -; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] -; AVX512VL-NEXT: 
# ymm7 = mem[0,1,0,1] -; AVX512VL-NEXT: vpmullw %ymm7, %ymm2, %ymm2 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15],ymm3[24],ymm2[24],ymm3[25],ymm2[25],ymm3[26],ymm2[26],ymm3[27],ymm2[27],ymm3[28],ymm2[28],ymm3[29],ymm2[29],ymm3[30],ymm2[30],ymm3[31],ymm2[31] +; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] +; AVX512VL-NEXT: # ymm5 = mem[0,1,0,1] +; AVX512VL-NEXT: vpmullw %ymm5, %ymm4, %ymm4 +; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[16],ymm2[16],ymm3[17],ymm2[17],ymm3[18],ymm2[18],ymm3[19],ymm2[19],ymm3[20],ymm2[20],ymm3[21],ymm2[21],ymm3[22],ymm2[22],ymm3[23],ymm2[23] +; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] +; AVX512VL-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512VL-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512VL-NEXT: vpackuswb %ymm5, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15],ymm1[24],ymm4[24],ymm1[25],ymm4[25],ymm1[26],ymm4[26],ymm1[27],ymm4[27],ymm1[28],ymm4[28],ymm1[29],ymm4[29],ymm1[30],ymm4[30],ymm1[31],ymm4[31] -; AVX512VL-NEXT: vpmullw %ymm6, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = 
ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[16],ymm4[16],ymm1[17],ymm4[17],ymm1[18],ymm4[18],ymm1[19],ymm4[19],ymm1[20],ymm4[20],ymm1[21],ymm4[21],ymm1[22],ymm4[22],ymm1[23],ymm4[23] -; AVX512VL-NEXT: vpmullw %ymm7, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpackuswb %ymm4, %ymm2, %ymm2 +; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] +; AVX512VL-NEXT: vpmullw %ymm5, %ymm4, %ymm4 +; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] +; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 +; AVX512VL-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v64i8: From 88f33cff4bee87ea31e129f734df232274098a78 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Jan 2022 09:42:18 -0800 Subject: [PATCH 309/946] [RISCV] Add bitreverse tests to bswap-ctlz-cttz-ctpop.ll. Add Zbb command lines. NFC Rename to include bitreverse. Add additional tests and Zbb command lines. There's some overlapping tests with rv32zbb.ll and rv64zbb.ll. Maybe I'll clean that up in a future patch. 
--- ...ll => bswap-bitreverse-ctlz-cttz-ctpop.ll} | 819 ++++++++++++++++-- 1 file changed, 769 insertions(+), 50 deletions(-) rename llvm/test/CodeGen/RISCV/{bswap-ctlz-cttz-ctpop.ll => bswap-bitreverse-ctlz-cttz-ctpop.ll} (58%) diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll similarity index 58% rename from llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll rename to llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll index 4be19eefa948f..8a0c0db7aaa38 100644 --- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll @@ -3,10 +3,18 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32ZBB +; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64ZBB declare i16 @llvm.bswap.i16(i16) declare i32 @llvm.bswap.i32(i32) declare i64 @llvm.bswap.i64(i64) +declare i8 @llvm.bitreverse.i8(i8) +declare i16 @llvm.bitreverse.i16(i16) +declare i32 @llvm.bitreverse.i32(i32) +declare i64 @llvm.bitreverse.i64(i64) declare i8 @llvm.cttz.i8(i8, i1) declare i16 @llvm.cttz.i16(i16, i1) declare i32 @llvm.cttz.i32(i32, i1) @@ -31,6 +39,18 @@ define i16 @test_bswap_i16(i16 %a) nounwind { ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a0, a0, 16 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 48 +; RV64ZBB-NEXT: ret %tmp = call i16 @llvm.bswap.i16(i16 %a) ret i16 %tmp } @@ -67,6 +87,17 @@ define i32 @test_bswap_i32(i32 %a) nounwind { ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: 
or a0, a0, a1 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: ret %tmp = call i32 @llvm.bswap.i32(i32 %a) ret i32 %tmp } @@ -129,15 +160,542 @@ define i64 @test_bswap_i64(i64 %a) nounwind { ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a2, a1 +; RV32ZBB-NEXT: rev8 a1, a0 +; RV32ZBB-NEXT: mv a0, a2 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i64 @llvm.bswap.i64(i64 %a) ret i64 %tmp } +define i8 @test_bitreverse_i8(i8 %a) nounwind { +; RV32I-LABEL: test_bitreverse_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a1, a0, 15 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srli a0, a0, 28 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: andi a1, a0, 51 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: andi a0, a0, 51 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: andi a1, a0, 85 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: andi a0, a0, 85 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a1, a0, 15 +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srli a0, a0, 60 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: andi a1, a0, 51 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: andi a0, a0, 51 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: andi a1, a0, 85 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: andi a0, a0, 85 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: andi 
a1, a0, 15 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: slli a0, a0, 24 +; RV32ZBB-NEXT: srli a0, a0, 28 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: andi a1, a0, 51 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: srli a0, a0, 2 +; RV32ZBB-NEXT: andi a0, a0, 51 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: andi a1, a0, 85 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: srli a0, a0, 1 +; RV32ZBB-NEXT: andi a0, a0, 85 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: andi a1, a0, 15 +; RV64ZBB-NEXT: slli a1, a1, 4 +; RV64ZBB-NEXT: slli a0, a0, 56 +; RV64ZBB-NEXT: srli a0, a0, 60 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: andi a1, a0, 51 +; RV64ZBB-NEXT: slli a1, a1, 2 +; RV64ZBB-NEXT: srli a0, a0, 2 +; RV64ZBB-NEXT: andi a0, a0, 51 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: andi a1, a0, 85 +; RV64ZBB-NEXT: slli a1, a1, 1 +; RV64ZBB-NEXT: srli a0, a0, 1 +; RV64ZBB-NEXT: andi a0, a0, 85 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: ret + %tmp = call i8 @llvm.bitreverse.i8(i8 %a) + ret i8 %tmp +} + +define i16 @test_bitreverse_i16(i16 %a) nounwind { +; RV32I-LABEL: test_bitreverse_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 1 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a2, 3 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 5 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: 
test_bitreverse_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 1 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 3 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 5 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 12 +; RV32ZBB-NEXT: lui a2, 15 +; RV32ZBB-NEXT: addi a2, a2, 240 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: srli a0, a0, 20 +; RV32ZBB-NEXT: andi a0, a0, -241 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 44 +; RV64ZBB-NEXT: lui a2, 15 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 52 +; RV64ZBB-NEXT: andi a0, a0, -241 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; 
RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + ret i16 %tmp +} + +define i32 @test_bitreverse_i32(i32 %a) nounwind { +; RV32I-LABEL: test_bitreverse_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a2, a0, 24 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: slliw a0, a0, 24 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 
+; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 36 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 28 +; RV64ZBB-NEXT: lui a2, 986895 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 
+; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + ret i32 %tmp +} + +define i64 @test_bitreverse_i64(i64 %a) nounwind { +; RV32I-LABEL: test_bitreverse_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: lui a5, 4080 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 2 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: or a2, a2, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: and a0, a0, a6 +; 
RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: and a0, a0, a7 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 8 +; RV64I-NEXT: li a3, 255 +; RV64I-NEXT: slli a4, a3, 24 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: srli a2, a0, 40 +; RV64I-NEXT: lui a4, 16 +; RV64I-NEXT: addiw a4, a4, -256 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: srli a4, a0, 56 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: slli a4, a3, 40 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: srliw a4, a0, 24 +; RV64I-NEXT: slli a4, a4, 32 +; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 40 +; RV64I-NEXT: slli a3, a3, 48 +; RV64I-NEXT: and a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI6_0) +; RV64I-NEXT: ld a3, %lo(.LCPI6_0)(a3) +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a2, %hi(.LCPI6_1) +; RV64I-NEXT: ld a2, %lo(.LCPI6_1)(a2) +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI6_2) +; RV64I-NEXT: ld a2, %lo(.LCPI6_2)(a2) +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a1, a1 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: lui a3, 61681 
+; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a2, a2, a1 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a1, a1, a0 +; RV32ZBB-NEXT: mv a0, a2 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_0)(a1) +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI6_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI6_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: ret 
+ %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + ret i64 %tmp +} + define i8 @test_cttz_i8(i8 %a) nounwind { ; RV32I-LABEL: test_cttz_i8: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: beqz a1, .LBB3_2 +; RV32I-NEXT: beqz a1, .LBB7_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 @@ -153,14 +711,14 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: andi a0, a0, 15 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT: li a0, 8 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_cttz_i8: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: beqz a1, .LBB3_2 +; RV64I-NEXT: beqz a1, .LBB7_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 @@ -176,9 +734,21 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: .LBB7_2: ; RV64I-NEXT: li a0, 8 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i8: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: ori a0, a0, 256 +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i8: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ori a0, a0, 256 +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false) ret i8 %tmp } @@ -188,7 +758,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 16 ; RV32I-NEXT: srli a1, a1, 16 -; RV32I-NEXT: beqz a1, .LBB4_2 +; RV32I-NEXT: beqz a1, .LBB8_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 @@ -214,7 +784,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV32I-NEXT: slli a0, a0, 19 ; RV32I-NEXT: srli a0, a0, 27 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: li a0, 16 ; RV32I-NEXT: ret ; @@ -222,7 +792,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV64I: # %bb.0: ; 
RV64I-NEXT: slli a1, a0, 48 ; RV64I-NEXT: srli a1, a1, 48 -; RV64I-NEXT: beqz a1, .LBB4_2 +; RV64I-NEXT: beqz a1, .LBB8_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 @@ -248,9 +818,23 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 51 ; RV64I-NEXT: srli a0, a0, 59 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: .LBB8_2: ; RV64I-NEXT: li a0, 16 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: lui a1, 16 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, 16 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false) ret i16 %tmp } @@ -258,7 +842,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { define i32 @test_cttz_i32(i32 %a) nounwind { ; RV32I-LABEL: test_cttz_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz a0, .LBB5_2 +; RV32I-NEXT: beqz a0, .LBB9_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill @@ -288,14 +872,14 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_cttz_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB5_2 +; RV64I-NEXT: beqz a1, .LBB9_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -325,9 +909,19 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: .LBB9_2: ; RV64I-NEXT: li a0, 32 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i32: +; RV32ZBB: 
# %bb.0: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctzw a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false) ret i32 %tmp } @@ -335,7 +929,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-LABEL: test_ctlz_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz a0, .LBB6_2 +; RV32I-NEXT: beqz a0, .LBB10_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill @@ -373,14 +967,14 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: .LBB10_2: ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB6_2 +; RV64I-NEXT: beqz a1, .LBB10_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -418,9 +1012,19 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: .LBB10_2: ; RV64I-NEXT: li a0, 32 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_ctlz_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: clz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_ctlz_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: clzw a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false) ret i32 %tmp } @@ -478,14 +1082,14 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s2, .LBB7_2 +; RV32I-NEXT: bnez s2, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: j .LBB11_3 +; 
RV32I-NEXT: .LBB11_2: ; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: .LBB11_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -500,39 +1104,57 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; ; RV64I-LABEL: test_cttz_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: beqz a0, .LBB7_2 +; RV64I-NEXT: beqz a0, .LBB11_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI7_0) -; RV64I-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI7_1) -; RV64I-NEXT: ld a2, %lo(.LCPI7_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI11_0) +; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI11_1) +; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI7_2) -; RV64I-NEXT: ld a2, %lo(.LCPI7_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI11_2) +; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI7_3) -; RV64I-NEXT: ld a1, %lo(.LCPI7_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI11_3) +; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: .LBB11_2: ; RV64I-NEXT: li a0, 64 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: bnez a0, .LBB11_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: ctz a0, a1 +; RV32ZBB-NEXT: addi a0, a0, 32 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: 
ret +; RV32ZBB-NEXT: .LBB11_2: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false) ret i64 %tmp } @@ -571,6 +1193,16 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind { ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i8_zero_undef: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i8_zero_undef: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true) ret i8 %tmp } @@ -629,6 +1261,16 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 51 ; RV64I-NEXT: srli a0, a0, 59 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i16_zero_undef: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i16_zero_undef: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true) ret i16 %tmp } @@ -695,6 +1337,16 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i32_zero_undef: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i32_zero_undef: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctzw a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true) ret i32 %tmp } @@ -752,14 +1404,14 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s2, .LBB11_2 +; RV32I-NEXT: bnez s2, .LBB15_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB11_3 -; 
RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: j .LBB15_3 +; RV32I-NEXT: .LBB15_2: ; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: .LBB11_3: +; RV32I-NEXT: .LBB15_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -779,29 +1431,47 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI11_0) -; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI11_1) -; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI15_0) +; RV64I-NEXT: ld a1, %lo(.LCPI15_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI15_1) +; RV64I-NEXT: ld a2, %lo(.LCPI15_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI11_2) -; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI15_2) +; RV64I-NEXT: ld a2, %lo(.LCPI15_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI11_3) -; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI15_3) +; RV64I-NEXT: ld a1, %lo(.LCPI15_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_cttz_i64_zero_undef: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: bnez a0, .LBB15_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: ctz a0, a1 +; RV32ZBB-NEXT: addi a0, a0, 32 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: .LBB15_2: +; RV32ZBB-NEXT: ctz a0, a0 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_cttz_i64_zero_undef: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: ctz a0, a0 +; RV64ZBB-NEXT: ret %tmp = call i64 
@llvm.cttz.i64(i64 %a, i1 true) ret i64 %tmp } @@ -862,6 +1532,16 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_ctpop_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_ctpop_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 } @@ -928,29 +1608,42 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, %hi(.LCPI13_0) -; RV64I-NEXT: ld a1, %lo(.LCPI13_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI13_1) -; RV64I-NEXT: ld a2, %lo(.LCPI13_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI17_1) +; RV64I-NEXT: ld a2, %lo(.LCPI17_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI13_2) -; RV64I-NEXT: ld a2, %lo(.LCPI13_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI17_2) +; RV64I-NEXT: ld a2, %lo(.LCPI17_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI13_3) -; RV64I-NEXT: ld a1, %lo(.LCPI13_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI17_3) +; RV64I-NEXT: ld a1, %lo(.LCPI17_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_ctpop_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: add a0, a0, a1 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_ctpop_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: 
cpop a0, a0 +; RV64ZBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 } @@ -987,6 +1680,18 @@ define i32 @test_parity_i32(i32 %a) { ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_parity_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: andi a0, a0, 1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_parity_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: andi a0, a0, 1 +; RV64ZBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) %2 = and i32 %1, 1 ret i32 %2 @@ -1026,6 +1731,20 @@ define i64 @test_parity_i64(i64 %a) { ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_parity_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: xor a0, a0, a1 +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: andi a0, a0, 1 +; RV32ZBB-NEXT: li a1, 0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_parity_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: andi a0, a0, 1 +; RV64ZBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) %2 = and i64 %1, 1 ret i64 %2 From 3575700b286f8c3150abb3de7968a9f36dd1cceb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Jan 2022 13:37:33 -0800 Subject: [PATCH 310/946] [RISCV] Add tests that do a bitreverse before or after a bswap. NFC We don't optimize this as well as we could. Bitreverse is always expanded to bswap and a shift/and/or sequence to swap bits within a byte. The newly created bswap will either becomes a shift/and/or sequence or rev8 instruction. We don't always realize the bswap is redundant with another bswap before or after the bitreverse. Found while thinking about the brev8 instruction from the Cryptography extension. It's equivalent to bswap(bitreverse(x)) or bitreverse(bswap(x)). 
--- .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 1118 ++++++++++++++++- 1 file changed, 1064 insertions(+), 54 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll index 8a0c0db7aaa38..435ea9c0d80df 100644 --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll @@ -691,11 +691,1021 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind { ret i64 %tmp } +define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind { +; RV32I-LABEL: test_bswap_bitreverse_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: srli a2, a2, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 1 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a2, 3 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 5 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_bitreverse_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a2, a0, 48 +; RV64I-NEXT: srli a2, a2, 56 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 1 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: 
srli a1, a0, 2 +; RV64I-NEXT: lui a2, 3 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 5 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_bitreverse_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a0, a0, 16 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 12 +; RV32ZBB-NEXT: lui a2, 15 +; RV32ZBB-NEXT: addi a2, a2, 240 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: srli a0, a0, 20 +; RV32ZBB-NEXT: andi a0, a0, -241 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_bitreverse_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 48 +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 44 +; RV64ZBB-NEXT: lui a2, 15 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 52 +; RV64ZBB-NEXT: andi a0, a0, -241 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, 
a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret + %tmp = call i16 @llvm.bswap.i16(i16 %a) + %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind { +; RV32I-LABEL: test_bswap_bitreverse_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: lui a4, 4080 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_bitreverse_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: lui a4, 4080 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: slliw a0, a0, 24 +; 
RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a2, a0, 24 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: slliw a0, a0, 24 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slliw a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_bitreverse_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_bitreverse_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 36 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw 
a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 28 +; RV64ZBB-NEXT: lui a2, 986895 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: ret + %tmp = call i32 @llvm.bswap.i32(i32 %a) + %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp) + ret i32 %tmp2 +} + +define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind { +; RV32I-LABEL: test_bswap_bitreverse_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: lui a3, 16 +; RV32I-NEXT: addi a3, a3, -256 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: lui a5, 4080 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 8 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srli a2, a0, 8 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a4, a0, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: and a0, a0, a4 +; 
RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: and a0, a0, a7 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: srli a2, a1, 8 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: srli a3, a1, 24 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 2 +; RV32I-NEXT: and a2, a2, a6 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bswap_bitreverse_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: li a4, 255 +; RV64I-NEXT: slli a5, a4, 24 +; RV64I-NEXT: and a3, a3, a5 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 40 +; RV64I-NEXT: lui a6, 16 +; RV64I-NEXT: addiw a6, a6, -256 +; RV64I-NEXT: and a3, a3, a6 +; RV64I-NEXT: srli a7, a0, 56 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 24 +; RV64I-NEXT: slli a7, a4, 40 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: srliw t0, a0, 24 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli t0, a0, 40 +; RV64I-NEXT: slli a4, a4, 48 +; RV64I-NEXT: and t0, t0, a4 
+; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: and a2, a3, a2 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: and a3, a3, a5 +; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: and a2, a2, a7 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI9_0) +; RV64I-NEXT: ld a3, %lo(.LCPI9_0)(a3) +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a2, %hi(.LCPI9_1) +; RV64I-NEXT: ld a2, %lo(.LCPI9_1)(a2) +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, %hi(.LCPI9_2) +; RV64I-NEXT: ld a2, %lo(.LCPI9_2)(a2) +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bswap_bitreverse_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: lui a3, 61681 +; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 1 
+; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bswap_bitreverse_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_0)(a1) +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI9_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI9_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: ret + %tmp = call i64 @llvm.bswap.i64(i64 %a) + %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp) + ret i64 %tmp2 +} + +define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind { +; RV32I-LABEL: test_bitreverse_bswap_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a2, 1 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, 
a0, a2 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a2, 3 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a2, 5 +; RV32I-NEXT: addi a2, a2, 1365 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_bswap_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a2, 1 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 3 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 5 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_bswap_i16: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 12 +; RV32ZBB-NEXT: lui a2, 15 +; RV32ZBB-NEXT: addi a2, a2, 240 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: srli a0, a0, 20 +; RV32ZBB-NEXT: andi a0, a0, -241 +; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 3 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; 
RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 5 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a0, a0, 16 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_bswap_i16: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 44 +; RV64ZBB-NEXT: lui a2, 15 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 52 +; RV64ZBB-NEXT: andi a0, a0, -241 +; RV64ZBB-NEXT: or a0, a0, a1 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 3 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 5 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 48 +; RV64ZBB-NEXT: ret + %tmp = call i16 @llvm.bitreverse.i16(i16 %a) + %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp) + ret i16 %tmp2 +} + +define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind { +; RV32I-LABEL: test_bitreverse_bswap_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: lui a4, 4080 +; RV32I-NEXT: and a3, a3, a4 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a1, a0, 4 +; RV32I-NEXT: lui a3, 61681 +; RV32I-NEXT: addi a3, a3, -241 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: slli a0, a0, 4 +; 
RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 2 +; RV32I-NEXT: lui a3, 209715 +; RV32I-NEXT: addi a3, a3, 819 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: and a0, a0, a3 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: srli a1, a0, 8 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: and a2, a2, a4 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_bswap_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: lui a4, 4080 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: slliw a0, a0, 24 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: lui a3, 61681 +; RV64I-NEXT: addiw a3, a3, -241 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: slliw a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a3, 209715 +; RV64I-NEXT: addiw a3, a3, 819 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: slliw a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srliw a1, a0, 8 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srliw a2, a0, 24 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: 
slliw a0, a0, 24 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_bswap_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a1, a0, 4 +; RV32ZBB-NEXT: lui a2, 61681 +; RV32ZBB-NEXT: addi a2, a2, -241 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 2 +; RV32ZBB-NEXT: lui a2, 209715 +; RV32ZBB-NEXT: addi a2, a2, 819 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: lui a2, 349525 +; RV32ZBB-NEXT: addi a2, a2, 1365 +; RV32ZBB-NEXT: and a1, a1, a2 +; RV32ZBB-NEXT: and a0, a0, a2 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a1, a0 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: test_bitreverse_bswap_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a1, a0, 36 +; RV64ZBB-NEXT: lui a2, 61681 +; RV64ZBB-NEXT: addiw a2, a2, -241 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: srli a0, a0, 28 +; RV64ZBB-NEXT: lui a2, 986895 +; RV64ZBB-NEXT: addiw a2, a2, 240 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 2 +; RV64ZBB-NEXT: lui a2, 209715 +; RV64ZBB-NEXT: addiw a2, a2, 819 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slliw a0, a0, 2 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: srli a1, a0, 1 +; RV64ZBB-NEXT: lui a2, 349525 +; RV64ZBB-NEXT: addiw a2, a2, 1365 +; RV64ZBB-NEXT: and a1, a1, a2 +; RV64ZBB-NEXT: and a0, a0, a2 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a1, a0 +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: ret + %tmp = call i32 @llvm.bitreverse.i32(i32 %a) + %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp) + ret i32 %tmp2 +} + +define 
i64 @test_bitreverse_bswap_i64(i64 %a) nounwind { +; RV32I-LABEL: test_bitreverse_bswap_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: srli a3, a1, 8 +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: and a3, a3, a2 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a4, a3, a4 +; RV32I-NEXT: slli a5, a1, 8 +; RV32I-NEXT: lui a3, 4080 +; RV32I-NEXT: and a5, a5, a3 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a5 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: lui a5, 61681 +; RV32I-NEXT: addi a5, a5, -241 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: slli a1, a1, 4 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 2 +; RV32I-NEXT: lui a6, 209715 +; RV32I-NEXT: addi a6, a6, 819 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a1, a1, a6 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 1 +; RV32I-NEXT: lui a7, 349525 +; RV32I-NEXT: addi a7, a7, 1365 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli t0, a0, 24 +; RV32I-NEXT: or a4, a4, t0 +; RV32I-NEXT: slli t0, a0, 8 +; RV32I-NEXT: and t0, t0, a3 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srli a4, a0, 4 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 2 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: and a0, a0, a6 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 1 +; RV32I-NEXT: and a4, a4, a7 +; RV32I-NEXT: and a0, a0, a7 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 8 +; RV32I-NEXT: and a4, a4, a2 +; RV32I-NEXT: srli a5, a0, 24 +; RV32I-NEXT: or a4, a4, a5 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: and a5, a5, a3 
+; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: srli a4, a1, 8 +; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: srli a4, a1, 24 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: slli a4, a1, 8 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_bitreverse_bswap_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a0, 24 +; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: li a4, 255 +; RV64I-NEXT: slli a5, a4, 24 +; RV64I-NEXT: and a3, a3, a5 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: srli a3, a0, 40 +; RV64I-NEXT: lui a6, 16 +; RV64I-NEXT: addiw a6, a6, -256 +; RV64I-NEXT: and a3, a3, a6 +; RV64I-NEXT: srli a7, a0, 56 +; RV64I-NEXT: or a3, a3, a7 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 24 +; RV64I-NEXT: slli a7, a4, 40 +; RV64I-NEXT: and a3, a3, a7 +; RV64I-NEXT: srliw t0, a0, 24 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: or a3, a3, t0 +; RV64I-NEXT: slli t0, a0, 40 +; RV64I-NEXT: slli a4, a4, 48 +; RV64I-NEXT: and t0, t0, a4 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: lui t0, %hi(.LCPI12_0) +; RV64I-NEXT: ld t0, %lo(.LCPI12_0)(t0) +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: and a1, a1, t0 +; RV64I-NEXT: and a0, a0, t0 +; RV64I-NEXT: lui a3, %hi(.LCPI12_1) +; RV64I-NEXT: ld a3, %lo(.LCPI12_1)(a3) +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: lui a3, %hi(.LCPI12_2) +; RV64I-NEXT: ld a3, %lo(.LCPI12_2)(a3) +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: and a0, a0, a3 +; RV64I-NEXT: slli a0, a0, 1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 40 +; 
RV64I-NEXT: and a1, a1, a6 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: and a2, a3, a2 +; RV64I-NEXT: srli a3, a0, 8 +; RV64I-NEXT: and a3, a3, a5 +; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: and a2, a2, a7 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBB-LABEL: test_bitreverse_bswap_i64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: rev8 a1, a1 +; RV32ZBB-NEXT: srli a2, a1, 4 +; RV32ZBB-NEXT: lui a3, 61681 +; RV32ZBB-NEXT: addi a3, a3, -241 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a1, a1, a3 +; RV32ZBB-NEXT: slli a1, a1, 4 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 2 +; RV32ZBB-NEXT: lui a4, 209715 +; RV32ZBB-NEXT: addi a4, a4, 819 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a1, a1, a4 +; RV32ZBB-NEXT: slli a1, a1, 2 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: srli a2, a1, 1 +; RV32ZBB-NEXT: lui a5, 349525 +; RV32ZBB-NEXT: addi a5, a5, 1365 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: slli a1, a1, 1 +; RV32ZBB-NEXT: or a1, a2, a1 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: srli a2, a0, 4 +; RV32ZBB-NEXT: and a2, a2, a3 +; RV32ZBB-NEXT: and a0, a0, a3 +; RV32ZBB-NEXT: slli a0, a0, 4 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 2 +; RV32ZBB-NEXT: and a2, a2, a4 +; RV32ZBB-NEXT: and a0, a0, a4 +; RV32ZBB-NEXT: slli a0, a0, 2 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: and a2, a2, a5 +; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: slli a0, a0, 1 +; RV32ZBB-NEXT: or a0, a2, a0 +; RV32ZBB-NEXT: rev8 a0, a0 +; RV32ZBB-NEXT: rev8 a1, a1 +; RV32ZBB-NEXT: ret +; +; RV64ZBB-LABEL: 
test_bitreverse_bswap_i64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_0) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_0)(a1) +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: srli a2, a0, 4 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_1) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_1)(a1) +; RV64ZBB-NEXT: slli a0, a0, 4 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 2 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: lui a1, %hi(.LCPI12_2) +; RV64ZBB-NEXT: ld a1, %lo(.LCPI12_2)(a1) +; RV64ZBB-NEXT: slli a0, a0, 2 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: srli a2, a0, 1 +; RV64ZBB-NEXT: and a2, a2, a1 +; RV64ZBB-NEXT: and a0, a0, a1 +; RV64ZBB-NEXT: slli a0, a0, 1 +; RV64ZBB-NEXT: or a0, a2, a0 +; RV64ZBB-NEXT: rev8 a0, a0 +; RV64ZBB-NEXT: ret + %tmp = call i64 @llvm.bitreverse.i64(i64 %a) + %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp) + ret i64 %tmp2 +} + define i8 @test_cttz_i8(i8 %a) nounwind { ; RV32I-LABEL: test_cttz_i8: ; RV32I: # %bb.0: ; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: beqz a1, .LBB7_2 +; RV32I-NEXT: beqz a1, .LBB13_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 @@ -711,14 +1721,14 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: andi a0, a0, 15 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: .LBB13_2: ; RV32I-NEXT: li a0, 8 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_cttz_i8: ; RV64I: # %bb.0: ; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: beqz a1, .LBB7_2 +; RV64I-NEXT: beqz a1, .LBB13_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 @@ -734,7 +1744,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind { ; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: andi a0, a0, 15 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: .LBB13_2: ; RV64I-NEXT: li a0, 8 ; RV64I-NEXT: ret ; @@ -758,7 +1768,7 @@ define i16 @test_cttz_i16(i16 %a) 
nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a0, 16 ; RV32I-NEXT: srli a1, a1, 16 -; RV32I-NEXT: beqz a1, .LBB8_2 +; RV32I-NEXT: beqz a1, .LBB14_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi a1, a0, -1 ; RV32I-NEXT: not a0, a0 @@ -784,7 +1794,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV32I-NEXT: slli a0, a0, 19 ; RV32I-NEXT: srli a0, a0, 27 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: .LBB14_2: ; RV32I-NEXT: li a0, 16 ; RV32I-NEXT: ret ; @@ -792,7 +1802,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: slli a1, a0, 48 ; RV64I-NEXT: srli a1, a1, 48 -; RV64I-NEXT: beqz a1, .LBB8_2 +; RV64I-NEXT: beqz a1, .LBB14_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 @@ -818,7 +1828,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { ; RV64I-NEXT: slli a0, a0, 51 ; RV64I-NEXT: srli a0, a0, 59 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB8_2: +; RV64I-NEXT: .LBB14_2: ; RV64I-NEXT: li a0, 16 ; RV64I-NEXT: ret ; @@ -842,7 +1852,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind { define i32 @test_cttz_i32(i32 %a) nounwind { ; RV32I-LABEL: test_cttz_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz a0, .LBB9_2 +; RV32I-NEXT: beqz a0, .LBB15_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill @@ -872,14 +1882,14 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: .LBB15_2: ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_cttz_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB9_2 +; RV64I-NEXT: beqz a1, .LBB15_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -909,7 +1919,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; 
RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB9_2: +; RV64I-NEXT: .LBB15_2: ; RV64I-NEXT: li a0, 32 ; RV64I-NEXT: ret ; @@ -929,7 +1939,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind { define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-LABEL: test_ctlz_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz a0, .LBB10_2 +; RV32I-NEXT: beqz a0, .LBB16_2 ; RV32I-NEXT: # %bb.1: # %cond.false ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill @@ -967,14 +1977,14 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: .LBB16_2: ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i32: ; RV64I: # %bb.0: ; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: beqz a1, .LBB10_2 +; RV64I-NEXT: beqz a1, .LBB16_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill @@ -1012,7 +2022,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: .LBB16_2: ; RV64I-NEXT: li a0, 32 ; RV64I-NEXT: ret ; @@ -1082,14 +2092,14 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: bnez s2, .LBB11_2 +; RV32I-NEXT: bnez s2, .LBB17_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB11_3 -; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: j .LBB17_3 +; RV32I-NEXT: .LBB17_2: ; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: .LBB11_3: +; RV32I-NEXT: .LBB17_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1104,49 +2114,49 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; ; RV64I-LABEL: test_cttz_i64: ; RV64I: # %bb.0: -; 
RV64I-NEXT: beqz a0, .LBB11_2 +; RV64I-NEXT: beqz a0, .LBB17_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI11_0) -; RV64I-NEXT: ld a1, %lo(.LCPI11_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI11_1) -; RV64I-NEXT: ld a2, %lo(.LCPI11_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI17_0) +; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI17_1) +; RV64I-NEXT: ld a2, %lo(.LCPI17_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI11_2) -; RV64I-NEXT: ld a2, %lo(.LCPI11_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI17_2) +; RV64I-NEXT: ld a2, %lo(.LCPI17_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI11_3) -; RV64I-NEXT: ld a1, %lo(.LCPI11_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI17_3) +; RV64I-NEXT: ld a1, %lo(.LCPI17_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret -; RV64I-NEXT: .LBB11_2: +; RV64I-NEXT: .LBB17_2: ; RV64I-NEXT: li a0, 64 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: test_cttz_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: bnez a0, .LBB11_2 +; RV32ZBB-NEXT: bnez a0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: ctz a0, a1 ; RV32ZBB-NEXT: addi a0, a0, 32 ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB11_2: +; RV32ZBB-NEXT: .LBB17_2: ; RV32ZBB-NEXT: ctz a0, a0 ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: ret @@ -1404,14 +2414,14 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: and a0, a0, s6 ; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: 
bnez s2, .LBB15_2 +; RV32I-NEXT: bnez s2, .LBB21_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srli a0, a0, 24 ; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB15_3 -; RV32I-NEXT: .LBB15_2: +; RV32I-NEXT: j .LBB21_3 +; RV32I-NEXT: .LBB21_2: ; RV32I-NEXT: srli a0, s0, 24 -; RV32I-NEXT: .LBB15_3: +; RV32I-NEXT: .LBB21_3: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1431,24 +2441,24 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; RV64I-NEXT: addi a1, a0, -1 ; RV64I-NEXT: not a0, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, %hi(.LCPI15_0) -; RV64I-NEXT: ld a1, %lo(.LCPI15_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI15_1) -; RV64I-NEXT: ld a2, %lo(.LCPI15_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI21_0) +; RV64I-NEXT: ld a1, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI21_1) +; RV64I-NEXT: ld a2, %lo(.LCPI21_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI15_2) -; RV64I-NEXT: ld a2, %lo(.LCPI15_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI21_2) +; RV64I-NEXT: ld a2, %lo(.LCPI21_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI15_3) -; RV64I-NEXT: ld a1, %lo(.LCPI15_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI21_3) +; RV64I-NEXT: ld a1, %lo(.LCPI21_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -1457,13 +2467,13 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; ; RV32ZBB-LABEL: test_cttz_i64_zero_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: bnez a0, .LBB15_2 +; RV32ZBB-NEXT: bnez a0, .LBB21_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: ctz a0, a1 ; RV32ZBB-NEXT: addi a0, a0, 32 ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: 
.LBB15_2: +; RV32ZBB-NEXT: .LBB21_2: ; RV32ZBB-NEXT: ctz a0, a0 ; RV32ZBB-NEXT: li a1, 0 ; RV32ZBB-NEXT: ret @@ -1608,24 +2618,24 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, %hi(.LCPI17_0) -; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) -; RV64I-NEXT: lui a2, %hi(.LCPI17_1) -; RV64I-NEXT: ld a2, %lo(.LCPI17_1)(a2) +; RV64I-NEXT: lui a1, %hi(.LCPI23_0) +; RV64I-NEXT: ld a1, %lo(.LCPI23_0)(a1) +; RV64I-NEXT: lui a2, %hi(.LCPI23_1) +; RV64I-NEXT: ld a2, %lo(.LCPI23_1)(a2) ; RV64I-NEXT: srli a3, a0, 1 ; RV64I-NEXT: and a1, a3, a1 ; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: and a1, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a2, %hi(.LCPI17_2) -; RV64I-NEXT: ld a2, %lo(.LCPI17_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI23_2) +; RV64I-NEXT: ld a2, %lo(.LCPI23_2)(a2) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: and a0, a0, a2 -; RV64I-NEXT: lui a1, %hi(.LCPI17_3) -; RV64I-NEXT: ld a1, %lo(.LCPI17_3)(a1) +; RV64I-NEXT: lui a1, %hi(.LCPI23_3) +; RV64I-NEXT: ld a1, %lo(.LCPI23_3)(a1) ; RV64I-NEXT: call __muldi3@plt ; RV64I-NEXT: srli a0, a0, 56 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload From 47d7e922d8438c801198a2901a01fcd2cfbdb353 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 13:59:59 -0800 Subject: [PATCH 311/946] [mlir] Ensure a newline at the end of a file (NFC) --- mlir/lib/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/CMakeLists.txt b/mlir/lib/CMakeLists.txt index 467b0ae33dd87..05bfbb47e28e7 100644 --- a/mlir/lib/CMakeLists.txt +++ b/mlir/lib/CMakeLists.txt @@ -20,4 +20,4 @@ add_subdirectory(Translation) # Only enable the ExecutionEngine if the native target is configured in. 
if(TARGET ${LLVM_NATIVE_ARCH}) add_subdirectory(ExecutionEngine) -endif() \ No newline at end of file +endif() From fa90fc6e0566a245cafa0afa4da4967cf4831779 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 14:00:01 -0800 Subject: [PATCH 312/946] [Sema] Fix a bugprone argument comment (NFC) Identified with bugprone-argument-comment. --- clang/lib/Sema/SemaTemplateDeduction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 8e1f1d294d6ec..22dd395d99439 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -5355,7 +5355,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2, bool AtLeastAsSpecialized; S.runWithSufficientStackSpace(Info.getLocation(), [&] { AtLeastAsSpecialized = !FinishTemplateArgumentDeduction( - S, P2, /*PartialOrdering=*/true, + S, P2, /*IsPartialOrdering=*/true, TemplateArgumentList(TemplateArgumentList::OnStack, TST1->template_arguments()), Deduced, Info); From 448d0dfab701ab00d081136d8373304fd91693a0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 14:00:03 -0800 Subject: [PATCH 313/946] [Analysis] Remove a redundant const from a return type (NFC) Identified with readability-const-return-type. 
--- llvm/lib/Analysis/ScalarEvolution.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e2d5df84be52f..3019ff526b66d 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3486,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return S; } -static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { +const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getAPInt().abs(); APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); From ab4756338c5b2216d52d9152b2f7e65f233c4dac Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 21 Jan 2022 20:20:39 -0800 Subject: [PATCH 314/946] DebugInfo: Don't put types in type units if they reference internal linkage types Doing this causes a declaration of the internal linkage (anonymous namespace) type to be emitted in the type unit, which would then be ambiguous as to which internal linkage definition it refers to (since the name is only valid internally). It's possible these internal linkage types could be resolved relative to the unit the TU is referred to from - but that doesn't seem ideal, and there's no reason to put the type in a type unit since it can only be defined in one CU anyway (since otherwise it'd be an ODR violation) & so avoiding the type unit should be a smaller DWARF encoding anyway. 
This also addresses an issue with Simplified Template Names where the template parameter could not be rebuilt from the declaration emitted into the TU (specifically for an enum non-type template parameter, where looking up the enumerators is necessary to rebuild the full template name) --- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 +++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 ++ llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 22 +++++++-- llvm/test/DebugInfo/X86/tu-to-non-tu.ll | 54 +++++++++++----------- 4 files changed, 58 insertions(+), 33 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 609b568f28beb..680b9586228f7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3367,7 +3367,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. - if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + if (!TypeUnitsUnderConstruction.empty() && + (AddrPool.hasBeenUsed() || SeenLocalType)) return; auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); @@ -3378,6 +3379,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); + SeenLocalType = false; auto OwnedUnit = std::make_unique(CU, Asm, this, &InfoHolder, getDwoLineTable(CU)); @@ -3421,7 +3423,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // Types referencing entries in the address table cannot be placed in type // units. - if (AddrPool.hasBeenUsed()) { + if (AddrPool.hasBeenUsed() || SeenLocalType) { // Remove all the types built while building this type. 
// This is pessimistic as some of these types might not be dependent on @@ -3449,14 +3451,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), + AddrPoolUsed(DD->AddrPool.hasBeenUsed()), + SeenLocalType(DD->SeenLocalType) { DD->TypeUnitsUnderConstruction.clear(); DD->AddrPool.resetUsedFlag(); + DD->SeenLocalType = false; } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); DD->AddrPool.resetUsedFlag(AddrPoolUsed); + DD->SeenLocalType = SeenLocalType; } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4e1a1b1e068df..0043000652e89 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -433,6 +433,7 @@ class DwarfDebug : public DebugHandlerBase { DenseMap StringTypeLocMap; AddressPool AddrPool; + bool SeenLocalType = false; /// Accelerator tables. AccelTable AccelDebugNames; @@ -671,6 +672,7 @@ class DwarfDebug : public DebugHandlerBase { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; bool AddrPoolUsed; + bool SeenLocalType; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: @@ -679,6 +681,7 @@ class DwarfDebug : public DebugHandlerBase { }; NonTypeUnitContext enterNonTypeUnitContext(); + void seenLocalType() { SeenLocalType = true; } /// Add a label so that arange data can be generated for it. 
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 6b2eb8f2bf1d9..956ed33b9eaa1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -597,10 +597,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Skip updating the accelerator tables since this is not the full type. if (MDString *TypeId = CTy->getRawIdentifier()) DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else { - auto X = DD->enterNonTypeUnitContext(); + else finishNonUnitTypeDIE(TyDIE, CTy); - } return &TyDIE; } constructTypeDIE(TyDIE, CTy); @@ -1842,5 +1840,23 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { StringRef Name = CTy->getName(); if (!Name.empty()) addString(D, dwarf::DW_AT_name, Name); + // If the type is in an anonymous namespace, we can't reference it from a TU + // (since the type would be CU local and the TU doesn't specify which TU has + // the appropriate type definition) - so flag this emission as such and skip + // the rest of the emission now since we're going to throw out all this work + // and put the outer/referencing type in the CU instead. + // FIXME: Probably good to generalize this to a DICompositeType flag populated + // by the frontend, then we could use that to have types that can have + // decl+def merged by LTO but where the definition still doesn't go in a type + // unit because the type has only one definition. 
+ for (DIScope *S = CTy->getScope(); S; S = S->getScope()) { + if (auto *NS = dyn_cast(S)) { + if (NS->getName().empty()) { + DD->seenLocalType(); + break; + } + } + } + auto X = DD->enterNonTypeUnitContext(); getCU().createTypeDIE(CTy); } diff --git a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll index cdd4f52fc7252..6b9e0d9e60999 100644 --- a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll +++ b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll @@ -1,28 +1,24 @@ ; RUN: llc -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu < %s \ ; RUN: | llvm-dwarfdump -debug-info -debug-types - | FileCheck %s -; Test that a type unit referencing a non-type unit (in this case, it's -; bordering on an ODR violation - a type with linkage references a type without -; linkage, so there's no way for the first type to be defined in more than one -; translation unit, so there's no need for it to be in a type unit - but this -; is quirky/rare and an easy way to test a broader issue). The type unit should -; not end up with a whole definition of the referenced type - instead it should -; have a declaration of the type, while the definition remains in the primary -; CU. -; (again, arguably in this instance - since the type is only referenced once, it -; could go in the TU only - but that requires tracking usage & then deciding -; where to put types, which isn't worthwhile right now) +; Test that a type unit referencing a non-type unit produces a declaration of +; the referent in the referee. 
+ +; Also check that an attempt to reference an internal linkage (defined in an anonymous +; namespace) type from a type unit (could happen with a pimpl idiom, for instance - +; it does mean the linkage-having type can only be defined in one translation +; unit anyway) forces the referent to not be placed in a type unit (because the +; declaration of the internal linkage type would be ambiguous/wouldn't allow a +; consumer to find the definition with certainty) ; CHECK: Type Unit: ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"bar" +; CHECK-NEXT: DW_AT_name {{.*}}"t1" -; CHECK: DW_TAG_namespace -; CHECK-NOT: {{DW_AT_name|DW_TAG}} ; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_name {{.*}}"foo" +; CHECK-NEXT: DW_AT_name {{.*}}"t2" ; CHECK: Compile Unit: @@ -30,32 +26,36 @@ ; CHECK-NEXT: DW_AT_declaration ; CHECK-NEXT: DW_AT_signature -; CHECK: DW_TAG_namespace -; CHECK-NOT: {{DW_AT_name|DW_TAG}} ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"foo" +; CHECK-NEXT: DW_AT_name {{.*}}"t2" ; CHECK-NEXT: DW_AT_byte_size -%struct.bar = type { %"struct.(anonymous namespace)::foo" } -%"struct.(anonymous namespace)::foo" = type { i8 } +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}}"t3" -@b = global %struct.bar zeroinitializer, align 1, !dbg !0 +; CHECK: DW_TAG_namespace +; CHECK-NOT: {{DW_TAG|DW_AT}} + +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_name {{.*}}"t4" !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!11, !13} !llvm.ident = !{!12} -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "b", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, globals: !5) +!2 
= distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, retainedTypes: !14) !3 = !DIFile(filename: "tu.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch") !4 = !{} -!5 = !{!0} -!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS3bar") +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS2t1") !7 = !{!8} !8 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !6, file: !3, line: 6, baseType: !9, size: 8) -!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", scope: !10, file: !3, line: 2, size: 8, elements: !4) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2", file: !3, line: 2, size: 8, elements: !4) !10 = !DINamespace(scope: null) !11 = !{i32 2, !"Debug Info Version", i32 3} !12 = !{!"clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)"} !13 = !{i32 2, !"Dwarf Version", i32 5} +!14 = !{!6, !15} +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 5, size: 8, elements: !16, identifier: "_ZTS2t3") +!16 = !{!17} +!17 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !15, file: !3, line: 6, baseType: !18, size: 8) +!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t4", scope: !10, file: !3, line: 2, size: 8, elements: !4) From 7c77df1528c8ef6ab430bca18661aaf578066b96 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 23 Jan 2022 22:48:26 +0000 Subject: [PATCH 315/946] [X86] Add some basic tests for PR46809 --- llvm/test/CodeGen/X86/select-lea.ll | 177 ++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 llvm/test/CodeGen/X86/select-lea.ll diff --git 
a/llvm/test/CodeGen/X86/select-lea.ll b/llvm/test/CodeGen/X86/select-lea.ll new file mode 100644 index 0000000000000..4b50fdc2ca4e0 --- /dev/null +++ b/llvm/test/CodeGen/X86/select-lea.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=CMOV +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=NOCMOV + +; PR46809 + +define i32 @sadd_add_imm(i32 %x, i32 %y) { +; X64-LABEL: sadd_add_imm: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rsi), %eax +; X64-NEXT: addl $100, %eax +; X64-NEXT: addl %esi, %edi +; X64-NEXT: cmovnol %edi, %eax +; X64-NEXT: retq +; +; CMOV-LABEL: sadd_add_imm: +; CMOV: # %bb.0: +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CMOV-NEXT: leal (%eax,%ecx), %edx +; CMOV-NEXT: addl $100, %edx +; CMOV-NEXT: addl %ecx, %eax +; CMOV-NEXT: cmovol %edx, %eax +; CMOV-NEXT: retl +; +; NOCMOV-LABEL: sadd_add_imm: +; NOCMOV: # %bb.0: +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOCMOV-NEXT: leal (%eax,%edx), %ecx +; NOCMOV-NEXT: addl %edx, %eax +; NOCMOV-NEXT: jno .LBB0_2 +; NOCMOV-NEXT: # %bb.1: +; NOCMOV-NEXT: addl $100, %ecx +; NOCMOV-NEXT: movl %ecx, %eax +; NOCMOV-NEXT: .LBB0_2: +; NOCMOV-NEXT: retl + %o = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) + %v1 = extractvalue { i32, i1 } %o, 1 + %v2 = extractvalue { i32, i1 } %o, 0 + %a = add i32 %v2, 100 + %r = select i1 %v1, i32 %a, i32 %v2 + ret i32 %r +} + +define i32 @uadd_add_imm(i32 %x, i32 %y) { +; X64-LABEL: uadd_add_imm: +; X64: # %bb.0: +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: 
leal (%rdi,%rsi), %eax +; X64-NEXT: addl $100, %eax +; X64-NEXT: addl %esi, %edi +; X64-NEXT: cmovael %edi, %eax +; X64-NEXT: retq +; +; CMOV-LABEL: uadd_add_imm: +; CMOV: # %bb.0: +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CMOV-NEXT: leal (%eax,%ecx), %edx +; CMOV-NEXT: addl $100, %edx +; CMOV-NEXT: addl %ecx, %eax +; CMOV-NEXT: cmovbl %edx, %eax +; CMOV-NEXT: retl +; +; NOCMOV-LABEL: uadd_add_imm: +; NOCMOV: # %bb.0: +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOCMOV-NEXT: leal (%eax,%edx), %ecx +; NOCMOV-NEXT: addl %edx, %eax +; NOCMOV-NEXT: jae .LBB1_2 +; NOCMOV-NEXT: # %bb.1: +; NOCMOV-NEXT: addl $100, %ecx +; NOCMOV-NEXT: movl %ecx, %eax +; NOCMOV-NEXT: .LBB1_2: +; NOCMOV-NEXT: retl + %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) + %v1 = extractvalue { i32, i1 } %o, 1 + %v2 = extractvalue { i32, i1 } %o, 0 + %a = add i32 %v2, 100 + %r = select i1 %v1, i32 %a, i32 %v2 + ret i32 %r +} + +define i32 @ssub_add_imm(i32 %x, i32 %y) { +; X64-LABEL: ssub_add_imm: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: addl $100, %eax +; X64-NEXT: subl %esi, %edi +; X64-NEXT: cmovnol %edi, %eax +; X64-NEXT: retq +; +; CMOV-LABEL: ssub_add_imm: +; CMOV: # %bb.0: +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CMOV-NEXT: movl %eax, %edx +; CMOV-NEXT: subl %ecx, %edx +; CMOV-NEXT: addl $100, %edx +; CMOV-NEXT: subl %ecx, %eax +; CMOV-NEXT: cmovol %edx, %eax +; CMOV-NEXT: retl +; +; NOCMOV-LABEL: ssub_add_imm: +; NOCMOV: # %bb.0: +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOCMOV-NEXT: movl %eax, %ecx +; NOCMOV-NEXT: subl %edx, %ecx +; NOCMOV-NEXT: subl %edx, %eax +; NOCMOV-NEXT: jno .LBB2_2 +; NOCMOV-NEXT: # %bb.1: +; NOCMOV-NEXT: addl $100, %ecx +; NOCMOV-NEXT: movl %ecx, %eax +; NOCMOV-NEXT: .LBB2_2: +; NOCMOV-NEXT: retl + %o = tail call 
{ i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) + %v1 = extractvalue { i32, i1 } %o, 1 + %v2 = extractvalue { i32, i1 } %o, 0 + %a = add i32 %v2, 100 + %r = select i1 %v1, i32 %a, i32 %v2 + ret i32 %r +} + +define i32 @usub_add_imm(i32 %x, i32 %y) { +; X64-LABEL: usub_add_imm: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: addl $100, %eax +; X64-NEXT: subl %esi, %edi +; X64-NEXT: cmovael %edi, %eax +; X64-NEXT: retq +; +; CMOV-LABEL: usub_add_imm: +; CMOV: # %bb.0: +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CMOV-NEXT: movl %eax, %edx +; CMOV-NEXT: subl %ecx, %edx +; CMOV-NEXT: addl $100, %edx +; CMOV-NEXT: subl %ecx, %eax +; CMOV-NEXT: cmovbl %edx, %eax +; CMOV-NEXT: retl +; +; NOCMOV-LABEL: usub_add_imm: +; NOCMOV: # %bb.0: +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOCMOV-NEXT: movl %eax, %ecx +; NOCMOV-NEXT: subl %edx, %ecx +; NOCMOV-NEXT: subl %edx, %eax +; NOCMOV-NEXT: jae .LBB3_2 +; NOCMOV-NEXT: # %bb.1: +; NOCMOV-NEXT: addl $100, %ecx +; NOCMOV-NEXT: movl %ecx, %eax +; NOCMOV-NEXT: .LBB3_2: +; NOCMOV-NEXT: retl + %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + %v1 = extractvalue { i32, i1 } %o, 1 + %v2 = extractvalue { i32, i1 } %o, 0 + %a = add i32 %v2, 100 + %r = select i1 %v1, i32 %a, i32 %v2 + ret i32 %r +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) From 2e58a18910867ba6795066e044293e6daf89edf5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 23 Jan 2022 16:07:43 -0800 Subject: [PATCH 316/946] DebugInfo: Include template parameters for simplified template decls in type units LLVM DebugInfo CodeGen synthesizes type declarations in type units when referencing types that are not in type 
units. When those synthesized types are templates and simplified template names (or mangled simplified template names) are in use, the template arguments must be attached to those declarations. A deeper fix (with a CU or DICompositeType flag) that would also support other uses of clang's -debug-forward-template-args (such as Sony's platform) could/should be implemented to fix this more broadly. --- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 + llvm/test/DebugInfo/X86/tu-to-non-tu.ll | 308 +++++++++++++++++++--- 2 files changed, 280 insertions(+), 30 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 956ed33b9eaa1..bfc98580002c5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1840,6 +1840,8 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { StringRef Name = CTy->getName(); if (!Name.empty()) addString(D, dwarf::DW_AT_name, Name); + if (Name.startswith("_STN") || !Name.contains('<')) + addTemplateParams(D, CTy->getTemplateParams()); // If the type is in an anonymous namespace, we can't reference it from a TU // (since the type would be CU local and the TU doesn't specify which TU has // the appropriate type definition) - so flag this emission as such and skip diff --git a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll index 6b9e0d9e60999..517872d2b92e9 100644 --- a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll +++ b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll @@ -11,51 +11,299 @@ ; declaration of the internal linkage type would be ambiguous/wouldn't allow a ; consumer to find the definition with certainty) -; CHECK: Type Unit: +; Built from the following source, compiled with this command: +; $ clang++-tot decl.cpp -g -fdebug-types-section -c +; And modified (as noted in the comments) to produce some "simplified" and "mangled" +; simplified template names, to ensure they get template 
parameters in declarations +; created in type units. +; struct non_tu { +; virtual void f1(); +; }; +; void non_tu::f1() {} +; struct tu_ref_non_tu { +; non_tu v1; +; }; +; tu_ref_non_tu v1; +; +; // Reference internal +; namespace { +; struct internal {}; +; } // namespace +; struct ref_internal { +; internal i; +; }; +; ref_internal v5; +; +; +; template +; struct templ_non_tu; +; +; // Reference to (normal, non-mangled/simplified) non-tu type with template +; // parameters. +; template <> +; struct templ_non_tu { +; virtual void f1(); +; }; +; void templ_non_tu::f1() {} +; struct ref_templ_non_tu { +; templ_non_tu v1; +; }; +; ref_templ_non_tu v2; +; +; // Modify templ_non_tu's name to be simplified (strip template parameter +; // list from the "name" attribute) +; template <> +; struct templ_non_tu { +; virtual void f1(); +; }; +; void templ_non_tu::f1() {} +; struct ref_templ_non_tu_simple { +; templ_non_tu v1; +; }; +; ref_templ_non_tu_simple v3; +; +; // Modify templ_non_tu's name to be mangled ('_STN' name '|' args) +; template <> +; struct templ_non_tu { +; virtual void f1(); +; }; +; void templ_non_tu::f1() {} +; struct ref_templ_non_tu_mangled { +; templ_non_tu v1; +; }; +; ref_templ_non_tu_mangled v4; + + + +; CHECK-LABEL: Type Unit: +; CHECK: DW_TAG_structure_type +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"tu_ref_non_tu" + +; CHECK-LABEL: Type Unit: +; CHECK: DW_TAG_structure_type +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu" ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"t1" +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_name ("templ_non_tu") +; CHECK-LABEL: Type Unit: ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_name {{.*}}"t2" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu_simple" +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_name ("templ_non_tu") +; CHECK-NOT: DW_TAG +; CHECK: 
DW_TAG_template_type_parameter +; CHECK-NEXT: DW_AT_type {{.*}}"long" + +; CHECK-LABEL: Type Unit: +; CHECK: DW_TAG_structure_type +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu_mangled" +; CHECK: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_name ("_STNtempl_non_tu|") +; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_template_type_parameter +; CHECK-NEXT: DW_AT_type {{.*}}"bool" + -; CHECK: Compile Unit: +; CHECK-LABEL: Compile Unit: ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_signature +; CHECK-NEXT: DW_AT_declaration (true) +; CHECK-NEXT: DW_AT_signature (0xb1cde890d320f5c2) ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"t2" -; CHECK-NEXT: DW_AT_byte_size +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"non_tu" + ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"t3" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"ref_internal" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_byte_size ; CHECK: DW_TAG_namespace ; CHECK-NOT: {{DW_TAG|DW_AT}} - ; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name {{.*}}"t4" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_name {{.*}}"internal" + +%struct.ref_internal = type { %"struct.(anonymous namespace)::internal" } +%"struct.(anonymous namespace)::internal" = type { i8 } +%struct.non_tu = type { i32 (...)** } +%struct.templ_non_tu = type { i32 (...)** } +%struct.templ_non_tu.0 = type { i32 (...)** } +%struct.templ_non_tu.1 = type { i32 (...)** } + +@_ZTV6non_tu = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI6non_tu to i8*), i8* bitcast (void (%struct.non_tu*)* @_ZN6non_tu2f1Ev to i8*)] }, align 8 +@v1 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV6non_tu, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !0 +@v5 = dso_local global %struct.ref_internal zeroinitializer, align 1, !dbg !5 +@_ZTV12templ_non_tuIiE = 
dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIiE to i8*), i8* bitcast (void (%struct.templ_non_tu*)* @_ZN12templ_non_tuIiE2f1Ev to i8*)] }, align 8 +@v2 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIiE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !13 +@_ZTV12templ_non_tuIlE = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIlE to i8*), i8* bitcast (void (%struct.templ_non_tu.0*)* @_ZN12templ_non_tuIlE2f1Ev to i8*)] }, align 8 +@v3 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIlE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !32 +@_ZTV12templ_non_tuIbE = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIbE to i8*), i8* bitcast (void (%struct.templ_non_tu.1*)* @_ZN12templ_non_tuIbE2f1Ev to i8*)] }, align 8 +@v4 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIbE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !46 +@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* +@_ZTS6non_tu = dso_local constant [8 x i8] c"6non_tu\00", align 1 +@_ZTI6non_tu = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @_ZTS6non_tu, i32 0, i32 0) }, align 8 +@_ZTS12templ_non_tuIiE = dso_local constant [18 x i8] c"12templ_non_tuIiE\00", align 1 +@_ZTI12templ_non_tuIiE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIiE, i32 0, i32 0) }, 
align 8 +@_ZTS12templ_non_tuIlE = dso_local constant [18 x i8] c"12templ_non_tuIlE\00", align 1 +@_ZTI12templ_non_tuIlE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIlE, i32 0, i32 0) }, align 8 +@_ZTS12templ_non_tuIbE = dso_local constant [18 x i8] c"12templ_non_tuIbE\00", align 1 +@_ZTI12templ_non_tuIbE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIbE, i32 0, i32 0) }, align 8 + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_ZN6non_tu2f1Ev(%struct.non_tu* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !76 { +entry: + %this.addr = alloca %struct.non_tu*, align 8 + store %struct.non_tu* %this, %struct.non_tu** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.non_tu** %this.addr, metadata !77, metadata !DIExpression()), !dbg !79 + %this1 = load %struct.non_tu*, %struct.non_tu** %this.addr, align 8 + ret void, !dbg !80 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_ZN12templ_non_tuIiE2f1Ev(%struct.templ_non_tu* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !81 { +entry: + %this.addr = alloca %struct.templ_non_tu*, align 8 + store %struct.templ_non_tu* %this, %struct.templ_non_tu** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.templ_non_tu** %this.addr, metadata !82, metadata !DIExpression()), !dbg !84 + %this1 = load %struct.templ_non_tu*, %struct.templ_non_tu** %this.addr, align 8 + ret void, !dbg !85 +} 
+ +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_ZN12templ_non_tuIlE2f1Ev(%struct.templ_non_tu.0* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !86 { +entry: + %this.addr = alloca %struct.templ_non_tu.0*, align 8 + store %struct.templ_non_tu.0* %this, %struct.templ_non_tu.0** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.templ_non_tu.0** %this.addr, metadata !87, metadata !DIExpression()), !dbg !89 + %this1 = load %struct.templ_non_tu.0*, %struct.templ_non_tu.0** %this.addr, align 8 + ret void, !dbg !90 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_ZN12templ_non_tuIbE2f1Ev(%struct.templ_non_tu.1* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !91 { +entry: + %this.addr = alloca %struct.templ_non_tu.1*, align 8 + store %struct.templ_non_tu.1* %this, %struct.templ_non_tu.1** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.templ_non_tu.1** %this.addr, metadata !92, metadata !DIExpression()), !dbg !94 + %this1 = load %struct.templ_non_tu.1*, %struct.templ_non_tu.1** %this.addr, align 8 + ret void, !dbg !95 +} + +attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!11, !13} -!llvm.ident = !{!12} - -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, retainedTypes: !14) -!3 = !DIFile(filename: "tu.cpp", directory: 
"/usr/local/google/home/blaikie/dev/scratch") -!4 = !{} -!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS2t1") -!7 = !{!8} -!8 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !6, file: !3, line: 6, baseType: !9, size: 8) -!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2", file: !3, line: 2, size: 8, elements: !4) -!10 = !DINamespace(scope: null) -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{!"clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)"} -!13 = !{i32 2, !"Dwarf Version", i32 5} -!14 = !{!6, !15} -!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 5, size: 8, elements: !16, identifier: "_ZTS2t3") +!llvm.module.flags = !{!70, !71, !72, !73, !74} +!llvm.ident = !{!75} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "v1", scope: !2, file: !3, line: 8, type: !60, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git ab4756338c5b2216d52d9152b2f7e65f233c4dac)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "decl.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch") +!4 = !{!0, !5, !13, !32, !46} +!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression()) +!6 = distinct !DIGlobalVariable(name: "v5", scope: !2, file: !3, line: 17, type: !7, isLocal: false, isDefinition: true) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_internal", file: !3, line: 14, size: 8, flags: DIFlagTypePassByValue, elements: !8, identifier: "_ZTS12ref_internal") +!8 = !{!9} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !7, file: !3, line: 15, baseType: !10, size: 8) +!10 = distinct 
!DICompositeType(tag: DW_TAG_structure_type, name: "internal", scope: !11, file: !3, line: 12, size: 8, flags: DIFlagTypePassByValue, elements: !12) +!11 = !DINamespace(scope: null) +!12 = !{} +!13 = !DIGlobalVariableExpression(var: !14, expr: !DIExpression()) +!14 = distinct !DIGlobalVariable(name: "v2", scope: !2, file: !3, line: 33, type: !15, isLocal: false, isDefinition: true) +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu", file: !3, line: 30, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !16, identifier: "_ZTS16ref_templ_non_tu") !16 = !{!17} -!17 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !15, file: !3, line: 6, baseType: !18, size: 8) -!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t4", scope: !10, file: !3, line: 2, size: 8, elements: !4) +!17 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !15, file: !3, line: 31, baseType: !18, size: 64) +!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "templ_non_tu", file: !3, line: 26, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !19, vtableHolder: !18, templateParams: !30) +!19 = !{!20, !26} +!20 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$templ_non_tu", scope: !3, file: !3, baseType: !21, size: 64, flags: DIFlagArtificial) +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64) +!22 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", baseType: !23, size: 64) +!23 = !DISubroutineType(types: !24) +!24 = !{!25} +!25 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!26 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIiE2f1Ev", scope: !18, file: !3, line: 27, type: !27, scopeLine: 27, containingType: !18, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual) +!27 = !DISubroutineType(types: !28) +!28 = !{null, !29} +!29 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64, flags: 
DIFlagArtificial | DIFlagObjectPointer) +!30 = !{!31} +!31 = !DITemplateTypeParameter(name: "T", type: !25) +!32 = !DIGlobalVariableExpression(var: !33, expr: !DIExpression()) +!33 = distinct !DIGlobalVariable(name: "v3", scope: !2, file: !3, line: 45, type: !34, isLocal: false, isDefinition: true) +!34 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu_simple", file: !3, line: 42, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !35, identifier: "_ZTS23ref_templ_non_tu_simple") +!35 = !{!36} +!36 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !34, file: !3, line: 43, baseType: !37, size: 64) +!37 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "templ_non_tu", file: !3, line: 38, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !38, vtableHolder: !37, templateParams: !43) +!38 = !{!20, !39} +!39 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIlE2f1Ev", scope: !37, file: !3, line: 39, type: !40, scopeLine: 39, containingType: !37, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual) +!40 = !DISubroutineType(types: !41) +!41 = !{null, !42} +!42 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !37, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!43 = !{!44} +!44 = !DITemplateTypeParameter(name: "T", type: !45) +!45 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!46 = !DIGlobalVariableExpression(var: !47, expr: !DIExpression()) +!47 = distinct !DIGlobalVariable(name: "v4", scope: !2, file: !3, line: 56, type: !48, isLocal: false, isDefinition: true) +!48 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu_mangled", file: !3, line: 53, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !49, identifier: "_ZTS24ref_templ_non_tu_mangled") +!49 = !{!50} +!50 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !48, file: !3, line: 54, baseType: !51, size: 64) +!51 
= distinct !DICompositeType(tag: DW_TAG_structure_type, name: "_STNtempl_non_tu|", file: !3, line: 49, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !52, vtableHolder: !51, templateParams: !57) +!52 = !{!20, !53} +!53 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIbE2f1Ev", scope: !51, file: !3, line: 50, type: !54, scopeLine: 50, containingType: !51, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual) +!54 = !DISubroutineType(types: !55) +!55 = !{null, !56} +!56 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !51, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!57 = !{!58} +!58 = !DITemplateTypeParameter(name: "T", type: !59) +!59 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean) +!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "tu_ref_non_tu", file: !3, line: 5, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !61, identifier: "_ZTS13tu_ref_non_tu") +!61 = !{!62} +!62 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !60, file: !3, line: 6, baseType: !63, size: 64) +!63 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "non_tu", file: !3, line: 1, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !64, vtableHolder: !63) +!64 = !{!65, !66} +!65 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$non_tu", scope: !3, file: !3, baseType: !21, size: 64, flags: DIFlagArtificial) +!66 = !DISubprogram(name: "f1", linkageName: "_ZN6non_tu2f1Ev", scope: !63, file: !3, line: 2, type: !67, scopeLine: 2, containingType: !63, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual) +!67 = !DISubroutineType(types: !68) +!68 = !{null, !69} +!69 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !63, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!70 = !{i32 7, !"Dwarf Version", i32 5} +!71 = !{i32 2, !"Debug Info Version", i32 3} +!72 = !{i32 1, !"wchar_size", i32 4} +!73 = !{i32 7, 
!"uwtable", i32 1} +!74 = !{i32 7, !"frame-pointer", i32 2} +!75 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git ab4756338c5b2216d52d9152b2f7e65f233c4dac)"} +!76 = distinct !DISubprogram(name: "f1", linkageName: "_ZN6non_tu2f1Ev", scope: !63, file: !3, line: 4, type: !67, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !66, retainedNodes: !12) +!77 = !DILocalVariable(name: "this", arg: 1, scope: !76, type: !78, flags: DIFlagArtificial | DIFlagObjectPointer) +!78 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !63, size: 64) +!79 = !DILocation(line: 0, scope: !76) +!80 = !DILocation(line: 4, column: 20, scope: !76) +!81 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIiE2f1Ev", scope: !18, file: !3, line: 29, type: !27, scopeLine: 29, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !26, retainedNodes: !12) +!82 = !DILocalVariable(name: "this", arg: 1, scope: !81, type: !83, flags: DIFlagArtificial | DIFlagObjectPointer) +!83 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64) +!84 = !DILocation(line: 0, scope: !81) +!85 = !DILocation(line: 29, column: 31, scope: !81) +!86 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIlE2f1Ev", scope: !37, file: !3, line: 41, type: !40, scopeLine: 41, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !39, retainedNodes: !12) +!87 = !DILocalVariable(name: "this", arg: 1, scope: !86, type: !88, flags: DIFlagArtificial | DIFlagObjectPointer) +!88 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !37, size: 64) +!89 = !DILocation(line: 0, scope: !86) +!90 = !DILocation(line: 41, column: 32, scope: !86) +!91 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIbE2f1Ev", scope: !51, file: !3, line: 52, type: !54, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !53, retainedNodes: !12) +!92 = 
!DILocalVariable(name: "this", arg: 1, scope: !91, type: !93, flags: DIFlagArtificial | DIFlagObjectPointer) +!93 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !51, size: 64) +!94 = !DILocation(line: 0, scope: !91) +!95 = !DILocation(line: 52, column: 32, scope: !91) From 3a3af2bbc97e7db045eccb8683e93b9aa7ef562b Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 24 Jan 2022 10:22:33 +0800 Subject: [PATCH 317/946] [C++20] [Module] fix bug 47716 and implement [module.interface]/p6 This fixes bug 47716. According to [module.interface]p2, it is meaningless to export an entity which is not in namespace scope. The reason why the compiler crashes is that the compiler missed ExportDecl when the compiler traverse the subclass of DeclContext. So here is the crash. Also, the patch implements [module.interface]p6 in Sema::CheckRedeclaration* functions. Reviewed By: aaron.ballman, urnathan Differential Revision: https://reviews.llvm.org/D112903 --- clang/include/clang/AST/DeclBase.h | 14 +++ .../clang/Basic/DiagnosticSemaKinds.td | 5 + clang/include/clang/Sema/Sema.h | 2 + clang/lib/AST/DeclBase.cpp | 9 ++ clang/lib/Sema/SemaDecl.cpp | 49 +++++++++- clang/lib/Sema/SemaDeclCXX.cpp | 2 +- clang/lib/Sema/SemaTemplate.cpp | 2 +- .../test/CXX/module/module.interface/p2-2.cpp | 37 ++++++++ clang/test/CXX/module/module.interface/p6.cpp | 93 +++++++++++++++++++ 9 files changed, 207 insertions(+), 6 deletions(-) create mode 100644 clang/test/CXX/module/module.interface/p2-2.cpp create mode 100644 clang/test/CXX/module/module.interface/p6.cpp diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 1328d377d00fa..06d2f17d14300 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -607,6 +607,20 @@ class alignas(8) Decl { return getModuleOwnershipKind() == ModuleOwnershipKind::ModulePrivate; } + /// Whether this declaration was exported in a lexical context. 
+ /// e.g.: + /// + /// export namespace A { + /// void f1(); // isInExportDeclContext() == true + /// } + /// void A::f1(); // isInExportDeclContext() == false + /// + /// namespace B { + /// void f2(); // isInExportDeclContext() == false + /// } + /// export void B::f2(); // isInExportDeclContext() == true + bool isInExportDeclContext() const; + /// Return true if this declaration has an attribute which acts as /// definition of the entity, such as 'alias' or 'ifunc'. bool hasDefiningAttr() const; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 88e430d8eb09f..7fccdcaa9fc6a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7803,6 +7803,11 @@ def err_expected_class_or_namespace : Error<"%0 is not a class" "%select{ or namespace|, namespace, or enumeration}1">; def err_invalid_declarator_scope : Error<"cannot define or redeclare %0 here " "because namespace %1 does not enclose namespace %2">; +def err_export_non_namespace_scope_name : Error< + "cannot export %0 as it is not at namespace scope">; +def err_redeclaration_non_exported : Error < + "cannot export redeclaration %0 here since the previous declaration is not " + "exported">; def err_invalid_declarator_global_scope : Error< "definition or redeclaration of %0 cannot name the global scope">; def err_invalid_declarator_in_function : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b1ef02865328f..4b609f4b1477c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4320,6 +4320,8 @@ class Sema final { bool ConsiderLinkage, bool AllowInlineNamespace); bool CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old); + bool CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old); + bool CheckRedeclarationInModule(NamedDecl *New, NamedDecl *Old); void DiagnoseAmbiguousLookup(LookupResult 
&Result); //@} diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 98a5c6b664713..9ee1cc0830867 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -995,6 +995,15 @@ bool Decl::AccessDeclContextCheck() const { return true; } +bool Decl::isInExportDeclContext() const { + const DeclContext *DC = getLexicalDeclContext(); + + while (DC && !isa(DC)) + DC = DC->getLexicalParent(); + + return DC && isa(DC); +} + static Decl::Kind getKind(const Decl *D) { return D->getKind(); } static Decl::Kind getKind(const DeclContext *DC) { return DC->getDeclKind(); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index d4ed721e0545b..a29409461f575 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1628,6 +1628,39 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { return false; } +// [module.interface]p6: +// A redeclaration of an entity X is implicitly exported if X was introduced by +// an exported declaration; otherwise it shall not be exported. +bool Sema::CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old) { + bool IsNewExported = New->isInExportDeclContext(); + bool IsOldExported = Old->isInExportDeclContext(); + + // It should be irrevelant if both of them are not exported. + if (!IsNewExported && !IsOldExported) + return false; + + if (IsOldExported) + return false; + + assert(IsNewExported); + + Diag(New->getLocation(), diag::err_redeclaration_non_exported) << New; + Diag(Old->getLocation(), diag::note_previous_declaration); + return true; +} + +// A wrapper function for checking the semantic restrictions of +// a redeclaration within a module. 
+bool Sema::CheckRedeclarationInModule(NamedDecl *New, NamedDecl *Old) { + if (CheckRedeclarationModuleOwnership(New, Old)) + return true; + + if (CheckRedeclarationExported(New, Old)) + return true; + + return false; +} + static bool isUsingDecl(NamedDecl *D) { return isa(D) || isa(D) || @@ -3390,7 +3423,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, } } - if (CheckRedeclarationModuleOwnership(New, Old)) + if (CheckRedeclarationInModule(New, Old)) return true; if (!getLangOpts().CPlusPlus) { @@ -4269,7 +4302,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) { return New->setInvalidDecl(); } - if (CheckRedeclarationModuleOwnership(New, Old)) + if (CheckRedeclarationInModule(New, Old)) return; // Variables with external linkage are analyzed in FinalizeDeclaratorGroup. @@ -5759,7 +5792,15 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC, else if (isa(Cur)) Diag(Loc, diag::err_invalid_declarator_in_block) << Name << SS.getRange(); - else + else if (isa(Cur)) { + if (!isa(DC)) + Diag(Loc, diag::err_export_non_namespace_scope_name) + << Name << SS.getRange(); + else + // The cases that DC is not NamespaceDecl should be handled in + // CheckRedeclarationExported. 
+ return false; + } else Diag(Loc, diag::err_invalid_declarator_scope) << Name << cast(Cur) << cast(DC) << SS.getRange(); @@ -16535,7 +16576,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SetMemberAccessSpecifier(New, PrevDecl, AS); if (PrevDecl) - CheckRedeclarationModuleOwnership(New, PrevDecl); + CheckRedeclarationInModule(New, PrevDecl); if (TUK == TUK_Definition && (!SkipBody || !SkipBody->ShouldSkip)) New->startDefinition(); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 01f0079198c74..16cdb7e577237 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13012,7 +13012,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, AccessSpecifier AS, NewDecl->setInvalidDecl(); else if (OldDecl) { NewDecl->setPreviousDecl(OldDecl); - CheckRedeclarationModuleOwnership(NewDecl, OldDecl); + CheckRedeclarationInModule(NewDecl, OldDecl); } NewND = NewDecl; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 3278ca143dcce..64a0b45feb980 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2063,7 +2063,7 @@ DeclResult Sema::CheckClassTemplate( } if (PrevClassTemplate) - CheckRedeclarationModuleOwnership(NewTemplate, PrevClassTemplate); + CheckRedeclarationInModule(NewTemplate, PrevClassTemplate); if (Invalid) { NewTemplate->setInvalidDecl(); diff --git a/clang/test/CXX/module/module.interface/p2-2.cpp b/clang/test/CXX/module/module.interface/p2-2.cpp new file mode 100644 index 0000000000000..359e068d230af --- /dev/null +++ b/clang/test/CXX/module/module.interface/p2-2.cpp @@ -0,0 +1,37 @@ +// The intention of this file to check we could only export declarations in namesapce scope. 
+// +// RUN: %clang_cc1 -std=c++20 %s -verify + +export module X; + +export template +struct X { + struct iterator { + T node; + }; + void foo() {} + template + U bar(); +}; + +export template X::iterator; // expected-error {{cannot export 'iterator' as it is not at namespace scope}} +export template void X::foo(); // expected-error {{cannot export 'foo' as it is not at namespace scope}} +export template template U X::bar(); // expected-error {{cannot export 'bar' as it is not at namespace scope}} + +export struct Y { + struct iterator { + int node; + }; + void foo() {} + template + U bar(); +}; + +export struct Y::iterator; // expected-error {{cannot export 'iterator' as it is not at namespace scope}} +export void Y::foo(); // expected-error {{cannot export 'foo' as it is not at namespace scope}} +export template U Y::bar(); // expected-error {{cannot export 'bar' as it is not at namespace scope}} + +export { + template X::iterator; // expected-error {{cannot export 'iterator' as it is not at namespace scope}} + struct Y::iterator; // expected-error {{cannot export 'iterator' as it is not at namespace scope}} +} diff --git a/clang/test/CXX/module/module.interface/p6.cpp b/clang/test/CXX/module/module.interface/p6.cpp new file mode 100644 index 0000000000000..a696851ccbff4 --- /dev/null +++ b/clang/test/CXX/module/module.interface/p6.cpp @@ -0,0 +1,93 @@ +// The test is check we couldn't export a redeclaration which isn't exported previously and +// check it is OK to redeclare no matter exported nor not if is the previous declaration is exported. 
+// RUN: %clang_cc1 -std=c++20 %s -verify + +export module X; + +struct S { // expected-note {{previous declaration is here}} + int n; +}; +typedef S S; +export typedef S S; // OK, does not redeclare an entity +export struct S; // expected-error {{cannot export redeclaration 'S' here since the previous declaration is not exported}} + +namespace A { +struct X; // expected-note {{previous declaration is here}} +export struct Y; +} // namespace A + +namespace A { +export struct X; // expected-error {{cannot export redeclaration 'X' here since the previous declaration is not exported}} +export struct Y; // OK +struct Z; // expected-note {{previous declaration is here}} +export struct Z; // expected-error {{cannot export redeclaration 'Z' here since the previous declaration is not exported}} +} // namespace A + +namespace A { +struct B; // expected-note {{previous declaration is here}} +struct C {}; // expected-note {{previous declaration is here}} +} // namespace A + +namespace A { +export struct B {}; // expected-error {{cannot export redeclaration 'B' here since the previous declaration is not exported}} +export struct C; // expected-error {{cannot export redeclaration 'C' here since the previous declaration is not exported}} +} // namespace A + +template +struct TemplS; // expected-note {{previous declaration is here}} + +export template +struct TemplS {}; // expected-error {{cannot export redeclaration 'TemplS' here since the previous declaration is not exported}} + +template +struct TemplS2; // expected-note {{previous declaration is here}} + +export template +struct TemplS2 {}; // expected-error {{cannot export redeclaration 'TemplS2' here since the previous declaration is not exported}} + +void baz(); // expected-note {{previous declaration is here}} +export void baz(); // expected-error {{cannot export redeclaration 'baz' here since the previous declaration is not exported}} + +namespace A { +export void foo(); +void bar(); // expected-note {{previous 
declaration is here}} +export void bar(); // expected-error {{cannot export redeclaration 'bar' here since the previous declaration is not exported}} +void f1(); // expected-note {{previous declaration is here}} +} // namespace A + +// OK +// +// [module.interface]/p6 +// A redeclaration of an entity X is implicitly exported if X was introduced by an exported declaration +void A::foo(); + +// The compiler couldn't export A::f1() here since A::f1() is declared above without exported. +// See [module.interface]/p6 for details. +export void A::f1(); // expected-error {{cannot export redeclaration 'f1' here since the previous declaration is not exported}} + +template +void TemplFunc(); // expected-note {{previous declaration is here}} + +export template +void TemplFunc() { // expected-error {{cannot export redeclaration 'TemplFunc' here since the previous declaration is not exported}} +} + +namespace A { +template +void TemplFunc2(); // expected-note {{previous declaration is here}} +export template +void TemplFunc2() {} // expected-error {{cannot export redeclaration 'TemplFunc2' here since the previous declaration is not exported}} +template +void TemplFunc3(); // expected-note {{previous declaration is here}} +} // namespace A + +export template +void A::TemplFunc3() {} // expected-error {{cannot export redeclaration 'TemplFunc3' here since the previous declaration is not exported}} + +int var; // expected-note {{previous declaration is here}} +export int var; // expected-error {{cannot export redeclaration 'var' here since the previous declaration is not exported}} + +template +T TemplVar; // expected-note {{previous declaration is here}} +export template +T TemplVar; // expected-error {{cannot export redeclaration 'TemplVar' here since the previous declaration is not exported}} From 3f24cdec2572741f018457d5f24ef479e1291f1c Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 24 Jan 2022 10:20:16 +0800 Subject: [PATCH 318/946] [RISCV][NFC] Remove tailing whitespaces 
in RISCVInstrInfoVSDPatterns.td and RISCVInstrInfoVVLPatterns.td --- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 16 ++++++++-------- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 078025051e716..e452a84a9a6f2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -368,22 +368,22 @@ multiclass VPatWidenBinarySDNode_VV_VX_WV_WX(instruction_name#"_VV_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, + vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))), (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), (!cast(instruction_name#"_VX_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, GPR:$rs1, + vti.Vti.RegClass:$rs2, GPR:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), (!cast(instruction_name#"_WV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, + vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), (!cast(instruction_name#"_WX_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, GPR:$rs1, + vti.Wti.RegClass:$rs2, GPR:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; } } @@ -418,12 +418,12 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF { def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))), (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), (!cast(instruction_name#"_VV_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, + vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, vti.Vti.AVL, 
vti.Vti.Log2SEW)>; def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))), (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))), (!cast(instruction_name#"_V"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX) - vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, + vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; } } @@ -433,12 +433,12 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF { def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), (!cast(instruction_name#"_WV_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, + vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))), (!cast(instruction_name#"_W"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX) - vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, + vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, vti.Vti.AVL, vti.Vti.Log2SEW)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 28cb8fc413793..0ac959d79a024 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -616,7 +616,7 @@ multiclass VPatReductionVL { multiclass VPatBinarySDNodeExt_V_WV { foreach vti = AllWidenableIntVectors in { def : Pat< - (vti.Vti.Vector + (vti.Vti.Vector (riscv_trunc_vector_vl (op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))), @@ -631,7 +631,7 @@ multiclass VPatBinarySDNodeExt_V_WV { foreach vti = AllWidenableIntVectors in { def : Pat< - (vti.Vti.Vector + (vti.Vti.Vector (riscv_trunc_vector_vl (op (vti.Wti.Vector vti.Wti.RegClass:$rs2), (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))), 
From b574048239bc6fbd9dd356fbaa8bd475fa4b64e6 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 24 Jan 2022 11:03:12 +0800 Subject: [PATCH 319/946] [NFC] [Coroutines] Rename tests in coro-align This is required by ychen. See https://reviews.llvm.org/D117542 --- .../{coro-align-03.ll => coro-align16.ll} | 0 .../Transforms/Coroutines/coro-align32.ll | 60 +++++++++++++++++++ .../{coro-align-05.ll => coro-align64-02.ll} | 0 .../{coro-align-04.ll => coro-align64.ll} | 0 .../{coro-align-02.ll => coro-align8-02.ll} | 0 .../{coro-align-01.ll => coro-align8.ll} | 0 6 files changed, 60 insertions(+) rename llvm/test/Transforms/Coroutines/{coro-align-03.ll => coro-align16.ll} (100%) create mode 100644 llvm/test/Transforms/Coroutines/coro-align32.ll rename llvm/test/Transforms/Coroutines/{coro-align-05.ll => coro-align64-02.ll} (100%) rename llvm/test/Transforms/Coroutines/{coro-align-04.ll => coro-align64.ll} (100%) rename llvm/test/Transforms/Coroutines/{coro-align-02.ll => coro-align8-02.ll} (100%) rename llvm/test/Transforms/Coroutines/{coro-align-01.ll => coro-align8.ll} (100%) diff --git a/llvm/test/Transforms/Coroutines/coro-align-03.ll b/llvm/test/Transforms/Coroutines/coro-align16.ll similarity index 100% rename from llvm/test/Transforms/Coroutines/coro-align-03.ll rename to llvm/test/Transforms/Coroutines/coro-align16.ll diff --git a/llvm/test/Transforms/Coroutines/coro-align32.ll b/llvm/test/Transforms/Coroutines/coro-align32.ll new file mode 100644 index 0000000000000..8cd01f42ed15c --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-align32.ll @@ -0,0 +1,60 @@ +; Tests that the coro.align intrinsic could be lowered to correct alignment +; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s + +define i8* @f() "coroutine.presplit"="1" { +entry: + %x = alloca i64, align 16 + %y = alloca i32, align 32 + %z = alloca i32, align 16 + %alpha = alloca i1, align 8 + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + 
%size = call i32 @llvm.coro.size.i32() + %align = call i32 @llvm.coro.align.i32() + %alloc = call i8* @aligned_alloc(i32 %align, i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sp1 = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sp1, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %x.alias = bitcast i64* %x to i32* + call void @capture_call(i32* %x.alias) + %y.alias = bitcast i32* %y to i32* + call void @capture_call(i32* %y.alias) + %z.alias = bitcast i32* %z to i32* + call void @capture_call(i32* %z.alias) + %alpha.alias = bitcast i1* %alpha to i32* + call void @capture_call(i32* %alpha.alias) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape. +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, i1, [6 x i8], i32, [12 x i8], i32 } +; CHECK-LABEL: define i8* @f() +; CHECK: %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 32, i32 56) +; CHECK-NEXT: call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]]) + +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i32 @llvm.coro.align.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare void @llvm.coro.resume(i8*) +declare void @llvm.coro.destroy(i8*) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare void @capture_call(i32*) +declare void @nocapture_call(i32* nocapture) +declare noalias i8* @aligned_alloc(i32, i32) +declare void @free(i8*) diff --git a/llvm/test/Transforms/Coroutines/coro-align-05.ll b/llvm/test/Transforms/Coroutines/coro-align64-02.ll similarity index 100% rename from 
llvm/test/Transforms/Coroutines/coro-align-05.ll rename to llvm/test/Transforms/Coroutines/coro-align64-02.ll diff --git a/llvm/test/Transforms/Coroutines/coro-align-04.ll b/llvm/test/Transforms/Coroutines/coro-align64.ll similarity index 100% rename from llvm/test/Transforms/Coroutines/coro-align-04.ll rename to llvm/test/Transforms/Coroutines/coro-align64.ll diff --git a/llvm/test/Transforms/Coroutines/coro-align-02.ll b/llvm/test/Transforms/Coroutines/coro-align8-02.ll similarity index 100% rename from llvm/test/Transforms/Coroutines/coro-align-02.ll rename to llvm/test/Transforms/Coroutines/coro-align8-02.ll diff --git a/llvm/test/Transforms/Coroutines/coro-align-01.ll b/llvm/test/Transforms/Coroutines/coro-align8.ll similarity index 100% rename from llvm/test/Transforms/Coroutines/coro-align-01.ll rename to llvm/test/Transforms/Coroutines/coro-align8.ll From 943aa1bfacaa143ef98caa360bc98a648703ce2e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 20:32:52 -0800 Subject: [PATCH 320/946] Add modernize-use-default-member-init.UseAssignment to .clang-tidy --- .clang-tidy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index 1d4438dbfda0c..879c3661b302d 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -18,3 +18,5 @@ CheckOptions: value: 1 - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors value: 1 + - key: modernize-use-default-member-init.UseAssignment + value: 1 From f63a9cd99db79e0d79f03169a9fa8a1baad54f1f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 20:32:54 -0800 Subject: [PATCH 321/946] [Vectorize] Remove unused variables (NFC) --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 475ab7e1f495e..8bfd3aa525d55 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ 
-8727,7 +8727,6 @@ class HorizontalReduction { static RecurKind getRdxKind(Instruction *I) { assert(I && "Expected instruction for reduction matching"); - TargetTransformInfo::ReductionFlags RdxFlags; if (match(I, m_Add(m_Value(), m_Value()))) return RecurKind::Add; if (match(I, m_Mul(m_Value(), m_Value()))) @@ -8801,7 +8800,6 @@ class HorizontalReduction { return RecurKind::None; } - TargetTransformInfo::ReductionFlags RdxFlags; switch (Pred) { default: return RecurKind::None; From b752eb887f7ef160e000c83e4c720d9ecb2bf620 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 20:32:56 -0800 Subject: [PATCH 322/946] [Analysis] Use default member initialization (NFC) Identified with modernize-use-default-member-init. --- .../llvm/Analysis/AliasAnalysisEvaluator.h | 16 ++++----- .../llvm/Analysis/DependenceAnalysis.h | 17 ++++------ .../llvm/Analysis/DivergenceAnalysis.h | 2 +- .../llvm/Analysis/IRSimilarityIdentifier.h | 2 +- .../llvm/Analysis/InstructionSimplify.h | 2 +- .../llvm/Analysis/LazyBlockFrequencyInfo.h | 11 +++---- .../llvm/Analysis/LazyBranchProbabilityInfo.h | 4 +-- .../llvm/Analysis/LoopAccessAnalysis.h | 33 +++++++++---------- .../llvm/Analysis/LoopAnalysisManager.h | 4 +-- llvm/include/llvm/Analysis/MemorySSA.h | 4 +-- llvm/include/llvm/Analysis/PHITransAddr.h | 4 +-- .../llvm/Analysis/TargetTransformInfo.h | 10 +++--- llvm/lib/Analysis/CaptureTracking.cpp | 12 +++---- llvm/lib/Analysis/CostModel.cpp | 6 ++-- llvm/lib/Analysis/DivergenceAnalysis.cpp | 2 +- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 2 +- llvm/lib/Analysis/InlineCost.cpp | 7 ++-- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 10 ++---- llvm/lib/Analysis/LoopInfo.cpp | 5 ++- llvm/lib/Analysis/MemorySSA.cpp | 4 +-- llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 3 +- 21 files changed, 73 insertions(+), 87 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h index 972eceaa3ba92..043b1b7ca2dce 100644 --- 
a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h +++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -31,17 +31,15 @@ namespace llvm { class AAResults; class AAEvaluator : public PassInfoMixin { - int64_t FunctionCount; - int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount; - int64_t NoModRefCount, ModCount, RefCount, ModRefCount; - int64_t MustCount, MustRefCount, MustModCount, MustModRefCount; + int64_t FunctionCount = 0; + int64_t NoAliasCount = 0, MayAliasCount = 0, PartialAliasCount = 0; + int64_t MustAliasCount = 0; + int64_t NoModRefCount = 0, ModCount = 0, RefCount = 0, ModRefCount = 0; + int64_t MustCount = 0, MustRefCount = 0, MustModCount = 0; + int64_t MustModRefCount = 0; public: - AAEvaluator() - : FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(), - MustAliasCount(), NoModRefCount(), ModCount(), RefCount(), - ModRefCount(), MustCount(), MustRefCount(), MustModCount(), - MustModRefCount() {} + AAEvaluator() = default; AAEvaluator(AAEvaluator &&Arg) : FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount), MayAliasCount(Arg.MayAliasCount), diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 305c9b1d88f22..8c852e85b04a9 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -74,12 +74,8 @@ namespace llvm { Dependence &operator=(Dependence &&) = default; public: - Dependence(Instruction *Source, - Instruction *Destination) : - Src(Source), - Dst(Destination), - NextPredecessor(nullptr), - NextSuccessor(nullptr) {} + Dependence(Instruction *Source, Instruction *Destination) + : Src(Source), Dst(Destination) {} virtual ~Dependence() {} /// Dependence::DVEntry - Each level in the distance/direction vector @@ -99,9 +95,10 @@ namespace llvm { bool PeelFirst : 1; // Peeling the first iteration will break dependence. 
bool PeelLast : 1; // Peeling the last iteration will break the dependence. bool Splitable : 1; // Splitting the loop will break dependence. - const SCEV *Distance; // NULL implies no distance available. - DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false), - PeelLast(false), Splitable(false), Distance(nullptr) { } + const SCEV *Distance = nullptr; // NULL implies no distance available. + DVEntry() + : Direction(ALL), Scalar(true), PeelFirst(false), PeelLast(false), + Splitable(false) {} }; /// getSrc - Returns the source instruction for this dependence. @@ -200,7 +197,7 @@ namespace llvm { private: Instruction *Src, *Dst; - const Dependence *NextPredecessor, *NextSuccessor; + const Dependence *NextPredecessor = nullptr, *NextSuccessor = nullptr; friend class DependenceInfo; }; diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index 7e526b2fad84c..c52b42ae8dc22 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -146,7 +146,7 @@ class DivergenceInfo { // analysis can run indefinitely. We set ContainsIrreducible and no // analysis is actually performed on the function. All values in // this function are conservatively reported as divergent instead. - bool ContainsIrreducible; + bool ContainsIrreducible = false; std::unique_ptr SDA; std::unique_ptr DA; diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 51c5c620230ba..2f8ae205657d8 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -121,7 +121,7 @@ struct IRInstructionData /// and is used when checking when two instructions are considered similar. /// If either instruction is not legal, the instructions are automatically not /// considered similar. 
- bool Legal; + bool Legal = false; /// This is only relevant if we are wrapping a CmpInst where we needed to /// change the predicate of a compare instruction from a greater than form diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 29973fe95ef79..8b49c115f1019 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -63,7 +63,7 @@ class Value; /// results if the users specified it is safe to use. struct InstrInfoQuery { InstrInfoQuery(bool UMD) : UseInstrInfo(UMD) {} - InstrInfoQuery() : UseInstrInfo(true) {} + InstrInfoQuery() = default; bool UseInstrInfo = true; MDNode *getMetadata(const Instruction *I, unsigned KindID) const { diff --git a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h index ab6d6ce9ec5ae..a6d8b76b12aea 100644 --- a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h +++ b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h @@ -33,8 +33,7 @@ template class LazyBlockFrequencyInfo { public: - LazyBlockFrequencyInfo() - : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {} + LazyBlockFrequencyInfo() = default; /// Set up the per-function input. 
void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass, @@ -67,10 +66,10 @@ class LazyBlockFrequencyInfo { private: BlockFrequencyInfoT BFI; - bool Calculated; - const FunctionT *F; - BranchProbabilityInfoPassT *BPIPass; - const LoopInfoT *LI; + bool Calculated = false; + const FunctionT *F = nullptr; + BranchProbabilityInfoPassT *BPIPass = nullptr; + const LoopInfoT *LI = nullptr; }; /// This is an alternative analysis pass to diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h index 3c632f02905a7..bad7423616b45 100644 --- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -57,7 +57,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { public: LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI, const TargetLibraryInfo *TLI) - : Calculated(false), F(F), LI(LI), TLI(TLI) {} + : F(F), LI(LI), TLI(TLI) {} /// Retrieve the BPI with the branch probabilities computed. 
BranchProbabilityInfo &getCalculated() { @@ -75,7 +75,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { private: BranchProbabilityInfo BPI; - bool Calculated; + bool Calculated = false; const Function *F; const LoopInfo *LI; const TargetLibraryInfo *TLI; diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index e55a90b0ea41f..c83a04991b040 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -169,10 +169,7 @@ class MemoryDepChecker { }; MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) - : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0), - MaxSafeVectorWidthInBits(-1U), - FoundNonConstantDistanceDependence(false), - Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {} + : PSE(PSE), InnermostLoop(L) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -264,30 +261,30 @@ class MemoryDepChecker { SmallVector InstMap; /// The program order index to be used for the next instruction. - unsigned AccessIdx; + unsigned AccessIdx = 0; // We can access this many bytes in parallel safely. - uint64_t MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes = 0; /// Number of elements (from consecutive iterations) that are safe to /// operate on simultaneously, multiplied by the size of the element in bits. /// The size of the element is taken from the memory access that is most /// restrictive. - uint64_t MaxSafeVectorWidthInBits; + uint64_t MaxSafeVectorWidthInBits = -1U; /// If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. - bool FoundNonConstantDistanceDependence; + bool FoundNonConstantDistanceDependence = false; /// Result of the dependence checks, indicating whether the checked /// dependences are safe for vectorization, require RT checks or are known to /// be unsafe. 
- VectorizationSafetyStatus Status; + VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe; //// True if Dependences reflects the dependences in the //// loop. If false we exceeded MaxDependences and //// Dependences is invalid. - bool RecordDependences; + bool RecordDependences = true; /// Memory dependences collected during the analysis. Only valid if /// RecordDependences is true. @@ -395,7 +392,7 @@ class RuntimePointerChecking { AliasSetId(AliasSetId), Expr(Expr) {} }; - RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {} + RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {} /// Reset the state of the pointer runtime information. void reset() { @@ -444,7 +441,7 @@ class RuntimePointerChecking { unsigned Depth = 0) const; /// This flag indicates if we need to add the runtime check. - bool Need; + bool Need = false; /// Information about the pointers that may require checking. SmallVector Pointers; @@ -620,17 +617,17 @@ class LoopAccessInfo { Loop *TheLoop; - unsigned NumLoads; - unsigned NumStores; + unsigned NumLoads = 0; + unsigned NumStores = 0; - uint64_t MaxSafeDepDistBytes; + uint64_t MaxSafeDepDistBytes = -1; /// Cache the result of analyzeLoop. - bool CanVecMem; - bool HasConvergentOp; + bool CanVecMem = false; + bool HasConvergentOp = false; /// Indicator that there are non vectorizable stores to a uniform address. - bool HasDependenceInvolvingLoopInvariantAddress; + bool HasDependenceInvolvingLoopInvariantAddress = false; /// The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. 
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index bc8a1e74e447d..d07e6977fed1e 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -87,7 +87,7 @@ typedef InnerAnalysisManagerProxy template <> class LoopAnalysisManagerFunctionProxy::Result { public: explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI) - : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {} + : InnerAM(&InnerAM), LI(&LI) {} Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) { // We have to null out the analysis manager in the moved-from state @@ -136,7 +136,7 @@ template <> class LoopAnalysisManagerFunctionProxy::Result { private: LoopAnalysisManager *InnerAM; LoopInfo *LI; - bool MSSAUsed; + bool MSSAUsed = false; }; /// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 9198bd8412e68..b41f5771bacdb 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -865,7 +865,7 @@ class MemorySSA { AccessList *getOrCreateAccessList(const BasicBlock *); DefsList *getOrCreateDefsList(const BasicBlock *); void renumberBlock(const BasicBlock *) const; - AliasAnalysis *AA; + AliasAnalysis *AA = nullptr; DominatorTree *DT; Function &F; @@ -892,7 +892,7 @@ class MemorySSA { std::unique_ptr> WalkerBase; std::unique_ptr> Walker; std::unique_ptr> SkipWalker; - unsigned NextID; + unsigned NextID = 0; }; /// Enables verification of MemorySSA. 
diff --git a/llvm/include/llvm/Analysis/PHITransAddr.h b/llvm/include/llvm/Analysis/PHITransAddr.h index 54a07f0534787..a23f8e61c3033 100644 --- a/llvm/include/llvm/Analysis/PHITransAddr.h +++ b/llvm/include/llvm/Analysis/PHITransAddr.h @@ -40,7 +40,7 @@ class PHITransAddr { const DataLayout &DL; /// TLI - The target library info if known, otherwise null. - const TargetLibraryInfo *TLI; + const TargetLibraryInfo *TLI = nullptr; /// A cache of \@llvm.assume calls used by SimplifyInstruction. AssumptionCache *AC; @@ -50,7 +50,7 @@ class PHITransAddr { public: PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC) - : Addr(addr), DL(DL), TLI(nullptr), AC(AC) { + : Addr(addr), DL(DL), AC(AC) { // If the address is an instruction, the whole thing is considered an input. if (Instruction *I = dyn_cast(Addr)) InstInputs.push_back(I); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 179f0e8e548bf..34ef9cc61c4ff 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1364,10 +1364,12 @@ class TargetTransformInfo { /// Flags describing the kind of vector reduction. struct ReductionFlags { - ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} - bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation. - bool IsSigned; ///< Whether the operation is a signed int reduction. - bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. + ReductionFlags() = default; + bool IsMaxOp = + false; ///< If the op a min/max kind, true if it's a max operation. + bool IsSigned = false; ///< Whether the operation is a signed int reduction. + bool NoNaN = + false; ///< If op is an fp min/max, whether NaNs may be present. }; /// \returns True if the target prefers reductions in loop. 
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 9b45f455be087..ba8462e659d53 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -75,7 +75,7 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { namespace { struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) - : ReturnCaptures(ReturnCaptures), Captured(false) {} + : ReturnCaptures(ReturnCaptures) {} void tooManyUses() override { Captured = true; } @@ -89,7 +89,7 @@ namespace { bool ReturnCaptures; - bool Captured; + bool Captured = false; }; /// Only find pointer captures which happen before the given instruction. Uses @@ -101,7 +101,7 @@ namespace { CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI, const LoopInfo *LI) : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), - IncludeI(IncludeI), Captured(false), LI(LI) {} + IncludeI(IncludeI), LI(LI) {} void tooManyUses() override { Captured = true; } @@ -139,7 +139,7 @@ namespace { bool ReturnCaptures; bool IncludeI; - bool Captured; + bool Captured = false; const LoopInfo *LI; }; @@ -155,7 +155,7 @@ namespace { struct EarliestCaptures : public CaptureTracker { EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT) - : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {} + : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {} void tooManyUses() override { Captured = true; @@ -199,7 +199,7 @@ namespace { bool ReturnCaptures; - bool Captured; + bool Captured = false; Function &F; }; diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index f407ec0d017a8..326bacad01fe1 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -50,7 +50,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : 
FunctionPass(ID), F(nullptr), TTI(nullptr) { + CostModelAnalysis() : FunctionPass(ID) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -69,9 +69,9 @@ namespace { void print(raw_ostream &OS, const Module*) const override; /// The function that we analyze. - Function *F; + Function *F = nullptr; /// Target information. - const TargetTransformInfo *TTI; + const TargetTransformInfo *TTI = nullptr; }; } // End of anonymous namespace diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 62ac6f693fe30..39e80c2ad51c1 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -348,7 +348,7 @@ DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible) - : F(F), ContainsIrreducible(false) { + : F(F) { if (!KnownReducible) { using RPOTraversal = ReversePostOrderTraversal; RPOTraversal FuncRPOT(&F); diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index ca1a2907e51cf..ea08e50bf7283 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -62,7 +62,7 @@ void IRInstructionData::initializeInstruction() { } IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) - : Inst(nullptr), Legal(false), IDL(&IDList) {} + : IDL(&IDList) {} void IRInstructionData::setBranchSuccessors( DenseMap &BasicBlockToInteger) { diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 5dce896feb442..d5411d916c777 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -361,10 +361,10 @@ class CallAnalyzer : public InstVisitor { /// Model the elimination of repeated loads that is expected to happen /// whenever we simplify away the stores that would otherwise cause them to be /// loads. 
- bool EnableLoadElimination; + bool EnableLoadElimination = true; /// Whether we allow inlining for recursive call. - bool AllowRecursiveCall; + bool AllowRecursiveCall = false; SmallPtrSet LoadAddrSet; @@ -455,8 +455,7 @@ class CallAnalyzer : public InstVisitor { OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), - CandidateCall(Call), EnableLoadElimination(true), - AllowRecursiveCall(false) {} + CandidateCall(Call) {} InlineResult analyze(); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index b8b1b5ad53c9e..2ab78d2b7ee23 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -519,8 +519,7 @@ class AccessAnalysis { AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false), PSE(PSE) {} + : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {} /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -620,7 +619,7 @@ class AccessAnalysis { /// memcheck analysis without dependency checking /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is /// cleared while this remains set if we have potentially dependent accesses. - bool IsRTCheckAnalysisNeeded; + bool IsRTCheckAnalysisNeeded = false; /// The SCEV predicate containing all the SCEV-related assumptions. 
PredicatedScalarEvolution &PSE; @@ -2244,10 +2243,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(std::make_unique(SE)), - DepChecker(std::make_unique(*PSE, L)), TheLoop(L), - NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), - HasConvergentOp(false), - HasDependenceInvolvingLoopInvariantAddress(false) { + DepChecker(std::make_unique(*PSE, L)), TheLoop(L) { if (canAnalyzeLoop()) analyzeLoop(AA, LI, TLI, DT); } diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index b35fb2a190f62..dd69587161272 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -695,11 +695,10 @@ class UnloopUpdater { // Flag the presence of an irreducible backedge whose destination is a block // directly contained by the original unloop. - bool FoundIB; + bool FoundIB = false; public: - UnloopUpdater(Loop *UL, LoopInfo *LInfo) - : Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {} + UnloopUpdater(Loop *UL, LoopInfo *LInfo) : Unloop(*UL), LI(LInfo), DFS(UL) {} void updateBlockParents(); diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index ac20e20f0c0d0..57f431ec21f56 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -1265,8 +1265,8 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { } MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) - : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), - SkipWalker(nullptr), NextID(0) { + : DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), + SkipWalker(nullptr) { // Build MemorySSA using a batch alias analysis. This reuses the internal // state that AA collects during an alias()/getModRefInfo() call. 
This is // safe because there are no CFG changes while building MemorySSA and can diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp index f83d8b0fd2301..294bc38c17ad0 100644 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -28,8 +28,7 @@ ReplayInlineAdvisor::ReplayInlineAdvisor( std::unique_ptr OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)), - HasReplayRemarks(false), ReplaySettings(ReplaySettings), - EmitRemarks(EmitRemarks) { + ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) { auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile); std::error_code EC = BufferOrErr.getError(); From d3b26dea16108c427b19b5480c9edc76edf8f5b4 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 23 Jan 2022 20:45:25 -0800 Subject: [PATCH 323/946] Clang: Change the default DWARF version to 5 (except on platforms that already opt in to specific versions - SCE, Android, and Darwin using DWARFv4 explicitly, for instance) --- clang/docs/ReleaseNotes.rst | 8 ++++++++ clang/include/clang/Driver/ToolChain.h | 2 +- clang/lib/Driver/ToolChains/Linux.h | 1 + clang/test/CodeGen/debug-info-extern-call.c | 4 ++-- clang/test/CodeGen/dwarf-version.c | 8 ++++---- clang/test/Driver/cl-options.c | 2 +- clang/test/Driver/clang-g-opts.c | 2 +- clang/test/Driver/ve-toolchain.c | 2 +- clang/test/Driver/ve-toolchain.cpp | 2 +- 9 files changed, 20 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2eec63901932e..4fe037741256f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -252,6 +252,14 @@ X86 Support in Clang - Support for ``AVX512-FP16`` instructions has been added. +DWARF Support in Clang +---------------------- + +- The default DWARF version has increased from DWARFv4 to DWARFv5. 
You can opt + back in to the old behavior with -gdwarf-4. Some platforms (Darwin, Android, + and SCE for instance) already opt out of this version bump as is suitable for + the platform + Arm and AArch64 Support in Clang -------------------------------- diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index eb95806a2f75d..37011de6bd6d7 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -510,7 +510,7 @@ class ToolChain { // Return the DWARF version to emit, in the absence of arguments // to the contrary. - virtual unsigned GetDefaultDwarfVersion() const { return 4; } + virtual unsigned GetDefaultDwarfVersion() const { return 5; } // Some toolchains may have different restrictions on the DWARF version and // may need to adjust it. E.g. NVPTX may need to enforce DWARF2 even when host diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index a5ec33bd44f10..a5648d79d655f 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -40,6 +40,7 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; RuntimeLibType GetDefaultRuntimeLibType() const override; + unsigned GetDefaultDwarfVersion() const override; CXXStdlibType GetDefaultCXXStdlibType() const override; bool IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList &Args) const override; diff --git a/clang/test/CodeGen/debug-info-extern-call.c b/clang/test/CodeGen/debug-info-extern-call.c index 7cf90550ac00a..fad52d0df0b3f 100644 --- a/clang/test/CodeGen/debug-info-extern-call.c +++ b/clang/test/CodeGen/debug-info-extern-call.c @@ -12,13 +12,13 @@ // decls so that the dwarf generator can describe information needed for tail // call frame reconstrution. 
// -// RUN: %clang -g -O2 -target x86_64-none-linux-gnu -ggdb -S -emit-llvm %s -o - \ +// RUN: %clang -gdwarf-4 -O2 -target x86_64-none-linux-gnu -ggdb -S -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=DECLS-FOR-EXTERN // // Do not emit a subprogram for extern decls when entry values are disabled and // the tuning is not set to gdb. // -// RUN: %clang -g -O2 -target x86_64-none-linux-gnu -gsce -S -emit-llvm %s -o - \ +// RUN: %clang -gdwarf-4 -O2 -target x86_64-none-linux-gnu -gsce -S -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=NO-DECLS-FOR-EXTERN // DECLS-FOR-EXTERN-NOT: !DICompileUnit({{.*}}retainedTypes: !{{[0-9]+}} diff --git a/clang/test/CodeGen/dwarf-version.c b/clang/test/CodeGen/dwarf-version.c index 6d131c470d5b3..b329556ae0d9d 100644 --- a/clang/test/CodeGen/dwarf-version.c +++ b/clang/test/CodeGen/dwarf-version.c @@ -2,8 +2,8 @@ // RUN: %clang -target x86_64-linux-gnu -gdwarf-3 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER3 // RUN: %clang -target x86_64-linux-gnu -gdwarf-4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4 // RUN: %clang -target x86_64-linux-gnu -gdwarf-5 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5 -// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4 -// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4 +// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5 +// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5 // The -isysroot is used as a hack to avoid LIT messing with the SDKROOT // environment variable which indirecty overrides the version in the target @@ -28,10 +28,10 @@ // RUN: | FileCheck %s --check-prefixes=NODWARF,CODEVIEW // Explicitly request DWARF. 
// RUN: %clang -target i686-pc-windows-msvc -gdwarf -S -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefixes=VER4,NOCODEVIEW +// RUN: | FileCheck %s --check-prefixes=VER5,NOCODEVIEW // Explicitly request both. // RUN: %clang -target i686-pc-windows-msvc -gdwarf -gcodeview -S -emit-llvm -o - %s \ -// RUN: | FileCheck %s --check-prefixes=VER4,CODEVIEW +// RUN: | FileCheck %s --check-prefixes=VER5,CODEVIEW // RUN: %clang -target powerpc-ibm-aix-xcoff -g -S -emit-llvm -o - %s | \ // RUN: FileCheck %s --check-prefix=VER3 // RUN: %clang -target powerpc-ibm-aix-xcoff -gdwarf-2 -S -emit-llvm -o - %s | \ diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index f39db87660125..733e733de738e 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -596,7 +596,7 @@ // RUN: %clang_cl /Z7 -gdwarf /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7_gdwarf %s // Z7_gdwarf: "-gcodeview" // Z7_gdwarf: "-debug-info-kind=constructor" -// Z7_gdwarf: "-dwarf-version=4" +// Z7_gdwarf: "-dwarf-version= // RUN: %clang_cl -fmsc-version=1800 -TP -### -- %s 2>&1 | FileCheck -check-prefix=CXX11 %s // CXX11: -std=c++11 diff --git a/clang/test/Driver/clang-g-opts.c b/clang/test/Driver/clang-g-opts.c index bb129e75769c9..d982b1070cae1 100644 --- a/clang/test/Driver/clang-g-opts.c +++ b/clang/test/Driver/clang-g-opts.c @@ -32,7 +32,7 @@ // CHECK-WITHOUT-G-NOT: -debug-info-kind // CHECK-WITH-G: "-debug-info-kind=constructor" -// CHECK-WITH-G: "-dwarf-version=4" +// CHECK-WITH-G: "-dwarf-version=5" // CHECK-WITH-G-DWARF2: "-dwarf-version=2" // CHECK-WITH-G-STANDALONE: "-debug-info-kind=standalone" diff --git a/clang/test/Driver/ve-toolchain.c b/clang/test/Driver/ve-toolchain.c index 8878bd8f83cc0..35af3c81c4c6f 100644 --- a/clang/test/Driver/ve-toolchain.c +++ b/clang/test/Driver/ve-toolchain.c @@ -6,7 +6,7 @@ /// Checking dwarf-version // RUN: %clang -### -g -target ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s -// DWARF_VER: 
"-dwarf-version=4" +// DWARF_VER: "-dwarf-version=5" ///----------------------------------------------------------------------------- /// Checking include-path diff --git a/clang/test/Driver/ve-toolchain.cpp b/clang/test/Driver/ve-toolchain.cpp index 7666cfbfe8b27..7447f34b70e0c 100644 --- a/clang/test/Driver/ve-toolchain.cpp +++ b/clang/test/Driver/ve-toolchain.cpp @@ -7,7 +7,7 @@ // RUN: %clangxx -### -g -target ve-unknown-linux-gnu \ // RUN: %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s -// DWARF_VER: "-dwarf-version=4" +// DWARF_VER: "-dwarf-version=5" ///----------------------------------------------------------------------------- /// Checking include-path From 90abe181da7c61d982e4873c97fd12bc06fefe09 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 23 Jan 2022 21:10:16 -0800 Subject: [PATCH 324/946] Add missing function implementation from DWARF default change Fix for d3b26dea16108c427b19b5480c9edc76edf8f5b4 --- clang/lib/Driver/ToolChains/Linux.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index e413640abad35..af74b108e04ee 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -324,6 +324,12 @@ ToolChain::RuntimeLibType Linux::GetDefaultRuntimeLibType() const { return Generic_ELF::GetDefaultRuntimeLibType(); } +unsigned Linux::GetDefaultDwarfVersion() const { + if (getTriple().isAndroid()) + return 4; + return ToolChain::GetDefaultDwarfVersion(); +} + ToolChain::CXXStdlibType Linux::GetDefaultCXXStdlibType() const { if (getTriple().isAndroid()) return ToolChain::CST_Libcxx; From 68b70d17d8dea3fe9fa8e8f8bffd37bfe8125a65 Mon Sep 17 00:00:00 2001 From: Abinav Puthan Purayil Date: Mon, 3 Jan 2022 15:45:52 +0530 Subject: [PATCH 325/946] [GlobalISel] Fold or of shifts with constant amount to funnel shift. 
This change folds (or (shl x, C0), (lshr y, C1)) to funnel shift iff C0 and C1 are constants where C0 + C1 is the bit-width of the shift instructions. Differential Revision: https://reviews.llvm.org/D116529 --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 47 +++--- llvm/test/CodeGen/AArch64/arm64-rev.ll | 7 +- .../CodeGen/AMDGPU/GlobalISel/combine-fsh.mir | 57 ++++++- .../CodeGen/AMDGPU/GlobalISel/combine-rot.mir | 54 ++++++- .../GlobalISel/llvm.amdgcn.intersect_ray.ll | 24 ++- .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 146 ++++++++---------- .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 146 ++++++++---------- 7 files changed, 268 insertions(+), 213 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ed1aa9d80840c..4b5a19155c672 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3878,39 +3878,48 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, LLT Ty = MRI.getType(Dst); unsigned BitWidth = Ty.getScalarSizeInBits(); - Register ShlSrc, ShlAmt, LShrSrc, LShrAmt; + Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt; unsigned FshOpc = 0; - // Match (or (shl x, amt), (lshr y, sub(bw, amt))). - if (mi_match( - Dst, MRI, - // m_GOr() handles the commuted version as well. - m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), - m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth), - m_Reg(LShrAmt)))))) { + // Match (or (shl ...), (lshr ...)). + if (!mi_match(Dst, MRI, + // m_GOr() handles the commuted version as well. + m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), + m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt))))) + return false; + + // Given constants C0 and C1 such that C0 + C1 is bit-width: + // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1) + // TODO: Match constant splat. 
+ int64_t CstShlAmt, CstLShrAmt; + if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) && + mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) && + CstShlAmt + CstLShrAmt == BitWidth) { + FshOpc = TargetOpcode::G_FSHR; + Amt = LShrAmt; + + } else if (mi_match(LShrAmt, MRI, + m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && + ShlAmt == Amt) { + // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt) FshOpc = TargetOpcode::G_FSHL; - // Match (or (shl x, sub(bw, amt)), (lshr y, amt)). - } else if (mi_match(Dst, MRI, - m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)), - m_GShl(m_Reg(ShlSrc), - m_GSub(m_SpecificICstOrSplat(BitWidth), - m_Reg(ShlAmt)))))) { + } else if (mi_match(ShlAmt, MRI, + m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) && + LShrAmt == Amt) { + // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt) FshOpc = TargetOpcode::G_FSHR; } else { return false; } - if (ShlAmt != LShrAmt) - return false; - - LLT AmtTy = MRI.getType(ShlAmt); + LLT AmtTy = MRI.getType(Amt); if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) return false; MatchInfo = [=](MachineIRBuilder &B) { - B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt}); + B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt}); }; return true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll index df481b8e39f45..aa223eefbbfaf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -216,8 +216,7 @@ define i64 @test_rev16_x(i64 %a) nounwind { ; GISEL-LABEL: test_rev16_x: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: rev x8, x0 -; GISEL-NEXT: lsl x9, x8, #48 -; GISEL-NEXT: orr x0, x9, x8, lsr #16 +; GISEL-NEXT: ror x0, x8, #16 ; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) @@ -235,9 +234,7 @@ define i64 @test_rev32_x(i64 %a) nounwind { ; ; GISEL-LABEL: test_rev32_x: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: rev x8, x0 -; GISEL-NEXT: lsl x9, x8, #32 -; GISEL-NEXT: orr x0, x9, x8, 
lsr #32 +; GISEL-NEXT: rev32 x0, x0 ; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir index 0e2816cbc9393..ad93f1bf4d39e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir @@ -107,13 +107,66 @@ body: | ... --- -name: fshl_i32_bad_const +name: fsh_i32_const +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: fsh_i32_const + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %or:_(s32) = G_FSHR %a, %b, %amt1(s32) + ; CHECK-NEXT: $vgpr2 = COPY %or(s32) + %a:_(s32) = COPY $vgpr0 + %b:_(s32) = COPY $vgpr1 + %amt0:_(s32) = G_CONSTANT i32 20 + %amt1:_(s32) = G_CONSTANT i32 12 + %shl:_(s32) = G_SHL %a, %amt0 + %lshr:_(s32) = G_LSHR %b, %amt1 + %or:_(s32) = G_OR %shl, %lshr + $vgpr2 = COPY %or +... + +--- +name: fsh_i32_bad_const +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: fsh_i32_bad_const + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32) + ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %amt1(s32) + ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr2 = COPY %or(s32) + %a:_(s32) = COPY $vgpr0 + %b:_(s32) = COPY $vgpr1 + %amt0:_(s32) = G_CONSTANT i32 20 + %amt1:_(s32) = G_CONSTANT i32 11 + %shl:_(s32) = G_SHL %a, %amt0 + %lshr:_(s32) = G_LSHR %b, %amt1 + %or:_(s32) = G_OR %shl, %lshr + $vgpr2 = COPY %or +... 
+ +--- +name: fshl_i32_bad_bw tracksRegLiveness: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK-LABEL: name: fshl_i32_bad_const + ; CHECK-LABEL: name: fshl_i32_bad_bw ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir index 60791842443d8..2649ee4bdf72a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir @@ -99,13 +99,63 @@ body: | ... --- -name: rotl_i32_bad_const +name: rot_i32_const +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: rot_i32_const + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: %or:_(s32) = G_ROTR %a, %amt1(s32) + ; CHECK-NEXT: $vgpr1 = COPY %or(s32) + %a:_(s32) = COPY $vgpr0 + %amt0:_(s32) = G_CONSTANT i32 20 + %amt1:_(s32) = G_CONSTANT i32 12 + %shl:_(s32) = G_SHL %a, %amt0 + %lshr:_(s32) = G_LSHR %a, %amt1 + %or:_(s32) = G_OR %shl, %lshr + $vgpr1 = COPY %or +... + +--- +name: rot_i32_bad_const +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: rot_i32_bad_const + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32) + ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %amt1(s32) + ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr + ; CHECK-NEXT: $vgpr1 = COPY %or(s32) + %a:_(s32) = COPY $vgpr0 + %amt0:_(s32) = G_CONSTANT i32 20 + %amt1:_(s32) = G_CONSTANT i32 11 + %shl:_(s32) = G_SHL %a, %amt0 + %lshr:_(s32) = G_LSHR %a, %amt1 + %or:_(s32) = G_OR %shl, %lshr + $vgpr1 = COPY %or +... 
+ + +--- +name: rotl_i32_bad_bw tracksRegLiveness: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2 - ; CHECK-LABEL: name: rotl_i32_bad_const + ; CHECK-LABEL: name: rotl_i32_bad_bw ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 0236ebc947267..15755f4455cb5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -52,11 +52,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float % ; GCN-NEXT: s_mov_b32 s4, 0xffff ; GCN-NEXT: v_lshrrev_b32_e32 v9, 16, v5 ; GCN-NEXT: v_and_b32_e32 v10, s4, v7 -; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v7 ; GCN-NEXT: v_and_b32_e32 v8, s4, v8 ; GCN-NEXT: v_lshlrev_b32_e32 v9, 16, v9 ; GCN-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GCN-NEXT: v_lshl_or_b32 v7, v8, 16, v7 +; GCN-NEXT: v_alignbit_b32 v7, v8, v7, 16 ; GCN-NEXT: v_and_or_b32 v5, v5, s4, v9 ; GCN-NEXT: v_and_or_b32 v6, v6, s4, v10 ; GCN-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[0:3] a16 @@ -105,11 +104,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float ; GCN-NEXT: s_mov_b32 s4, 0xffff ; GCN-NEXT: v_lshrrev_b32_e32 v10, 16, v6 ; GCN-NEXT: v_and_b32_e32 v11, s4, v8 -; GCN-NEXT: v_lshrrev_b32_e32 v8, 16, v8 ; GCN-NEXT: v_and_b32_e32 v9, s4, v9 ; GCN-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; GCN-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GCN-NEXT: v_lshl_or_b32 v8, v9, 16, v8 +; GCN-NEXT: v_alignbit_b32 v8, v9, v8, 16 ; GCN-NEXT: v_and_or_b32 v6, v6, s4, v10 ; GCN-NEXT: v_and_or_b32 v7, v7, s4, v11 ; GCN-NEXT: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16 @@ -210,16 +208,15 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX1030-NEXT: 
v_and_b32_e32 v1, s0, v7 ; GFX1030-NEXT: v_mov_b32_e32 v15, v2 +; GFX1030-NEXT: v_and_b32_e32 v2, s0, v8 ; GFX1030-NEXT: v_mov_b32_e32 v16, v3 -; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v7 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_and_b32_e32 v3, s0, v8 ; GFX1030-NEXT: v_mov_b32_e32 v17, v4 +; GFX1030-NEXT: v_alignbit_b32 v20, v2, v7, 16 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: v_and_or_b32 v18, v5, s0, v0 ; GFX1030-NEXT: v_and_or_b32 v19, v6, s0, v1 -; GFX1030-NEXT: v_lshl_or_b32 v20, v3, 16, v2 ; GFX1030-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v9 ; GFX1030-NEXT: v_readfirstlane_b32 s5, v10 @@ -252,12 +249,11 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p ; GFX1013-NEXT: s_mov_b32 s0, 0xffff ; GFX1013-NEXT: v_lshrrev_b32_e32 v13, 16, v5 ; GFX1013-NEXT: v_and_b32_e32 v14, s0, v7 -; GFX1013-NEXT: v_lshrrev_b32_e32 v7, 16, v7 ; GFX1013-NEXT: v_and_b32_e32 v8, s0, v8 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: v_lshlrev_b32_e32 v13, 16, v13 ; GFX1013-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX1013-NEXT: v_lshl_or_b32 v7, v8, 16, v7 +; GFX1013-NEXT: v_alignbit_b32 v7, v8, v7, 16 ; GFX1013-NEXT: v_and_or_b32 v5, v5, s0, v13 ; GFX1013-NEXT: v_and_or_b32 v6, v6, s0, v14 ; GFX1013-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -381,16 +377,15 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6 ; GFX1030-NEXT: v_and_b32_e32 v1, s0, v8 ; GFX1030-NEXT: v_mov_b32_e32 v16, v2 +; GFX1030-NEXT: v_and_b32_e32 v2, s0, v9 ; GFX1030-NEXT: v_mov_b32_e32 v17, v3 -; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v8 ; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_and_b32_e32 v3, s0, v9 ; GFX1030-NEXT: v_mov_b32_e32 v18, v4 ; GFX1030-NEXT: v_mov_b32_e32 v19, v5 +; GFX1030-NEXT: 
v_alignbit_b32 v22, v2, v8, 16 ; GFX1030-NEXT: v_and_or_b32 v20, v6, s0, v0 ; GFX1030-NEXT: v_and_or_b32 v21, v7, s0, v1 -; GFX1030-NEXT: v_lshl_or_b32 v22, v3, 16, v2 ; GFX1030-NEXT: s_mov_b32 s1, exec_lo ; GFX1030-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GFX1030-NEXT: v_readfirstlane_b32 s4, v10 @@ -427,13 +422,12 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ; GFX1013-NEXT: v_mov_b32_e32 v17, v11 ; GFX1013-NEXT: v_lshrrev_b32_e32 v10, 16, v6 ; GFX1013-NEXT: v_and_b32_e32 v11, s0, v8 -; GFX1013-NEXT: v_lshrrev_b32_e32 v8, 16, v8 ; GFX1013-NEXT: v_and_b32_e32 v9, s0, v9 ; GFX1013-NEXT: v_mov_b32_e32 v18, v12 +; GFX1013-NEXT: v_mov_b32_e32 v19, v13 ; GFX1013-NEXT: v_lshlrev_b32_e32 v10, 16, v10 ; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX1013-NEXT: v_mov_b32_e32 v19, v13 -; GFX1013-NEXT: v_lshl_or_b32 v8, v9, 16, v8 +; GFX1013-NEXT: v_alignbit_b32 v8, v9, v8, 16 ; GFX1013-NEXT: s_mov_b32 s1, exec_lo ; GFX1013-NEXT: v_and_or_b32 v6, v6, s0, v10 ; GFX1013-NEXT: v_and_or_b32 v7, v7, s0, v11 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index 503b45d49991c..c248c4236e067 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -383,14 +383,12 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: v_min_u32_e32 v4, v5, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v3 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2 ; GFX6-NEXT: 
v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v3 @@ -534,18 +532,18 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 ; GFX6-NEXT: s_not_b32 s5, s3 -; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_min_u32 s4, s5, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 +; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_add_i32 s3, s3, s4 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_lshr_b32 s3, s3, 24 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s2, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 24 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GFX6-NEXT: s_lshl_b32 s0, s2, 16 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: s_lshl_b32 s0, s3, 24 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v4i8: @@ -1814,9 +1812,9 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs ; GFX6-NEXT: s_min_u32 s2, s3, s2 ; GFX6-NEXT: s_add_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v2i16: @@ -1864,9 +1862,7 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) { ; GFX6-NEXT: v_min_u32_e32 v1, s1, v1 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, s0, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; GFX6-NEXT: ; return to 
shader part epilog ; ; GFX8-LABEL: uaddsat_v2i16_sv: @@ -1908,9 +1904,7 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-NEXT: v_min_u32_e32 v2, s0, v2 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: uaddsat_v2i16_vs: @@ -1972,15 +1966,11 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3 ; GFX6-NEXT: v_min_u32_e32 v4, v5, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_v4i16: @@ -2038,15 +2028,15 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 ; GFX6-NEXT: s_not_b32 s5, s3 ; GFX6-NEXT: s_min_u32 s4, s5, s4 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_add_i32 s3, s3, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 16 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: 
v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v4i16: @@ -2137,20 +2127,14 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) { ; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11 ; GFX6-NEXT: v_xor_b32_e32 v7, -1, v5 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_u32_e32 v6, v7, v6 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 -; GFX6-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v2, v5, v4, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_v6i16: @@ -2224,20 +2208,20 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre ; GFX6-NEXT: s_add_i32 s4, s4, s6 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 ; GFX6-NEXT: s_not_b32 s7, s5 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_min_u32 s6, s7, s6 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_add_i32 s5, s5, s6 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_lshl_b32 
s2, s5, 16 -; GFX6-NEXT: s_or_b32 s2, s4, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_alignbit_b32 v2, s5, v2, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_readfirstlane_b32 s2, v2 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v6i16: @@ -2341,24 +2325,16 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15 ; GFX6-NEXT: v_xor_b32_e32 v9, -1, v7 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_u32_e32 v8, v9, v8 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX6-NEXT: v_or_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_or_b32_e32 v3, v6, v3 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v2, v5, v4, 16 +; GFX6-NEXT: v_alignbit_b32 v3, v7, v6, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_v8i16: @@ -2448,24 +2424,24 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre ; GFX6-NEXT: s_add_i32 s6, s6, s8 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 ; GFX6-NEXT: s_not_b32 s9, s7 -; GFX6-NEXT: 
s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_min_u32 s8, s9, s8 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_add_i32 s7, s7, s8 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_lshr_b32 s7, s7, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_lshl_b32 s3, s7, 16 -; GFX6-NEXT: s_or_b32 s2, s4, s2 -; GFX6-NEXT: s_or_b32 s3, s6, s3 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_mov_b32_e32 v3, s6 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_alignbit_b32 v2, s5, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v3, s7, v3, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_readfirstlane_b32 s2, v2 +; GFX6-NEXT: v_readfirstlane_b32 s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_uaddsat_v8i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index a4da5822dac57..9676db3eba17e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -371,14 +371,12 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: v_min_u32_e32 v4, v3, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 
8, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v3 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 24 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v3 @@ -518,18 +516,18 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) { ; GFX6-NEXT: s_sub_i32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s3, s4, 24 ; GFX6-NEXT: s_lshl_b32 s4, s7, 24 -; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_min_u32 s4, s3, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, 24 +; GFX6-NEXT: s_lshr_b32 s1, s1, 24 ; GFX6-NEXT: s_lshr_b32 s2, s2, 24 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 -; GFX6-NEXT: s_lshl_b32 s1, s1, 8 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_lshr_b32 s3, s3, 24 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s2, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 24 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 +; GFX6-NEXT: s_lshl_b32 s0, s2, 16 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: s_lshl_b32 s0, s3, 24 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v4i8: @@ -1724,9 +1722,9 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs ; GFX6-NEXT: s_min_u32 s2, s1, s2 ; GFX6-NEXT: s_sub_i32 s1, s1, s2 ; GFX6-NEXT: s_lshr_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v2i16: @@ -1772,9 +1770,7 @@ define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) { ; GFX6-NEXT: v_min_u32_e32 v1, s0, v1 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; 
GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_v2i16_sv: @@ -1814,9 +1810,7 @@ define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; GFX6-NEXT: v_min_u32_e32 v2, s0, v1 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_v2i16_vs: @@ -1874,15 +1868,11 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7 ; GFX6-NEXT: v_min_u32_e32 v4, v3, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v3, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_v4i16: @@ -1936,15 +1926,15 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre ; GFX6-NEXT: s_lshl_b32 s3, s3, 16 ; GFX6-NEXT: s_lshl_b32 s4, s7, 16 ; GFX6-NEXT: s_min_u32 s4, s3, s4 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_sub_i32 s3, s3, s4 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_lshr_b32 s3, s3, 16 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; 
GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v4i16: @@ -2029,20 +2019,14 @@ define <3 x float> @v_usubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) { ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_u32_e32 v6, v5, v6 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, v5, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 -; GFX6-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v2, v5, v4, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_v6i16: @@ -2110,20 +2094,20 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre ; GFX6-NEXT: s_sub_i32 s4, s4, s6 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16 ; GFX6-NEXT: s_lshl_b32 s6, s11, 16 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_min_u32 s6, s5, s6 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_sub_i32 s5, s5, s6 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 
16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_or_b32 s2, s4, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_alignbit_b32 v2, s5, v2, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_readfirstlane_b32 s2, v2 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v6i16: @@ -2219,24 +2203,16 @@ define <4 x float> @v_usubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_min_u32_e32 v8, v7, v8 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, v7, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v7 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v6 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7 -; GFX6-NEXT: v_or_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_or_b32_e32 v3, v6, v3 +; GFX6-NEXT: v_alignbit_b32 v0, v1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, v3, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v2, v5, v4, 16 +; GFX6-NEXT: v_alignbit_b32 v3, v7, v6, 16 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_v8i16: @@ -2318,24 
+2294,24 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre ; GFX6-NEXT: s_sub_i32 s6, s6, s8 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16 ; GFX6-NEXT: s_lshl_b32 s8, s15, 16 -; GFX6-NEXT: s_lshr_b32 s1, s1, 16 ; GFX6-NEXT: s_min_u32 s8, s7, s8 -; GFX6-NEXT: s_lshr_b32 s0, s0, 16 -; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_sub_i32 s7, s7, s8 -; GFX6-NEXT: s_lshl_b32 s1, s1, 16 -; GFX6-NEXT: s_lshr_b32 s2, s2, 16 +; GFX6-NEXT: s_lshr_b32 s1, s1, 16 +; GFX6-NEXT: s_lshr_b32 s3, s3, 16 ; GFX6-NEXT: s_lshr_b32 s5, s5, 16 ; GFX6-NEXT: s_lshr_b32 s7, s7, 16 -; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s3, 16 -; GFX6-NEXT: s_lshr_b32 s4, s4, 16 -; GFX6-NEXT: s_lshr_b32 s6, s6, 16 -; GFX6-NEXT: s_or_b32 s1, s2, s1 -; GFX6-NEXT: s_lshl_b32 s2, s5, 16 -; GFX6-NEXT: s_lshl_b32 s3, s7, 16 -; GFX6-NEXT: s_or_b32 s2, s4, s2 -; GFX6-NEXT: s_or_b32 s3, s6, s3 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mov_b32_e32 v2, s4 +; GFX6-NEXT: v_mov_b32_e32 v3, s6 +; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 16 +; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 16 +; GFX6-NEXT: v_alignbit_b32 v2, s5, v2, 16 +; GFX6-NEXT: v_alignbit_b32 v3, s7, v3, 16 +; GFX6-NEXT: v_readfirstlane_b32 s0, v0 +; GFX6-NEXT: v_readfirstlane_b32 s1, v1 +; GFX6-NEXT: v_readfirstlane_b32 s2, v2 +; GFX6-NEXT: v_readfirstlane_b32 s3, v3 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_usubsat_v8i16: From 8b280df504b97a13d06a929fbc85348903456fdd Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 23 Jan 2022 21:24:05 -0800 Subject: [PATCH 326/946] Rough guess at fixing lldb tests to handle Clang defaulting to DWARFv5 --- .../basic_entry_values/TestBasicEntryValues.py | 2 +- .../unambiguous_sequence/TestUnambiguousTailCalls.py | 2 +- .../SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py index 4b9a814764158..f4ae1fc015569 100644 --- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py +++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py @@ -15,4 +15,4 @@ lldbinline.MakeInlineTest(__file__, globals(), decorators=decorators+[skipIf(debug_info="dsym")], name="BasicEntryValues_GNU", - build_dict=dict(CXXFLAGS_EXTRAS="-O2 -ggdb")) + build_dict=dict(CXXFLAGS_EXTRAS="-O2 -ggdb -gdwarf-4")) diff --git a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py index cbdf40e2416f7..19aad2ab1ec32 100644 --- a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py +++ b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py @@ -7,5 +7,5 @@ lldbinline.MakeInlineTest(__file__, globals(), name="UnambiguousTailCalls_V5", build_dict=dict(CFLAGS_EXTRAS="-O2 -glldb"), decorators=decor) lldbinline.MakeInlineTest(__file__, globals(), name="UnambiguousTailCalls_GNU", - build_dict=dict(CFLAGS_EXTRAS="-O2 -ggdb"), + build_dict=dict(CFLAGS_EXTRAS="-O2 -ggdb -gdwarf-4"), decorators=decor+[decorators.skipIf(debug_info="dsym")]) diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp index 29adff62cd1ee..0e29cb3e7f16e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp @@ -7,8 +7,8 @@ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t1.o -DONE // RUN: %clang %s -target x86_64-pc-linux 
-fno-standalone-debug -g \ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t2.o -DTWO -// RUN: llvm-dwarfdump %t1.dwo -debug-types | FileCheck --check-prefix=ONEUNIT %s -// RUN: llvm-dwarfdump %t2.dwo -debug-types | FileCheck --check-prefix=ONEUNIT %s +// RUN: llvm-dwarfdump %t1.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s +// RUN: llvm-dwarfdump %t2.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s // RUN: ld.lld %t1.o %t2.o -o %t // RUN: %lldb %t -o "target var a b **b.a" -b | FileCheck %s From 1f4a0531b3fdb9b4747c155805393a91926fe058 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Sun, 23 Jan 2022 22:01:48 -0800 Subject: [PATCH 327/946] [TSan] Mark test unsupported on Darwin --- compiler-rt/test/tsan/vfork.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/test/tsan/vfork.cpp b/compiler-rt/test/tsan/vfork.cpp index 2d669b305a9de..1e34a568fb5a8 100644 --- a/compiler-rt/test/tsan/vfork.cpp +++ b/compiler-rt/test/tsan/vfork.cpp @@ -1,4 +1,6 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: ios + #include #include #include From e29d8fb16978c463c7ea08cb255f5a97eca16d36 Mon Sep 17 00:00:00 2001 From: Wu Xinlong <821408745@qq.com> Date: Mon, 24 Jan 2022 12:00:09 +0800 Subject: [PATCH 328/946] [RISCV] Initially support the K-extension instructions on the LLVM MC layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements support for the scalar cryptography extension in LLVM according to version v1.0.0 of the [K Ext specification](https://github.com/riscv/riscv-crypto/releases) (scalar crypto has already been ratified). Currently, we are implementing the MC (Machine Code) layer of this extension, and the majority of the work is done under the `llvm/lib/Target/RISCV` directory. There are also some test files in the `llvm/test/MC/RISCV` directory.
Remove the Zbk* subfeatures, which conflict with the B extensions, to reduce the size of the patch. (Zbk* will be resubmitted after this patch has been merged.) **Co-author:**@ksyx & @VincentWu & @lihongliang & @achieveartificialintelligence Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D98136 --- llvm/lib/Support/RISCVISAInfo.cpp | 16 +++ .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 13 ++ .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 + llvm/lib/Target/RISCV/RISCV.td | 78 +++++++++++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoZk.td | 123 ++++++++++++++++++ llvm/lib/Target/RISCV/RISCVSchedRocket.td | 5 +- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 4 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 16 +++ llvm/lib/Target/RISCV/RISCVSystemOperands.td | 6 + llvm/test/CodeGen/RISCV/attributes.ll | 41 +++++- llvm/test/MC/RISCV/attribute-arch.s | 30 +++++ llvm/test/MC/RISCV/rv32zknd-only-invalid.s | 17 +++ llvm/test/MC/RISCV/rv32zknd-only-valid.s | 13 ++ llvm/test/MC/RISCV/rv32zkne-only-invalid.s | 17 +++ llvm/test/MC/RISCV/rv32zkne-only-valid.s | 13 ++ llvm/test/MC/RISCV/rv32zknh-only-valid.s | 29 +++++ llvm/test/MC/RISCV/rv32zknh-valid.s | 26 ++++ llvm/test/MC/RISCV/rv32zksed-invalid.s | 13 ++ llvm/test/MC/RISCV/rv32zksed-valid.s | 18 +++ llvm/test/MC/RISCV/rv32zksh-valid.s | 18 +++ llvm/test/MC/RISCV/rv64zknd-only-valid.s | 25 ++++ llvm/test/MC/RISCV/rv64zkne-only-invalid.s | 17 +++ llvm/test/MC/RISCV/rv64zkne-only-valid.s | 21 +++ llvm/test/MC/RISCV/rv64zknh-only-valid.s | 21 +++ llvm/test/MC/RISCV/rv64zksed-invalid.s | 13 ++ llvm/test/MC/RISCV/rvk-user-csr-name.s | 29 +++++ 28 files changed, 624 insertions(+), 3 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZk.td create mode 100644 llvm/test/MC/RISCV/rv32zknd-only-invalid.s create mode 100644 llvm/test/MC/RISCV/rv32zknd-only-valid.s create mode 100644
llvm/test/MC/RISCV/rv32zkne-only-invalid.s create mode 100644 llvm/test/MC/RISCV/rv32zkne-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv32zknh-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv32zknh-valid.s create mode 100644 llvm/test/MC/RISCV/rv32zksed-invalid.s create mode 100644 llvm/test/MC/RISCV/rv32zksed-valid.s create mode 100644 llvm/test/MC/RISCV/rv32zksh-valid.s create mode 100644 llvm/test/MC/RISCV/rv64zknd-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv64zkne-only-invalid.s create mode 100644 llvm/test/MC/RISCV/rv64zkne-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv64zknh-only-valid.s create mode 100644 llvm/test/MC/RISCV/rv64zksed-invalid.s create mode 100644 llvm/test/MC/RISCV/rvk-user-csr-name.s diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index f8a17f7440f6a..c34817920e1bc 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -58,6 +58,16 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zbkb", RISCVExtensionVersion{1, 0}}, {"zbkc", RISCVExtensionVersion{1, 0}}, + {"zknd", RISCVExtensionVersion{1, 0}}, + {"zkne", RISCVExtensionVersion{1, 0}}, + {"zknh", RISCVExtensionVersion{1, 0}}, + {"zksed", RISCVExtensionVersion{1, 0}}, + {"zksh", RISCVExtensionVersion{1, 0}}, + {"zkr", RISCVExtensionVersion{1, 0}}, + {"zkn", RISCVExtensionVersion{1, 0}}, + {"zks", RISCVExtensionVersion{1, 0}}, + {"zkt", RISCVExtensionVersion{1, 0}}, + {"zk", RISCVExtensionVersion{1, 0}}, {"v", RISCVExtensionVersion{1, 0}}, {"zvl32b", RISCVExtensionVersion{1, 0}}, @@ -751,6 +761,9 @@ static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl256b[] = {"zvl128b"}; static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; +static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; +static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zkne", "zknd", "zknh"}; +static const char 
*ImpliedExtsZks[] = {"zbkb", "zbkc", "zksed", "zksh"}; struct ImpliedExtsEntry { StringLiteral Name; @@ -766,6 +779,9 @@ struct ImpliedExtsEntry { static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"v"}, {ImpliedExtsV}}, {{"zfh"}, {ImpliedExtsZfh}}, + {{"zk"}, {ImpliedExtsZk}}, + {{"zkn"}, {ImpliedExtsZkn}}, + {{"zks"}, {ImpliedExtsZks}}, {{"zve32f"}, {ImpliedExtsZve32f}}, {{"zve32x"}, {ImpliedExtsZve32x}}, {{"zve64d"}, {ImpliedExtsZve64d}}, diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 1c8ed0d60d8e8..a2ea34fe11c73 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -566,6 +566,16 @@ struct RISCVOperand : public MCParsedAsmOperand { return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } + bool isRnumArg() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + if (!isImm()) + return false; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && Imm >= INT64_C(0) && Imm <= INT64_C(10) && + VK == RISCVMCExpr::VK_RISCV_None; + } + bool isSImm5() const { if (!isImm()) return false; @@ -1240,6 +1250,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, (1 << 4), "immediate must be in the range"); } + case Match_InvalidRnumArg: { + return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10); + } } llvm_unreachable("Unknown match type detected!"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 9cfd36745f46f..72d91b1044d65 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -192,6 +192,7 @@ enum OperandType : unsigned { OPERAND_UIMM20, OPERAND_UIMMLOG2XLEN, OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN, + OPERAND_RVKRNUM, // Operand is either a register or uimm5, 
this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index dea042348d4db..72caa88104e85 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -178,6 +178,84 @@ def HasStdExtZbcOrZbkc "'Zbc' (Carry-Less 'B' Instructions) or " "'Zbkc' (Carry-less multiply instructions for Cryptography)">; +def FeatureStdExtZknd + : SubtargetFeature<"zknd", "HasStdExtZknd", "true", + "'Zknd' (NIST Suite: AES Decryption)">; +def HasStdExtZknd : Predicate<"Subtarget->hasStdExtZknd()">, + AssemblerPredicate<(all_of FeatureStdExtZknd), + "'Zknd' (NIST Suite: AES Decryption)">; + +def FeatureStdExtZkne + : SubtargetFeature<"zkne", "HasStdExtZkne", "true", + "'Zkne' (NIST Suite: AES Encryption)">; +def HasStdExtZkne : Predicate<"Subtarget->hasStdExtZkne()">, + AssemblerPredicate<(all_of FeatureStdExtZkne), + "'Zkne' (NIST Suite: AES Encryption)">; + +// Some instructions belong to both Zknd and Zkne subextensions. +// They should be enabled if either has been specified. 
+def HasStdExtZkndOrZkne + : Predicate<"Subtarget->hasStdExtZknd() || Subtarget->hasStdExtZkne()">, + AssemblerPredicate<(any_of FeatureStdExtZknd, FeatureStdExtZkne), + "'Zknd' (NIST Suite: AES Decryption) or " + "'Zkne' (NIST Suite: AES Encryption)">; + +def FeatureStdExtZknh + : SubtargetFeature<"zknh", "HasStdExtZknh", "true", + "'Zknh' (NIST Suite: Hash Function Instructions)">; +def HasStdExtZknh : Predicate<"Subtarget->hasStdExtZknh()">, + AssemblerPredicate<(all_of FeatureStdExtZknh), + "'Zknh' (NIST Suite: Hash Function Instructions)">; + +def FeatureStdExtZksed + : SubtargetFeature<"zksed", "HasStdExtZksed", "true", + "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">; +def HasStdExtZksed : Predicate<"Subtarget->hasStdExtZksed()">, + AssemblerPredicate<(all_of FeatureStdExtZksed), + "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">; + +def FeatureStdExtZksh + : SubtargetFeature<"zksh", "HasStdExtZksh", "true", + "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">; +def HasStdExtZksh : Predicate<"Subtarget->hasStdExtZksh()">, + AssemblerPredicate<(all_of FeatureStdExtZksh), + "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">; + +def FeatureStdExtZkr + : SubtargetFeature<"zkr", "HasStdExtZkr", "true", + "'Zkr' (Entropy Source Extension)">; +def HasStdExtZkr : Predicate<"Subtarget->hasStdExtZkr()">, + AssemblerPredicate<(all_of FeatureStdExtZkr), + "'Zkr' (Entropy Source Extension)">; + +def FeatureStdExtZkn + : SubtargetFeature<"zkn", "HasStdExtZkn", "true", + "'Zkn' (NIST Algorithm Suite)", + [FeatureStdExtZbkb, + FeatureStdExtZbkc, + FeatureStdExtZkne, + FeatureStdExtZknd, + FeatureStdExtZknh]>; + +def FeatureStdExtZks + : SubtargetFeature<"zks", "HasStdExtZks", "true", + "'Zks' (ShangMi Algorithm Suite)", + [FeatureStdExtZbkb, + FeatureStdExtZbkc, + FeatureStdExtZksed, + FeatureStdExtZksh]>; + +def FeatureStdExtZkt + : SubtargetFeature<"zkt", "HasStdExtZkt", "true", + "'Zkt' (Data Independent Execution Latency)">; + +def 
FeatureStdExtZk + : SubtargetFeature<"zk", "HasStdExtZk", "true", + "'Zk' (Standard scalar cryptography extension)", + [FeatureStdExtZkn, + FeatureStdExtZkr, + FeatureStdExtZkt]>; + def FeatureNoRVCHints : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false", "Disable RVC Hint Instructions.">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index a4e752b7e8839..2ab9ab653328d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1146,6 +1146,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, else Ok = isUInt<5>(Imm); break; + case RISCVOp::OPERAND_RVKRNUM: + Ok = Imm >= 0 && Imm <= 10; + break; } if (!Ok) { ErrInfo = "Invalid immediate"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index dd1627231db4c..64cd89cda06a8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1516,5 +1516,6 @@ include "RISCVInstrInfoF.td" include "RISCVInstrInfoD.td" include "RISCVInstrInfoC.td" include "RISCVInstrInfoZb.td" +include "RISCVInstrInfoZk.td" include "RISCVInstrInfoV.td" include "RISCVInstrInfoZfh.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td new file mode 100644 index 0000000000000..52a29526a541a --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td @@ -0,0 +1,123 @@ +//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions - tablegen -*===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard 'Zk', +// Scalar Cryptography Instructions extension, version 1.0. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +def RnumArg : AsmOperandClass { + let Name = "RnumArg"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidRnumArg"; +} + +def rnum : Operand, ImmLeaf= 0 && Imm <= 10);}]> { + let ParserMatchClass = RnumArg; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<4>"; + let OperandType = "OPERAND_RVKRNUM"; + let OperandNamespace = "RISCVOp"; +} + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVKUnary imm12_in, bits<3> funct3, string opcodestr> + : RVInstI{ + let imm12 = imm12_in; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVKByteSelect funct5, string opcodestr> + : RVInstR<{0b00, funct5}, 0b000, OPC_OP, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, uimm2:$bs), + opcodestr, "$rd, $rs1, $rs2, $bs">{ + bits<2> bs; + let Inst{31-30} = bs; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVKUnary_rnum funct7, bits<3> funct3, string opcodestr> + : RVInstI{ + bits<4> rnum; + let Inst{31-25} = funct7; + let Inst{24} = 1; + let Inst{23-20} = rnum; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// +let Predicates = [HasStdExtZknd, IsRV32] in { +def AES32DSI : RVKByteSelect<0b10101, "aes32dsi">; +def AES32DSMI : RVKByteSelect<0b10111, "aes32dsmi">; +} // Predicates = [HasStdExtZknd, IsRV32] + +let Predicates = [HasStdExtZknd, IsRV64] in { +def AES64DS : 
ALU_rr<0b0011101, 0b000, "aes64ds">; +def AES64DSM : ALU_rr<0b0011111, 0b000, "aes64dsm">; + +def AES64IM : RVKUnary<0b001100000000, 0b001, "aes64im">; +} // Predicates = [HasStdExtZknd, IsRV64] + +let Predicates = [HasStdExtZkndOrZkne, IsRV64] in { +def AES64KS2 : ALU_rr<0b0111111, 0b000, "aes64ks2">; + +def AES64KS1I : RVKUnary_rnum<0b0011000, 0b001, "aes64ks1i">; +} // Predicates = [HasStdExtZkndOrZkne, IsRV64] + +let Predicates = [HasStdExtZkne, IsRV32] in { +def AES32ESI : RVKByteSelect<0b10001, "aes32esi">; +def AES32ESMI : RVKByteSelect<0b10011, "aes32esmi">; +} // Predicates = [HasStdExtZkne, IsRV32] + +let Predicates = [HasStdExtZkne, IsRV64] in { +def AES64ES : ALU_rr<0b0011001, 0b000, "aes64es">; +def AES64ESM : ALU_rr<0b0011011, 0b000, "aes64esm">; +} // Predicates = [HasStdExtZkne, IsRV64] + +let Predicates = [HasStdExtZknh] in { +def SHA256SIG0 : RVKUnary<0b000100000010, 0b001, "sha256sig0">; +def SHA256SIG1 : RVKUnary<0b000100000011, 0b001, "sha256sig1">; +def SHA256SUM0 : RVKUnary<0b000100000000, 0b001, "sha256sum0">; +def SHA256SUM1 : RVKUnary<0b000100000001, 0b001, "sha256sum1">; +} // Predicates = [HasStdExtZknh] + +let Predicates = [HasStdExtZknh, IsRV32] in { +def SHA512SIG0H : ALU_rr<0b0101110, 0b000, "sha512sig0h">; +def SHA512SIG0L : ALU_rr<0b0101010, 0b000, "sha512sig0l">; +def SHA512SIG1H : ALU_rr<0b0101111, 0b000, "sha512sig1h">; +def SHA512SIG1L : ALU_rr<0b0101011, 0b000, "sha512sig1l">; +def SHA512SUM0R : ALU_rr<0b0101000, 0b000, "sha512sum0r">; +def SHA512SUM1R : ALU_rr<0b0101001, 0b000, "sha512sum1r">; +} // [HasStdExtZknh, IsRV32] + +let Predicates = [HasStdExtZknh, IsRV64] in { +def SHA512SIG0 : RVKUnary<0b000100000110, 0b001, "sha512sig0">; +def SHA512SIG1 : RVKUnary<0b000100000111, 0b001, "sha512sig1">; +def SHA512SUM0 : RVKUnary<0b000100000100, 0b001, "sha512sum0">; +def SHA512SUM1 : RVKUnary<0b000100000101, 0b001, "sha512sum1">; +} // Predicates = [HasStdExtZknh, IsRV64] + +let Predicates = [HasStdExtZksed] in { +def SM4ED : 
RVKByteSelect<0b11000, "sm4ed">; +def SM4KS : RVKByteSelect<0b11010, "sm4ks">; +} // Predicates = [HasStdExtZksed] + +let Predicates = [HasStdExtZksh] in { +def SM3P0 : RVKUnary<0b000100001000, 0b001, "sm3p0">; +def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">; +} // Predicates = [HasStdExtZksh] diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 783e65c1aa185..92dd3175a460a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -17,7 +17,10 @@ def RocketModel : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = false; - let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions, HasVInstructionsI64]; + let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, + HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, + HasStdExtZksh, HasStdExtZkr, HasVInstructions, + HasVInstructionsI64]; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index d164514ce70f0..e5eaad2a6dd08 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -15,7 +15,9 @@ def SiFive7Model : SchedMachineModel { let LoadLatency = 3; let MispredictPenalty = 3; let CompleteModel = 0; - let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions]; + let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, + HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, + HasStdExtZksh, HasStdExtZkr, HasVInstructions]; } // The SiFive7 microarchitecture has two pipelines: A and B. 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index bacb8fae37941..ac1df37345859 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -85,6 +85,16 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtZfh = false; bool HasStdExtZbkb = false; bool HasStdExtZbkc = false; + bool HasStdExtZknd = false; + bool HasStdExtZkne = false; + bool HasStdExtZknh = false; + bool HasStdExtZksed = false; + bool HasStdExtZksh = false; + bool HasStdExtZkr = false; + bool HasStdExtZkn = false; + bool HasStdExtZks = false; + bool HasStdExtZkt = false; + bool HasStdExtZk = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -160,6 +170,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool hasStdExtZfh() const { return HasStdExtZfh; } bool hasStdExtZbkb() const { return HasStdExtZbkb; } bool hasStdExtZbkc() const { return HasStdExtZbkc; } + bool hasStdExtZknd() const { return HasStdExtZknd; } + bool hasStdExtZkne() const { return HasStdExtZkne; } + bool hasStdExtZknh() const { return HasStdExtZknh; } + bool hasStdExtZksed() const { return HasStdExtZksed; } + bool hasStdExtZksh() const { return HasStdExtZksh; } + bool hasStdExtZkr() const { return HasStdExtZkr; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 3a3d5ba732b60..b9aa25b321b08 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -372,3 +372,9 @@ foreach i = 0...3 in { let isRV32Only = 1 in def : SysReg<"hstateen"#i#"h", !add(0x61C, i)>; } + +//===----------------------------------------------- +// Entropy Source CSR +//===----------------------------------------------- + +def SEED : SysReg<"seed", 0x015>; diff 
--git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index d37d86fc2f04d..3f7ca36844cc5 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -21,6 +21,16 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s ; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s +; RUN: llc -mtriple=riscv32 -mattr=+zknd %s -o - | FileCheck --check-prefix=RV32ZKND %s +; RUN: llc -mtriple=riscv32 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV32ZKNE %s +; RUN: llc -mtriple=riscv32 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV32ZKNH %s +; RUN: llc -mtriple=riscv32 -mattr=+zksed %s -o - | FileCheck --check-prefix=RV32ZKSED %s +; RUN: llc -mtriple=riscv32 -mattr=+zksh %s -o - | FileCheck --check-prefix=RV32ZKSH %s +; RUN: llc -mtriple=riscv32 -mattr=+zkr %s -o - | FileCheck --check-prefix=RV32ZKR %s +; RUN: llc -mtriple=riscv32 -mattr=+zkn %s -o - | FileCheck --check-prefix=RV32ZKN %s +; RUN: llc -mtriple=riscv32 -mattr=+zks %s -o - | FileCheck --check-prefix=RV32ZKS %s +; RUN: llc -mtriple=riscv32 -mattr=+zkt %s -o - | FileCheck --check-prefix=RV32ZKT %s +; RUN: llc -mtriple=riscv32 -mattr=+zk %s -o - | FileCheck --check-prefix=RV32ZK %s ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s @@ -42,7 +52,16 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s ; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s - +; RUN: llc -mtriple=riscv64 -mattr=+zknd %s -o - | FileCheck 
--check-prefix=RV64ZKND %s +; RUN: llc -mtriple=riscv64 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV64ZKNE %s +; RUN: llc -mtriple=riscv64 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV64ZKNH %s +; RUN: llc -mtriple=riscv64 -mattr=+zksed %s -o - | FileCheck --check-prefix=RV64ZKSED %s +; RUN: llc -mtriple=riscv64 -mattr=+zksh %s -o - | FileCheck --check-prefix=RV64ZKSH %s +; RUN: llc -mtriple=riscv64 -mattr=+zkr %s -o - | FileCheck --check-prefix=RV64ZKR %s +; RUN: llc -mtriple=riscv64 -mattr=+zkn %s -o - | FileCheck --check-prefix=RV64ZKN %s +; RUN: llc -mtriple=riscv64 -mattr=+zks %s -o - | FileCheck --check-prefix=RV64ZKS %s +; RUN: llc -mtriple=riscv64 -mattr=+zkt %s -o - | FileCheck --check-prefix=RV64ZKT %s +; RUN: llc -mtriple=riscv64 -mattr=+zk %s -o - | FileCheck --check-prefix=RV64ZK %s ; RV32M: .attribute 5, "rv32i2p0_m2p0" ; RV32A: .attribute 5, "rv32i2p0_a2p0" @@ -65,6 +84,16 @@ ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0" ; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0" +; RV32ZKND: .attribute 5, "rv32i2p0_zknd1p0" +; RV32ZKNE: .attribute 5, "rv32i2p0_zkne1p0" +; RV32ZKNH: .attribute 5, "rv32i2p0_zknh1p0" +; RV32ZKSED: .attribute 5, "rv32i2p0_zksed1p0" +; RV32ZKSH: .attribute 5, "rv32i2p0_zksh1p0" +; RV32ZKR: .attribute 5, "rv32i2p0_zkr1p0" +; RV32ZKN: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0" +; RV32ZKS: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0" +; RV32ZKT: .attribute 5, "rv32i2p0_zkt1p0" +; RV32ZK: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" @@ -87,6 +116,16 @@ ; RV64COMBINED: .attribute 5, 
"rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0" ; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0" +; RV64ZKND: .attribute 5, "rv64i2p0_zknd1p0" +; RV64ZKNE: .attribute 5, "rv64i2p0_zkne1p0" +; RV64ZKNH: .attribute 5, "rv64i2p0_zknh1p0" +; RV64ZKSED: .attribute 5, "rv64i2p0_zksed1p0" +; RV64ZKSH: .attribute 5, "rv64i2p0_zksh1p0" +; RV64ZKR: .attribute 5, "rv64i2p0_zkr1p0" +; RV64ZKN: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0" +; RV64ZKS: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0" +; RV64ZKT: .attribute 5, "rv64i2p0_zkt1p0" +; RV64ZK: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0" define i32 @addi(i32 %a) { %1 = add i32 %a, 1 diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 15ee933dcdd1d..27dc70a7b6f75 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -130,3 +130,33 @@ .attribute arch, "rv32i_zbkc1p0" # CHECK: attribute 5, "rv32i2p0_zbkc1p0" + +.attribute arch, "rv32i_zknd1p0" +# CHECK: attribute 5, "rv32i2p0_zknd1p0" + +.attribute arch, "rv32i_zkne1p0" +# CHECK: attribute 5, "rv32i2p0_zkne1p0" + +.attribute arch, "rv32i_zknh1p0" +# CHECK: attribute 5, "rv32i2p0_zknh1p0" + +.attribute arch, "rv32i_zksed1p0" +# CHECK: attribute 5, "rv32i2p0_zksed1p0" + +.attribute arch, "rv32i_zksh1p0" +# CHECK: attribute 5, "rv32i2p0_zksh1p0" + +.attribute arch, "rv32i_zkr1p0" +# CHECK: attribute 5, "rv32i2p0_zkr1p0" + +.attribute arch, "rv32i_zkn1p0" +# CHECK: attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0" + +.attribute arch, "rv32i_zks1p0" +# CHECK: attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0" + +.attribute arch, "rv32i_zkt1p0" +# CHECK: attribute 5, "rv32i2p0_zkt1p0" + +.attribute arch, "rv32i_zk1p0" +# CHECK: attribute 5, 
"rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0" diff --git a/llvm/test/MC/RISCV/rv32zknd-only-invalid.s b/llvm/test/MC/RISCV/rv32zknd-only-invalid.s new file mode 100644 index 0000000000000..05ae48491a037 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zknd-only-invalid.s @@ -0,0 +1,17 @@ +# With Zk extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zk < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zkn extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zkn < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zknd extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zknd < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +aes32dsmi a0, a1, a2, 8 + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +aes32dsi a0, a1, a2, 8 diff --git a/llvm/test/MC/RISCV/rv32zknd-only-valid.s b/llvm/test/MC/RISCV/rv32zknd-only-valid.s new file mode 100644 index 0000000000000..db139595879e6 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zknd-only-valid.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknd -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknd < %s \ +# RUN: | llvm-objdump --mattr=+zknd -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: aes32dsi a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xea] +aes32dsi a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: aes32dsmi a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xee] +aes32dsmi a0, a1, a2, 3 diff --git a/llvm/test/MC/RISCV/rv32zkne-only-invalid.s b/llvm/test/MC/RISCV/rv32zkne-only-invalid.s new file mode 100644 index 0000000000000..9ace21cae5eff --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zkne-only-invalid.s @@ -0,0 +1,17 @@ +# With Zk extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zk < %s 2>&1 \ +# RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR + +# With Zkn extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zkn < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zkne extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zkne < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +aes32esmi a0, a1, a2, 8 + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +aes32esi a0, a1, a2, 8 diff --git a/llvm/test/MC/RISCV/rv32zkne-only-valid.s b/llvm/test/MC/RISCV/rv32zkne-only-valid.s new file mode 100644 index 0000000000000..de99aedd5ebdf --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zkne-only-valid.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zkne -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zkne < %s \ +# RUN: | llvm-objdump --mattr=+zkne -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: aes32esi a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xe2] +aes32esi a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: aes32esmi a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xe6] +aes32esmi a0, a1, a2, 3 diff --git a/llvm/test/MC/RISCV/rv32zknh-only-valid.s b/llvm/test/MC/RISCV/rv32zknh-only-valid.s new file mode 100644 index 0000000000000..d48dd4949dd46 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zknh-only-valid.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknh < %s \ +# RUN: | llvm-objdump --mattr=+zknh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: sha512sig0h a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x5c] +sha512sig0h a0, a1, a2 + +# CHECK-ASM-AND-OBJ: sha512sig1h a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x5e] +sha512sig1h a0, a1, a2 + +# CHECK-ASM-AND-OBJ: 
sha512sig0l a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x54] +sha512sig0l a0, a1, a2 + +# CHECK-ASM-AND-OBJ: sha512sig1l a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x56] +sha512sig1l a0, a1, a2 + +# CHECK-ASM-AND-OBJ: sha512sum0r a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x50] +sha512sum0r a0, a1, a2 + +# CHECK-ASM-AND-OBJ: sha512sum1r a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x52] +sha512sum1r a0, a1, a2 diff --git a/llvm/test/MC/RISCV/rv32zknh-valid.s b/llvm/test/MC/RISCV/rv32zknh-valid.s new file mode 100644 index 0000000000000..e1dbc0feea34a --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zknh-valid.s @@ -0,0 +1,26 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknh < %s \ +# RUN: | llvm-objdump --mattr=+zknh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknh < %s \ +# RUN: | llvm-objdump --mattr=+zknh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: sha256sig0 a0, a1 +# CHECK-ASM: [0x13,0x95,0x25,0x10] +sha256sig0 a0, a1 + +# CHECK-ASM-AND-OBJ: sha256sig1 a0, a1 +# CHECK-ASM: [0x13,0x95,0x35,0x10] +sha256sig1 a0, a1 + +# CHECK-ASM-AND-OBJ: sha256sum0 a0, a1 +# CHECK-ASM: [0x13,0x95,0x05,0x10] +sha256sum0 a0, a1 + +# CHECK-ASM-AND-OBJ: sha256sum1 a0, a1 +# CHECK-ASM: [0x13,0x95,0x15,0x10] +sha256sum1 a0, a1 diff --git a/llvm/test/MC/RISCV/rv32zksed-invalid.s b/llvm/test/MC/RISCV/rv32zksed-invalid.s new file mode 100644 index 0000000000000..feb9bc09b60bf --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zksed-invalid.s @@ -0,0 +1,13 @@ +# With Zks extension: +# RUN: not llvm-mc -triple=riscv32 -mattr=+zks < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zksed extension: +# 
RUN: not llvm-mc -triple=riscv32 -mattr=+zksed < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +sm4ed a0, a1, a2, 8 + +# CHECK-ERROR: immediate must be an integer in the range [0, 3] +sm4ks a0, a1, a2, 8 diff --git a/llvm/test/MC/RISCV/rv32zksed-valid.s b/llvm/test/MC/RISCV/rv32zksed-valid.s new file mode 100644 index 0000000000000..25d618c082e6e --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zksed-valid.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zksed -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zksed -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zksed < %s \ +# RUN: | llvm-objdump --mattr=+zksed -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zksed < %s \ +# RUN: | llvm-objdump --mattr=+zksed -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: sm4ed a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xf0] +sm4ed a0, a1, a2, 3 + +# CHECK-ASM-AND-OBJ: sm4ks a0, a1, a2, 3 +# CHECK-ASM: [0x33,0x85,0xc5,0xf4] +sm4ks a0, a1, a2, 3 diff --git a/llvm/test/MC/RISCV/rv32zksh-valid.s b/llvm/test/MC/RISCV/rv32zksh-valid.s new file mode 100644 index 0000000000000..48ae6652c850e --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zksh-valid.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zksh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zksh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zksh < %s \ +# RUN: | llvm-objdump --mattr=+zksh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj 
-triple=riscv64 -mattr=+zksh < %s \ +# RUN: | llvm-objdump --mattr=+zksh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: sm3p0 a0, a1 +# CHECK-ASM: [0x13,0x95,0x85,0x10] +sm3p0 a0, a1 + +# CHECK-ASM-AND-OBJ: sm3p1 a0, a1 +# CHECK-ASM: [0x13,0x95,0x95,0x10] +sm3p1 a0, a1 diff --git a/llvm/test/MC/RISCV/rv64zknd-only-valid.s b/llvm/test/MC/RISCV/rv64zknd-only-valid.s new file mode 100644 index 0000000000000..03656136d20ae --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zknd-only-valid.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknd -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknd < %s \ +# RUN: | llvm-objdump --mattr=+zknd -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: aes64ds a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x3a] +aes64ds a0, a1, a2 + +# CHECK-ASM-AND-OBJ: aes64dsm a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x3e] +aes64dsm a0, a1, a2 + +# CHECK-ASM-AND-OBJ: aes64im a0, a1 +# CHECK-ASM: [0x13,0x95,0x05,0x30] +aes64im a0, a1 + +# CHECK-ASM-AND-OBJ: aes64ks1i a0, a1, 5 +# CHECK-ASM: [0x13,0x95,0x55,0x31] +aes64ks1i a0, a1, 5 + +# CHECK-ASM-AND-OBJ: aes64ks2 a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x7e] +aes64ks2 a0, a1, a2 diff --git a/llvm/test/MC/RISCV/rv64zkne-only-invalid.s b/llvm/test/MC/RISCV/rv64zkne-only-invalid.s new file mode 100644 index 0000000000000..5a7331fa0a9bb --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zkne-only-invalid.s @@ -0,0 +1,17 @@ +# With Zk extension: +# RUN: not llvm-mc -triple=riscv64 -mattr=+zk < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zkn extension: +# RUN: not llvm-mc -triple=riscv64 -mattr=+zkn < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zkne extension: +# RUN: not llvm-mc -triple=riscv64 -mattr=+zkne < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# CHECK-ERROR: 
immediate must be an integer in the range [0, 10] +aes64ks1i a0, a1, 11 + +# CHECK-ERROR: immediate must be an integer in the range [0, 10] +aes64ks1i a0, a1, -1 diff --git a/llvm/test/MC/RISCV/rv64zkne-only-valid.s b/llvm/test/MC/RISCV/rv64zkne-only-valid.s new file mode 100644 index 0000000000000..78950b85f51b8 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zkne-only-valid.s @@ -0,0 +1,21 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zkne -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zkne < %s \ +# RUN: | llvm-objdump --mattr=+zkne -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: aes64es a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x32] +aes64es a0, a1, a2 + +# CHECK-ASM-AND-OBJ: aes64esm a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x36] +aes64esm a0, a1, a2 + +# CHECK-ASM-AND-OBJ: aes64ks1i a0, a1, 5 +# CHECK-ASM: [0x13,0x95,0x55,0x31] +aes64ks1i a0, a1, 5 + +# CHECK-ASM-AND-OBJ: aes64ks2 a0, a1, a2 +# CHECK-ASM: [0x33,0x85,0xc5,0x7e] +aes64ks2 a0, a1, a2 diff --git a/llvm/test/MC/RISCV/rv64zknh-only-valid.s b/llvm/test/MC/RISCV/rv64zknh-only-valid.s new file mode 100644 index 0000000000000..9478b6004a4a5 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zknh-only-valid.s @@ -0,0 +1,21 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknh -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknh < %s \ +# RUN: | llvm-objdump --mattr=+zknh -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: sha512sig0 a0, a1 +# CHECK-ASM: [0x13,0x95,0x65,0x10] +sha512sig0 a0, a1 + +# CHECK-ASM-AND-OBJ: sha512sig1 a0, a1 +# CHECK-ASM: [0x13,0x95,0x75,0x10] +sha512sig1 a0, a1 + +# CHECK-ASM-AND-OBJ: sha512sum0 a0, a1 +# CHECK-ASM: [0x13,0x95,0x45,0x10] +sha512sum0 a0, a1 + +# CHECK-ASM-AND-OBJ: sha512sum1 a0, a1 +# CHECK-ASM: [0x13,0x95,0x55,0x10] 
+sha512sum1 a0, a1 diff --git a/llvm/test/MC/RISCV/rv64zksed-invalid.s b/llvm/test/MC/RISCV/rv64zksed-invalid.s new file mode 100644 index 0000000000000..2c55ac461f51f --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zksed-invalid.s @@ -0,0 +1,13 @@ +# With Zks extension: +# RUN: not llvm-mc -triple=riscv64 -mattr=+zks < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +# With Zksed extension: +# RUN: not llvm-mc -triple=riscv64 -mattr=+zksed < %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +sm4ed a0, a1, a2, 8 +# CHECK-ERROR: immediate must be an integer in the range [0, 3] + +sm4ks a0, a1, a2, 8 +# CHECK-ERROR: immediate must be an integer in the range [0, 3] diff --git a/llvm/test/MC/RISCV/rvk-user-csr-name.s b/llvm/test/MC/RISCV/rvk-user-csr-name.s new file mode 100644 index 0000000000000..cacadf794d95b --- /dev/null +++ b/llvm/test/MC/RISCV/rvk-user-csr-name.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc %s -triple=riscv32 -riscv-no-aliases -mattr=+f -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-ENC %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zkr < %s \ +# RUN: | llvm-objdump -d --mattr=+zkr - \ +# RUN: | FileCheck -check-prefix=CHECK-INST-ALIAS %s +# +# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+f -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-INST,CHECK-ENC %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zkr < %s \ +# RUN: | llvm-objdump -d --mattr=+zkr - \ +# RUN: | FileCheck -check-prefix=CHECK-INST-ALIAS %s + +################################## +# Entropy Source CSR +################################## + +# seed +# name +# CHECK-INST: csrrs t1, seed, zero +# CHECK-ENC: encoding: [0x73,0x23,0x50,0x01] +# CHECK-INST-ALIAS: csrr t1, seed +# uimm12 +# CHECK-INST: csrrs t2, seed, zero +# CHECK-ENC: encoding: [0xf3,0x23,0x50,0x01] +# CHECK-INST-ALIAS: csrr t2, seed +# name +csrrs t1, seed, zero +# uimm12 +csrrs t2, 0x015, zero From bf039a8620f1779d02280cb0a33c4d818073623b Mon Sep 
17 00:00:00 2001 From: Kazu Hirata Date: Sun, 23 Jan 2022 22:53:15 -0800 Subject: [PATCH 329/946] [Target] Use range-based for loops (NFC) --- llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 13 +++++-------- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 9 ++++----- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 5 ++--- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 8 ++++---- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 11 +++++------ llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 12 +++++------- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 6 ++---- llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp | 8 ++++---- llvm/lib/Target/VE/LVLGen.cpp | 4 ++-- llvm/lib/Target/VE/VEMCInstLower.cpp | 3 +-- .../lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 4 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++------ llvm/lib/Target/X86/X86PadShortFunction.cpp | 7 +++---- 13 files changed, 42 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index fc0d5cc6fbfa8..eeedce2d99cb0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -57,12 +57,9 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) { InstrToDelete.clear(); // Look for call instructions in the function - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; - ++BI) { - for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); - I != E; ++I) { - Instruction &Instr = *I; - if (CallInst *CI = dyn_cast(I)) { + for (BasicBlock &BB : F) { + for (Instruction &Instr : BB) { + if (CallInst *CI = dyn_cast(&Instr)) { Function *CalledF = CI->getCalledFunction(); if (CalledF && CalledF->isIntrinsic()) { // This is an intrinsic function call, check if its an istypep @@ -84,8 +81,8 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) { } // Delete any istypep instances we replaced in the IR - for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i) - 
InstrToDelete[i]->eraseFromParent(); + for (Instruction *I : InstrToDelete) + I->eraseFromParent(); return Changed; } diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 6cf59d285e8d3..f655f25602bc3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -66,10 +66,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { getAnalysis().getTTI(F); // Collect all aggregate loads and mem* calls. - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - if (LoadInst *LI = dyn_cast(II)) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (LoadInst *LI = dyn_cast(&I)) { if (!LI->hasOneUse()) continue; @@ -81,7 +80,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { continue; AggrLoads.push_back(LI); } - } else if (MemIntrinsic *IntrCall = dyn_cast(II)) { + } else if (MemIntrinsic *IntrCall = dyn_cast(&I)) { // Convert intrinsic calls with variable size or with constant size // larger than the MaxAggrCopySize threshold. 
if (ConstantInt *LenCI = dyn_cast(IntrCall->getLength())) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 7edec82c6e067..eada872c2a7db 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2339,9 +2339,8 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, Found = true; } } else if (MO.isRegMask()) { - for (TargetRegisterClass::iterator I = RC->begin(), - IE = RC->end(); I != IE; ++I) - if (MO.clobbersPhysReg(*I)) { + for (MCPhysReg R : *RC) + if (MO.clobbersPhysReg(R)) { Pred.push_back(MO); Found = true; } diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 0c920582843ad..e5fa02bc8ccf0 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -258,12 +258,12 @@ void PPCMIPeephole::UpdateTOCSaves( } bool Keep = true; - for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) { - MachineInstr *CurrInst = It->first; + for (auto &I : TOCSaves) { + MachineInstr *CurrInst = I.first; // If new instruction dominates an existing one, mark existing one as // redundant. - if (It->second && MDT->dominates(MI, CurrInst)) - It->second = false; + if (I.second && MDT->dominates(MI, CurrInst)) + I.second = false; // Check if the new instruction is redundant. if (MDT->dominates(CurrInst, MI)) { Keep = false; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 707c1396e5728..cc5738a5d7b63 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -374,11 +374,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, // clobbers ctr. 
auto asmClobbersCTR = [](InlineAsm *IA) { InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { - InlineAsm::ConstraintInfo &C = CIV[i]; + for (const InlineAsm::ConstraintInfo &C : CIV) { if (C.Type != InlineAsm::isInput) - for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) - if (StringRef(C.Codes[j]).equals_insensitive("{ctr}")) + for (const auto &Code : C.Codes) + if (StringRef(Code).equals_insensitive("{ctr}")) return true; } return false; @@ -1301,8 +1300,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo)) + for (Loop *I : *L) + if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo)) return false; // Stop search. HardwareLoopInfo HWLoopInfo(L); diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 0be35adc35c72..8a7d324ddfe15 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -297,18 +297,16 @@ namespace { // fma result. LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg); - for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end(); - AI != AE; ++AI) { + for (auto &AI : FMAInt) { // Don't add the segment that corresponds to the original copy. 
- if (AI->valno == AddendValNo) + if (AI.valno == AddendValNo) continue; VNInfo *NewFMAValNo = - NewFMAInt.getNextValue(AI->start, - LIS->getVNInfoAllocator()); + NewFMAInt.getNextValue(AI.start, LIS->getVNInfoAllocator()); - NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end, - NewFMAValNo)); + NewFMAInt.addSegment( + LiveInterval::Segment(AI.start, AI.end, NewFMAValNo)); } LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n'); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 881346bbe47ed..f10651d5c5d7e 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -8314,13 +8314,11 @@ MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( // Add FPR/VR clobbers. if (!NoFloat && (Control & 4) != 0) { if (Subtarget.hasVector()) { - for (int I = 0; I < 32; I++) { - unsigned Reg = SystemZMC::VR128Regs[I]; + for (unsigned Reg : SystemZMC::VR128Regs) { MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } else { - for (int I = 0; I < 16; I++) { - unsigned Reg = SystemZMC::FP64Regs[I]; + for (unsigned Reg : SystemZMC::FP64Regs) { MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp index 4b95d0d67389d..5a2cfc53da494 100644 --- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp +++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -170,15 +170,15 @@ bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB, MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB); RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end()); RestMBB->transferSuccessors(&MBB); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - RestMBB->addLiveIn(*I); + for (MCPhysReg R : LiveRegs) + RestMBB->addLiveIn(R); // Create a new block MoveMBB to hold the move instruction. 
MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB); MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB); MoveMBB->addLiveIn(SrcReg); - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MoveMBB->addLiveIn(*I); + for (MCPhysReg R : LiveRegs) + MoveMBB->addLiveIn(R); // At the end of MBB, create a conditional branch to RestMBB if the // condition is false, otherwise fall through to MoveMBB. diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp index c4588926af9ed..4db6a59284c27 100644 --- a/llvm/lib/Target/VE/LVLGen.cpp +++ b/llvm/lib/Target/VE/LVLGen.cpp @@ -125,8 +125,8 @@ bool LVLGen::runOnMachineFunction(MachineFunction &F) { TII = Subtarget.getInstrInfo(); TRI = Subtarget.getRegisterInfo(); - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - Changed |= runOnMachineBasicBlock(*FI); + for (MachineBasicBlock &MBB : F) + Changed |= runOnMachineBasicBlock(MBB); if (Changed) { LLVM_DEBUG(dbgs() << "\n"); diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp index bc5577ce4f978..57195f238cf6e 100644 --- a/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -78,8 +78,7 @@ void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp = LowerOperand(MI, MO, AP); if (MCOp.isValid()) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 9da0a8129f230..4440bdc3d58f4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -281,8 +281,8 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) { {codeview::RegisterId::AMD64_XMM31, X86::XMM31}, 
}; - for (unsigned I = 0; I < array_lengthof(RegMap); ++I) - MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast(RegMap[I].CVReg)); + for (const auto &I : RegMap) + MRI->mapLLVMRegToCVReg(I.Reg, static_cast(I.CVReg)); } MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index be85c116bb037..618b97a2e8dbd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5490,10 +5490,9 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, /// materialize the FP immediate as a load from a constant pool. bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { - if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) + for (const APFloat &FPImm : LegalFPImmediates) + if (Imm.bitwiseIsEqual(FPImm)) return true; - } return false; } @@ -33438,9 +33437,7 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, MachineBasicBlock *BB) { // Scan forward through BB for a use/def of EFLAGS. - for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end(); - miI != miE; ++miI) { - const MachineInstr& mi = *miI; + for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) { if (mi.readsRegister(X86::EFLAGS)) return true; // If we found a def, we can stop searching. 
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index 47ae517ae76d7..e92b1b002bb01 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -129,10 +129,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; // Pad the identified basic blocks with NOOPs - for (DenseMap::iterator I = ReturnBBs.begin(); - I != ReturnBBs.end(); ++I) { - MachineBasicBlock *MBB = I->first; - unsigned Cycles = I->second; + for (const auto &ReturnBB : ReturnBBs) { + MachineBasicBlock *MBB = ReturnBB.first; + unsigned Cycles = ReturnBB.second; // Function::hasOptSize is already checked above. bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI); From ba16e3c31f66f02df08ec41394b765aa568a3107 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Fri, 21 Jan 2022 14:35:20 +0800 Subject: [PATCH 330/946] [RISCV] Decouple Zve* extensions and the V extension. According to the spec, there are some difference between V and Zve64d. For example, the vmulh integer multiply variants that return the high word of the product (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx) are not included for EEW=64 in Zve64*, but V extension does support these instructions. So we should decouple Zve* extensions and the V extension. 
Differential Revision: https://reviews.llvm.org/D117854 --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Basic/Targets/RISCV.cpp | 2 +- clang/lib/Sema/SemaChecking.cpp | 48 ++++++++++++------- .../RISCV/rvb-intrinsics/riscv32-zbb-error.c | 2 +- .../CodeGen/RISCV/rvv-intrinsics/rvv-error.c | 18 +++++++ clang/utils/TableGen/RISCVVEmitter.cpp | 2 +- llvm/lib/Support/RISCVISAInfo.cpp | 17 ++++++- llvm/lib/Target/RISCV/RISCV.td | 8 ++-- llvm/lib/Target/RISCV/RISCVSubtarget.h | 16 +++++-- llvm/test/CodeGen/RISCV/attributes.ll | 8 ++-- llvm/test/MC/RISCV/attribute-arch.s | 26 +++++----- 11 files changed, 101 insertions(+), 48 deletions(-) create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7fccdcaa9fc6a..db1047586a473 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11471,7 +11471,7 @@ def warn_tcb_enforcement_violation : Warning< // RISC-V builtin required extension warning def err_riscv_builtin_requires_extension : Error< - "builtin requires '%0' extension support to be enabled">; + "builtin requires at least one of the following extensions support to be enabled : %0">; def err_riscv_builtin_invalid_lmul : Error< "LMUL argument must be in the range [0,3] or [5,7]">; } // end of sema component. 
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index dc4a451726bbe..0680cad5b07c5 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -188,7 +188,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, if (ISAInfo->hasExtension("c")) Builder.defineMacro("__riscv_compressed"); - if (ISAInfo->hasExtension("zve32x")) + if (ISAInfo->hasExtension("zve32x") || ISAInfo->hasExtension("v")) Builder.defineMacro("__riscv_vector"); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e2b78fa212b81..c8fb36b8311a4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3976,23 +3976,39 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, // Check if each required feature is included for (StringRef F : ReqFeatures) { - if (TI.hasFeature(F)) - continue; - - // If the feature is 64bit, alter the string so it will print better in - // the diagnostic. - if (F == "64bit") - F = "RV64"; - - // Convert features like "zbr" and "experimental-zbr" to "Zbr". - F.consume_front("experimental-"); - std::string FeatureStr = F.str(); - FeatureStr[0] = std::toupper(FeatureStr[0]); + SmallVector ReqOpFeatures; + F.split(ReqOpFeatures, '|'); + bool HasFeature = false; + for (StringRef OF : ReqOpFeatures) { + if (TI.hasFeature(OF)) { + HasFeature = true; + continue; + } + } - // Error message - FeatureMissing = true; - Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension) - << TheCall->getSourceRange() << StringRef(FeatureStr); + if (!HasFeature) { + std::string FeatureStrs = ""; + for (StringRef OF : ReqOpFeatures) { + // If the feature is 64bit, alter the string so it will print better in + // the diagnostic. + if (OF == "64bit") + OF = "RV64"; + + // Convert features like "zbr" and "experimental-zbr" to "Zbr". 
+ OF.consume_front("experimental-"); + std::string FeatureStr = OF.str(); + FeatureStr[0] = std::toupper(FeatureStr[0]); + // Combine strings. + FeatureStrs += FeatureStrs == "" ? "" : ", "; + FeatureStrs += "'"; + FeatureStrs += FeatureStr; + FeatureStrs += "'"; + } + // Error message + FeatureMissing = true; + Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension) + << TheCall->getSourceRange() << StringRef(FeatureStrs); + } } if (FeatureMissing) diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c index b831bfb9402b4..a544434105c8b 100644 --- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c +++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c @@ -2,5 +2,5 @@ // RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -verify %s -o - int orc_b_64(int a) { - return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires 'RV64' extension support to be enabled}} + return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires at least one of the following extensions support to be enabled : 'RV64'}} } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c new file mode 100644 index 0000000000000..7de132e1ce6d0 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c @@ -0,0 +1,18 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64V %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64V %s +// RUN: not %clang_cc1 -triple riscv64 -emit-llvm-only %s 2>&1 | FileCheck %s --check-prefix=CHECK-RV64-ERR + +// CHECK-RV64V-LABEL: @test( +// 
CHECK-RV64V-NEXT: entry: +// CHECK-RV64V-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 1, i64 0, i64 0) +// CHECK-RV64V-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-RV64V-NEXT: ret i32 [[CONV]] +// + +// CHECK-RV64-ERR: error: builtin requires at least one of the following extensions support to be enabled : 'Zve32x', 'V' + +int test() { + return __builtin_rvv_vsetvli(1, 0, 0); +} diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index 837226f4e2a54..c063b766e4a65 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -1034,7 +1034,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) { OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n"; OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, " - "ATTRS, \"zve32x\")\n"; + "ATTRS, \"zve32x|v\")\n"; OS << "#endif\n"; for (auto &Def : Defs) { auto P = diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index c34817920e1bc..e6df48e5bb416 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -701,9 +701,11 @@ Error RISCVISAInfo::checkDependency() { bool HasE = Exts.count("e") == 1; bool HasD = Exts.count("d") == 1; bool HasF = Exts.count("f") == 1; - bool HasVector = Exts.count("zve32x") == 1; + bool HasZve32x = Exts.count("zve32x") == 1; bool HasZve32f = Exts.count("zve32f") == 1; bool HasZve64d = Exts.count("zve64d") == 1; + bool HasV = Exts.count("v") == 1; + bool HasVector = HasZve32x || HasV; bool HasZvl = MinVLen != 0; if (HasE && !IsRv32) @@ -736,6 +738,12 @@ Error RISCVISAInfo::checkDependency() { errc::invalid_argument, "zvl*b requires v or zve* extension to also be specified"); + // Could not implement Zve* extension and the V extension at the same time. 
+ if (HasZve32x && HasV) + return createStringError( + errc::invalid_argument, + "It is illegal to specify the v extension with zve* extensions"); + // Additional dependency checks. // TODO: The 'q' extension requires rv64. // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'. @@ -743,7 +751,7 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"}; +static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"}; static const char *ImpliedExtsZfh[] = {"zfhmin"}; static const char *ImpliedExtsZve64d[] = {"zve64f"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; @@ -872,6 +880,11 @@ void RISCVISAInfo::updateMaxELen() { ExtName.getAsInteger(10, ZveELen); MaxELen = std::max(MaxELen, ZveELen); } + if (ExtName == "v") { + MaxELenFp = 64; + MaxELen = 64; + return; + } } } diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 72caa88104e85..cd8885f6a6e8d 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -306,21 +306,21 @@ def FeatureStdExtZve64d def FeatureStdExtV : SubtargetFeature<"v", "HasStdExtV", "true", "'V' (Vector Extension for Application Processors)", - [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>; + [FeatureStdExtZvl128b, FeatureStdExtF, FeatureStdExtD]>; def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">, AssemblerPredicate< - (any_of FeatureStdExtZve32x), + (any_of FeatureStdExtZve32x, FeatureStdExtV), "'V' (Vector Extension for Application Processors), 'Zve32x' or " "'Zve64x' (Vector Extensions for Embedded Processors)">; def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">, AssemblerPredicate< - (any_of FeatureStdExtZve64x), + (any_of FeatureStdExtZve64x, FeatureStdExtV), "'V' (Vector Extension for Application Processors) or 'Zve64x' " "(Vector Extensions for Embedded Processors)">; def HasVInstructionsAnyF : 
Predicate<"Subtarget->hasVInstructionsAnyF()">, AssemblerPredicate< - (any_of FeatureStdExtZve32f), + (any_of FeatureStdExtZve32f, FeatureStdExtV), "'V' (Vector Extension for Application Processors), 'Zve32f', " "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">; diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index ac1df37345859..62b3d54350779 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -199,13 +199,19 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { } // Vector codegen related methods. - bool hasVInstructions() const { return HasStdExtZve32x; } - bool hasVInstructionsI64() const { return HasStdExtZve64x; } - bool hasVInstructionsF16() const { return HasStdExtZve32f && HasStdExtZfh; } + bool hasVInstructions() const { return HasStdExtV || HasStdExtZve32x; } + bool hasVInstructionsI64() const { return HasStdExtV || HasStdExtZve64x; } + bool hasVInstructionsF16() const { + return (HasStdExtV || HasStdExtZve32f) && HasStdExtZfh; + } // FIXME: Consider Zfinx in the future - bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; } + bool hasVInstructionsF32() const { + return HasStdExtV || (HasStdExtZve32f && HasStdExtF); + } // FIXME: Consider Zdinx in the future - bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; } + bool hasVInstructionsF64() const { + return HasStdExtV || (HasStdExtZve64d && HasStdExtD); + } // F16 and F64 both require F32. 
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); } unsigned getMaxInterleaveFactor() const { diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 3f7ca36844cc5..86b384f6df6fc 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -80,8 +80,8 @@ ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93" ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0" ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" -; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0" ; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0" ; RV32ZKND: .attribute 5, "rv32i2p0_zknd1p0" @@ -112,8 +112,8 @@ ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93" ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0" ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" -; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0" ; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0" ; RV64ZKND: .attribute 5, "rv64i2p0_zknd1p0" diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 27dc70a7b6f75..d39566eb81cd9 
100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -34,43 +34,43 @@ # CHECK: attribute 5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0" .attribute arch, "rv32iv" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl32b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl64b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl128b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl256b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl512b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl1024b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" 
.attribute arch, "rv32ivzvl2048b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl4096b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0" .attribute arch, "rv32ivzvl8192b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" .attribute arch, "rv32ivzvl16384b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" .attribute arch, "rv32ivzvl32768b" -# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0" .attribute arch, "rv32ivzvl65536b" -# CHECK: attribute 5, 
"rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0" +# CHECK: attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0" .attribute arch, "rv32izve32x" # CHECK: attribute 5, "rv32i2p0_zve32x1p0_zvl32b1p0" From c5590396d041e77a84101cdcc4249788403e4e40 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Mon, 24 Jan 2022 15:23:28 +0800 Subject: [PATCH 331/946] [PowerPC] Emit warning for ieeelongdouble on older GNU toolchain GCC 12 should have proper support for IEEE-754 compliant 128-bit floating point in libstdc++. So warning is needed when linking against older libstdc++ versions or LLVM libc++. Glibc starts supporting float128 in both header and libraries since 2.32. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D112906 --- .../clang/Basic/DiagnosticDriverKinds.td | 3 + clang/lib/Driver/ToolChains/PPCLinux.cpp | 56 +++++++++++++++++++ clang/lib/Driver/ToolChains/PPCLinux.h | 7 ++- .../gcc/powerpc64le-linux-gnu/11.2.0/.keep | 0 clang/test/Driver/ppc-float-abi-warning.cpp | 13 +++++ 5 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep create mode 100644 clang/test/Driver/ppc-float-abi-warning.cpp diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3ea32a8876c91..e635be6b6d1bc 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -380,6 +380,9 @@ def warn_drv_deprecated_arg : Warning< "argument '%0' is deprecated, use '%1' instead">, InGroup; def warn_drv_assuming_mfloat_abi_is : Warning< "unknown platform, 
assuming -mfloat-abi=%0">; +def warn_drv_unsupported_float_abi_by_lib : Warning< + "float ABI '%0' is not supported by current library">, + InGroup>; def warn_ignoring_ftabstop_value : Warning< "ignoring invalid -ftabstop value '%0', using default value %1">; def warn_drv_overriding_flag_option : Warning< diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp index af2e3a21a0af7..e5e1aa06f4b1d 100644 --- a/clang/lib/Driver/ToolChains/PPCLinux.cpp +++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp @@ -8,11 +8,50 @@ #include "PPCLinux.h" #include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" using namespace clang::driver::toolchains; using namespace llvm::opt; +using namespace llvm::sys; + +// Glibc older than 2.32 doesn't fully support IEEE float128. Here we check +// glibc version by looking at dynamic linker name. +static bool GlibcSupportsFloat128(const std::string &Linker) { + llvm::SmallVector Path; + + // Resolve potential symlinks to linker. + if (fs::real_path(Linker, Path)) + return false; + llvm::StringRef LinkerName = + path::filename(llvm::StringRef(Path.data(), Path.size())); + + // Since glibc 2.34, the installed .so file is not symlink anymore. But we can + // still safely assume it's newer than 2.32. 
+ if (LinkerName.startswith("ld64.so")) + return true; + + if (!LinkerName.startswith("ld-2.")) + return false; + unsigned Minor = (LinkerName[5] - '0') * 10 + (LinkerName[6] - '0'); + if (Minor < 32) + return false; + + return true; +} + +PPCLinuxToolChain::PPCLinuxToolChain(const Driver &D, + const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) + : Linux(D, Triple, Args) { + if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { + StringRef ABIName = A->getValue(); + if (ABIName == "ieeelongdouble" && !SupportIEEEFloat128(D, Triple, Args)) + D.Diag(diag::warn_drv_unsupported_float_abi_by_lib) << ABIName; + } +} void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { @@ -26,3 +65,20 @@ void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, Linux::AddClangSystemIncludeArgs(DriverArgs, CC1Args); } + +bool PPCLinuxToolChain::SupportIEEEFloat128( + const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) const { + if (!Triple.isLittleEndian() || !Triple.isPPC64()) + return false; + + if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx)) + return true; + + bool HasUnsupportedCXXLib = + ToolChain::GetCXXStdlibType(Args) == CST_Libcxx && + GCCInstallation.getVersion().isOlderThan(12, 1, 0); + + return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) && + !(D.CCCIsCXX() && HasUnsupportedCXXLib); +} diff --git a/clang/lib/Driver/ToolChains/PPCLinux.h b/clang/lib/Driver/ToolChains/PPCLinux.h index b3ef7b61dc3aa..e0318ae8a3a2a 100644 --- a/clang/lib/Driver/ToolChains/PPCLinux.h +++ b/clang/lib/Driver/ToolChains/PPCLinux.h @@ -18,12 +18,15 @@ namespace toolchains { class LLVM_LIBRARY_VISIBILITY PPCLinuxToolChain : public Linux { public: PPCLinuxToolChain(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args) - : Linux(D, Triple, Args) {} + const llvm::opt::ArgList &Args); void AddClangSystemIncludeArgs(const llvm::opt::ArgList 
&DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + +private: + bool SupportIEEEFloat128(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) const; }; } // end namespace toolchains diff --git a/clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep b/clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/ppc-float-abi-warning.cpp b/clang/test/Driver/ppc-float-abi-warning.cpp new file mode 100644 index 0000000000000..3ccb9415a021d --- /dev/null +++ b/clang/test/Driver/ppc-float-abi-warning.cpp @@ -0,0 +1,13 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s \ +// RUN: --gcc-toolchain=%S/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0 \ +// RUN: -mabi=ieeelongdouble -stdlib=libstdc++ 2>&1 | FileCheck %s +// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s \ +// RUN: -mabi=ieeelongdouble -stdlib=libc++ 2>&1 | FileCheck %s +// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s\ +// RUN: -mabi=ieeelongdouble -stdlib=libc++ -Wno-unsupported-abi 2>&1 | \ +// RUN: FileCheck %s --check-prefix=NOWARN + +// CHECK: warning: float ABI 'ieeelongdouble' is not supported by current library +// NOWARN-NOT: warning: float ABI 'ieeelongdouble' is not supported by current library +long double foo(long double x) { return x; } From ea2112ea15a0f43cb469b29e00cda3d7a48ae875 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 24 Jan 2022 08:34:24 +0100 Subject: [PATCH 332/946] [clang-format] Remove unused assignment. NFC. Fixes scan-build reported warning: https://llvm.org/reports/scan-build/report-QualifierAlignmentFixer.cpp-analyzeRight-55-191910.html#EndPath. 
--- clang/lib/Format/QualifierAlignmentFixer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index a53db5d11848d..b3a4684bead1a 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -261,10 +261,8 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight( // Move to the end of any template class members e.g. // `Foo::iterator`. if (Next && Next->startsSequence(TT_TemplateCloser, tok::coloncolon, - tok::identifier)) { - Next = Next->Next->Next; + tok::identifier)) return Tok; - } assert(Next && "Missing template opener"); Next = Next->Next; } From 3519dcfec22963fbb84e154cecc2df22e6c7724f Mon Sep 17 00:00:00 2001 From: Nimish Mishra Date: Mon, 24 Jan 2022 10:02:58 +0530 Subject: [PATCH 333/946] Added OpenMP 5.0 specification based semantic checks for atomic update construct --- flang/lib/Semantics/check-omp-structure.cpp | 154 +++++++++++++++++- flang/lib/Semantics/check-omp-structure.h | 6 + flang/test/Semantics/omp-atomic01.f90 | 48 +++++- flang/test/Semantics/omp-atomic02.f90 | 109 +++++++++++++ flang/test/Semantics/omp-atomic03.f90 | 93 +++++++++++ flang/test/Semantics/omp-atomic04.f90 | 168 ++++++++++++++++++++ flang/test/Semantics/omp-atomic05.f90 | 26 +++ 7 files changed, 600 insertions(+), 4 deletions(-) create mode 100644 flang/test/Semantics/omp-atomic02.f90 create mode 100644 flang/test/Semantics/omp-atomic03.f90 create mode 100644 flang/test/Semantics/omp-atomic04.f90 create mode 100644 flang/test/Semantics/omp-atomic05.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index df66a42dede52..802fdf650a07c 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1324,13 +1324,163 @@ void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) { } } 
+template +bool OmpStructureChecker::IsOperatorValid(const T &node, const D &variable) { + using AllowedBinaryOperators = + std::variant; + using BinaryOperators = std::variant; + + if constexpr (common::HasMember) { + const auto &variableName{variable.GetSource().ToString()}; + const auto &exprLeft{std::get<0>(node.t)}; + const auto &exprRight{std::get<1>(node.t)}; + if ((exprLeft.value().source.ToString() != variableName) && + (exprRight.value().source.ToString() != variableName)) { + context_.Say(variable.GetSource(), + "Atomic update variable '%s' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct"_err_en_US, + variableName); + } + return common::HasMember; + } + return true; +} + +void OmpStructureChecker::CheckAtomicUpdateAssignmentStmt( + const parser::AssignmentStmt &assignment) { + const auto &expr{std::get(assignment.t)}; + const auto &var{std::get(assignment.t)}; + std::visit( + common::visitors{ + [&](const common::Indirection &x) { + const auto &procedureDesignator{ + std::get(x.value().v.t)}; + const parser::Name *name{ + std::get_if(&procedureDesignator.u)}; + if (name && + !(name->source == "max" || name->source == "min" || + name->source == "iand" || name->source == "ior" || + name->source == "ieor")) { + context_.Say(expr.source, + "Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement"_err_en_US); + } else if (name) { + bool foundMatch{false}; + if (auto varDesignatorIndirection = + std::get_if>(&var.u)) { + const auto &varDesignator = varDesignatorIndirection->value(); + if (const auto *dataRef = std::get_if( + &varDesignator.u)) { + if (const auto *name = + std::get_if(&dataRef->u)) { + const auto &varSymbol = *name->symbol; + if (const auto *e{GetExpr(expr)}) { + for (const Symbol &symbol : + evaluate::CollectSymbols(*e)) { + if (symbol == varSymbol) { + foundMatch = true; + break; + } + } + } + } + } + } + if (!foundMatch) { + context_.Say(expr.source, + "Atomic update variable '%s' not found in 
the argument list of intrinsic procedure"_err_en_US, + var.GetSource().ToString()); + } + } + }, + [&](const auto &x) { + if (!IsOperatorValid(x, var)) { + context_.Say(expr.source, + "Invalid operator in OpenMP ATOMIC (UPDATE) statement"_err_en_US); + } + }, + }, + expr.u); +} + +void OmpStructureChecker::CheckAtomicMemoryOrderClause( + const parser::OmpAtomicClauseList &clauseList) { + int numMemoryOrderClause = 0; + for (const auto &clause : clauseList.v) { + if (std::get_if(&clause.u)) { + numMemoryOrderClause++; + if (numMemoryOrderClause > 1) { + context_.Say(clause.source, + "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US); + return; + } + } + } +} + +void OmpStructureChecker::CheckAtomicMemoryOrderClause( + const parser::OmpAtomicClauseList &leftHandClauseList, + const parser::OmpAtomicClauseList &rightHandClauseList) { + int numMemoryOrderClause = 0; + for (const auto &clause : leftHandClauseList.v) { + if (std::get_if(&clause.u)) { + numMemoryOrderClause++; + if (numMemoryOrderClause > 1) { + context_.Say(clause.source, + "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US); + return; + } + } + } + for (const auto &clause : rightHandClauseList.v) { + if (std::get_if(&clause.u)) { + numMemoryOrderClause++; + if (numMemoryOrderClause > 1) { + context_.Say(clause.source, + "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US); + return; + } + } + } +} + void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { std::visit( common::visitors{ - [&](const auto &someAtomicConstruct) { - const auto &dir{std::get(someAtomicConstruct.t)}; + [&](const parser::OmpAtomic &atomicConstruct) { + const auto &dir{std::get(atomicConstruct.t)}; + PushContextAndClauseSets( + dir.source, llvm::omp::Directive::OMPD_atomic); + CheckAtomicUpdateAssignmentStmt( + std::get>( + atomicConstruct.t) + .statement); + CheckAtomicMemoryOrderClause( + 
std::get(atomicConstruct.t)); + }, + [&](const parser::OmpAtomicUpdate &atomicConstruct) { + const auto &dir{std::get(atomicConstruct.t)}; + PushContextAndClauseSets( + dir.source, llvm::omp::Directive::OMPD_atomic); + CheckAtomicUpdateAssignmentStmt( + std::get>( + atomicConstruct.t) + .statement); + CheckAtomicMemoryOrderClause( + std::get<0>(atomicConstruct.t), std::get<2>(atomicConstruct.t)); + }, + [&](const auto &atomicConstruct) { + const auto &dir{std::get(atomicConstruct.t)}; PushContextAndClauseSets( dir.source, llvm::omp::Directive::OMPD_atomic); + CheckAtomicMemoryOrderClause( + std::get<0>(atomicConstruct.t), std::get<2>(atomicConstruct.t)); }, }, x.u); diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index bf98f360ed58b..88005ad4b64d5 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -229,6 +229,12 @@ class OmpStructureChecker void CheckLoopItrVariableIsInt(const parser::OpenMPLoopConstruct &x); void CheckDoWhile(const parser::OpenMPLoopConstruct &x); void CheckCycleConstraints(const parser::OpenMPLoopConstruct &x); + template bool IsOperatorValid(const T &, const D &); + void CheckAtomicMemoryOrderClause( + const parser::OmpAtomicClauseList &, const parser::OmpAtomicClauseList &); + void CheckAtomicMemoryOrderClause(const parser::OmpAtomicClauseList &); + void CheckAtomicUpdateAssignmentStmt(const parser::AssignmentStmt &); + void CheckAtomicConstructStructure(const parser::OpenMPAtomicConstruct &); void CheckDistLinear(const parser::OpenMPLoopConstruct &x); void CheckSIMDNest(const parser::OpenMPConstruct &x); void CheckTargetNest(const parser::OpenMPConstruct &x); diff --git a/flang/test/Semantics/omp-atomic01.f90 b/flang/test/Semantics/omp-atomic01.f90 index 9f9e26f9081f9..b668de202c17e 100644 --- a/flang/test/Semantics/omp-atomic01.f90 +++ b/flang/test/Semantics/omp-atomic01.f90 @@ -12,152 +12,184 @@ ! 
At most one memory-order-clause may appear on the construct. !READ + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst seq_cst read i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic read seq_cst seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the READ directive !$omp atomic seq_cst read seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic acquire acquire read i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic read acquire acquire i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the READ directive !$omp atomic acquire read acquire i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed relaxed read i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic read relaxed relaxed i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the READ directive !$omp atomic relaxed read relaxed i = j !UPDATE + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst seq_cst update i = j + !ERROR: 
More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic update seq_cst seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive !$omp atomic seq_cst update seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release release update i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic update release release i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the UPDATE directive !$omp atomic release update release i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed relaxed update i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic update relaxed relaxed i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the UPDATE directive !$omp atomic relaxed update relaxed i = j !CAPTURE + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst seq_cst capture i = j j = k !$omp end atomic - + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic capture seq_cst seq_cst i = j j = k !$omp end 
atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive !$omp atomic seq_cst capture seq_cst i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release release capture i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic capture release release i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the CAPTURE directive !$omp atomic release capture release i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic relaxed relaxed capture i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic capture relaxed relaxed i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the CAPTURE directive !$omp atomic relaxed capture relaxed i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel acq_rel capture i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic capture acq_rel acq_rel i = j j = k !$omp end atomic + !ERROR: 
More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive !$omp atomic acq_rel capture acq_rel i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire acquire capture i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic capture acquire acquire i = j j = k !$omp end atomic + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive !$omp atomic acquire capture acquire i = j @@ -165,43 +197,55 @@ !$omp end atomic !WRITE + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst seq_cst write i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic write seq_cst seq_cst i = j + + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the WRITE directive !$omp atomic seq_cst write seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic release release write i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the WRITE directive !$omp atomic write release release i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the 
WRITE directive !$omp atomic release write release i = j - + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed relaxed write i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic write relaxed relaxed i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the WRITE directive !$omp atomic relaxed write relaxed i = j !No atomic-clause + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELAXED clause can appear on the ATOMIC directive !$omp atomic relaxed relaxed i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one SEQ_CST clause can appear on the ATOMIC directive !$omp atomic seq_cst seq_cst i = j + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct !ERROR: At most one RELEASE clause can appear on the ATOMIC directive !$omp atomic release release i = j diff --git a/flang/test/Semantics/omp-atomic02.f90 b/flang/test/Semantics/omp-atomic02.f90 new file mode 100644 index 0000000000000..ec04fda86afc5 --- /dev/null +++ b/flang/test/Semantics/omp-atomic02.f90 @@ -0,0 +1,109 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp + +! OpenMP Atomic construct +! section 2.17.7 +! operator is one of +, *, -, /, .AND., .OR., .EQV., or .NEQV + +program OmpAtomic + use omp_lib + CHARACTER c*3, d*3 + LOGICAL l, m, n + + a = 1 + b = 2 + c = 'foo' + d = 'bar' + m = .TRUE. + n = .FALSE. 
+ !$omp parallel num_threads(4) + + !$omp atomic + a = a + (4*2) + !$omp atomic + a = a*(b + 1) + !$omp atomic + a = a - 3 + !$omp atomic + a = a/(b + 1) + !$omp atomic + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + a = a**4 + !$omp atomic + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + c = c//d + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .LT. b + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .LE. b + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .EQ. b + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .NE. b + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .GE. b + !$omp atomic + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .GT. b + !$omp atomic + m = m .AND. n + !$omp atomic + m = m .OR. n + !$omp atomic + m = m .EQV. n + !$omp atomic + m = m .NEQV. 
n + !$omp atomic update + a = a + (4*2) + !$omp atomic update + a = a*(b + 1) + !$omp atomic update + a = a - 3 + !$omp atomic update + a = a/(b + 1) + !$omp atomic update + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + a = a**4 + !$omp atomic update + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + c = c//d + !$omp atomic update + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .LT. b + !$omp atomic update + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .LE. b + !$omp atomic update + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .EQ. b + !$omp atomic update + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .GE. b + !$omp atomic update + !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement + l = a .GT. b + !$omp atomic update + m = m .AND. n + !$omp atomic update + m = m .OR. n + !$omp atomic update + m = m .EQV. n + !$omp atomic update + m = m .NEQV. n + !$omp end parallel +end program OmpAtomic diff --git a/flang/test/Semantics/omp-atomic03.f90 b/flang/test/Semantics/omp-atomic03.f90 new file mode 100644 index 0000000000000..4262416fa9930 --- /dev/null +++ b/flang/test/Semantics/omp-atomic03.f90 @@ -0,0 +1,93 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp + +! OpenMP Atomic construct +! section 2.17.7 +! 
Intrinsic procedure name is one of MAX, MIN, IAND, IOR, or IEOR. + +program OmpAtomic + use omp_lib + real x + integer :: y, z, a, b, c, d + x = 5.73 + y = 3 + z = 1 +!$omp atomic + y = IAND(y, 4) +!$omp atomic + y = IOR(y, 5) +!$omp atomic + y = IEOR(y, 6) +!$omp atomic + y = MAX(y, 7) +!$omp atomic + y = MIN(y, 8) + +!$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IAND(y, 4) +!$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IOR(y, 5) +!$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IEOR(y, 6) +!$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = MAX(y, 7, b, c) +!$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = MIN(y, 8, a, d) + +!$omp atomic + !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement + y = FRACTION(x) +!$omp atomic + !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement + y = REAL(x) +!$omp atomic update + y = IAND(y, 4) +!$omp atomic update + y = IOR(y, 5) +!$omp atomic update + y = IEOR(y, 6) +!$omp atomic update + y = MAX(y, 7) +!$omp atomic update + y = MIN(y, 8) + +!$omp atomic update + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IAND(y, 4) +!$omp atomic update + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IOR(y, 5) +!$omp atomic update + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IEOR(y, 6) +!$omp atomic update + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = MAX(y, 7) +!$omp atomic update + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = MIN(y, 8) + +!$omp 
atomic update + !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement + y = MOD(y, 9) +!$omp atomic update + !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement + x = ABS(x) +end program OmpAtomic + +subroutine conflicting_types() + type simple + integer :: z + end type + real x + integer :: y, z + type(simple) ::s + z = 1 + !$omp atomic + !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure + z = IAND(s%z, 4) +end subroutine diff --git a/flang/test/Semantics/omp-atomic04.f90 b/flang/test/Semantics/omp-atomic04.f90 new file mode 100644 index 0000000000000..15b832cd3bcdc --- /dev/null +++ b/flang/test/Semantics/omp-atomic04.f90 @@ -0,0 +1,168 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp + +! OpenMP Atomic construct +! section 2.17.7 +! Update assignment must be 'var = var op expr' or 'var = expr op var' + +program OmpAtomic + use omp_lib + real x + integer y + logical m, n, l + x = 5.73 + y = 3 + m = .TRUE. + n = .FALSE. 
+!$omp atomic + x = x + 1 +!$omp atomic + x = 1 + x +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y + 1 +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1 + y + +!$omp atomic + x = x - 1 +!$omp atomic + x = 1 - x +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y - 1 +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1 - y + +!$omp atomic + x = x*1 +!$omp atomic + x = 1*x +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y*1 +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1*y + +!$omp atomic + x = x/1 +!$omp atomic + x = 1/x +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y/1 +!$omp atomic + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1/y + +!$omp atomic + m = m .AND. n +!$omp atomic + m = n .AND. m +!$omp atomic + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .AND. l + +!$omp atomic + m = m .OR. n +!$omp atomic + m = n .OR. m +!$omp atomic + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .OR. l + +!$omp atomic + m = m .EQV. n +!$omp atomic + m = n .EQV. m +!$omp atomic + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .EQV. 
l + +!$omp atomic + m = m .NEQV. n +!$omp atomic + m = n .NEQV. m +!$omp atomic + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .NEQV. l + +!$omp atomic update + x = x + 1 +!$omp atomic update + x = 1 + x +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y + 1 +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1 + y + +!$omp atomic update + x = x - 1 +!$omp atomic update + x = 1 - x +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y - 1 +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1 - y + +!$omp atomic update + x = x*1 +!$omp atomic update + x = 1*x +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y*1 +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1*y + +!$omp atomic update + x = x/1 +!$omp atomic update + x = 1/x +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = y/1 +!$omp atomic update + !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + x = 1/y + +!$omp atomic update + m = m .AND. n +!$omp atomic update + m = n .AND. m +!$omp atomic update + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .AND. l + +!$omp atomic update + m = m .OR. n +!$omp atomic update + m = n .OR. 
m +!$omp atomic update + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .OR. l + +!$omp atomic update + m = m .EQV. n +!$omp atomic update + m = n .EQV. m +!$omp atomic update + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .EQV. l + +!$omp atomic update + m = m .NEQV. n +!$omp atomic update + m = n .NEQV. m +!$omp atomic update + !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct + m = n .NEQV. l + +end program OmpAtomic diff --git a/flang/test/Semantics/omp-atomic05.f90 b/flang/test/Semantics/omp-atomic05.f90 new file mode 100644 index 0000000000000..1ff13d6cd29ce --- /dev/null +++ b/flang/test/Semantics/omp-atomic05.f90 @@ -0,0 +1,26 @@ +! RUN: %python %S/test_errors.py %s %flang -fopenmp + +! This tests the various semantics related to the clauses of various OpenMP atomic constructs + +program OmpAtomic + use omp_lib + integer :: g, x + + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !$omp atomic relaxed, seq_cst + x = x + 1 + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !$omp atomic read seq_cst, relaxed + x = g + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !$omp atomic write relaxed, release + x = 2 * 4 + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !$omp atomic update release, seq_cst + x = 10 + !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct + !$omp atomic capture release, seq_cst + x = g + g = x * 10 + !$omp end atomic +end program OmpAtomic From 670a721de2a19d0307ceea47349fd9e986a8484f Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 24 Jan 2022 08:48:14 +0100 Subject: [PATCH 334/946] [clang-format] Assert Line->First. NFC. Cf. 
scan-build reports: * https://llvm.org/reports/scan-build/report-AffectedRangeManager.cpp-nonPPLineAffected-34-16c04b.html#EndPath * https://llvm.org/reports/scan-build/report-SortJavaScriptImports.cpp-parseModuleReferences-34-96a7f8.html#EndPath * https://llvm.org/reports/scan-build/report-TokenAnnotator.cpp-setCommentLineLevels-26-77bdba.html#EndPath * https://llvm.org/reports/scan-build/report-AffectedRangeManager.cpp-nonPPLineAffected-31-714434.html#EndPath * https://llvm.org/reports/scan-build/report-TokenAnnotator.cpp-setCommentLineLevels-16-bd39d0.html#EndPath * https://llvm.org/reports/scan-build/report-UnwrappedLineFormatter.cpp-format-90-668b2d.html#EndPath --- clang/lib/Format/AffectedRangeManager.cpp | 2 ++ clang/lib/Format/SortJavaScriptImports.cpp | 1 + clang/lib/Format/TokenAnnotator.cpp | 22 +++++++++++---------- clang/lib/Format/UnwrappedLineFormatter.cpp | 3 ++- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/clang/lib/Format/AffectedRangeManager.cpp b/clang/lib/Format/AffectedRangeManager.cpp index 3b735c4e68596..f69f65c5ddf1f 100644 --- a/clang/lib/Format/AffectedRangeManager.cpp +++ b/clang/lib/Format/AffectedRangeManager.cpp @@ -27,6 +27,7 @@ bool AffectedRangeManager::computeAffectedLines( const AnnotatedLine *PreviousLine = nullptr; while (I != E) { AnnotatedLine *Line = *I; + assert(Line->First); Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); // If a line is part of a preprocessor directive, it needs to be formatted @@ -113,6 +114,7 @@ bool AffectedRangeManager::nonPPLineAffected( // affected. bool SomeFirstChildAffected = false; + assert(Line->First); for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { // Determine whether 'Tok' was affected. 
if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index 21f0bdd7323d4..37e79bb15b58b 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -361,6 +361,7 @@ class JavaScriptImportSorter : public TokenAnalyzer { bool AnyImportAffected = false; bool FormattingOff = false; for (auto *Line : AnnotatedLines) { + assert(Line->First); Current = Line->First; LineEnd = Line->Last; // clang-format comments toggle formatting on/off. diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3ba81dfed38c2..cc8b48387fc9e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2353,9 +2353,10 @@ class ExpressionParser { void TokenAnnotator::setCommentLineLevels( SmallVectorImpl &Lines) { const AnnotatedLine *NextNonCommentLine = nullptr; - for (AnnotatedLine *AL : llvm::reverse(Lines)) { + for (AnnotatedLine *Line : llvm::reverse(Lines)) { + assert(Line->First); bool CommentLine = true; - for (const FormatToken *Tok = AL->First; Tok; Tok = Tok->Next) { + for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { if (!Tok->is(tok::comment)) { CommentLine = false; break; @@ -2367,20 +2368,21 @@ void TokenAnnotator::setCommentLineLevels( if (NextNonCommentLine && CommentLine && NextNonCommentLine->First->NewlinesBefore <= 1 && NextNonCommentLine->First->OriginalColumn == - AL->First->OriginalColumn) { + Line->First->OriginalColumn) { // Align comments for preprocessor lines with the # in column 0 if // preprocessor lines are not indented. Otherwise, align with the next // line. - AL->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && - (NextNonCommentLine->Type == LT_PreprocessorDirective || - NextNonCommentLine->Type == LT_ImportStatement)) - ? 
0 - : NextNonCommentLine->Level; + Line->Level = + (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && + (NextNonCommentLine->Type == LT_PreprocessorDirective || + NextNonCommentLine->Type == LT_ImportStatement)) + ? 0 + : NextNonCommentLine->Level; } else { - NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr; + NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr; } - setCommentLineLevels(AL->Children); + setCommentLineLevels(Line->Children); } } diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 08d1eeb18a9d0..293a693fd4818 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -1213,6 +1213,7 @@ unsigned UnwrappedLineFormatter::format( Joiner.getNextMergedLine(DryRun, IndentTracker); Line; PrevPrevLine = PreviousLine, PreviousLine = Line, Line = NextLine, FirstLine = false) { + assert(Line->First); const AnnotatedLine &TheLine = *Line; unsigned Indent = IndentTracker.getIndent(); @@ -1240,7 +1241,7 @@ unsigned UnwrappedLineFormatter::format( if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) { - bool LastLine = Line->First->is(tok::eof); + bool LastLine = TheLine.First->is(tok::eof); formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent, LastLine ? LastStartColumn : NextStartColumn + Indent); } From f533011252578f67a9615fb4ef56dc1ef555551b Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 24 Jan 2022 13:31:23 +0800 Subject: [PATCH 335/946] [Hexagon] Use llvm::Register instead of unsigned in HexagonConstExtenders.cpp. NFC. 
Reviewed By: kparzysz Differential Revision: https://reviews.llvm.org/D117851 --- llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index d3fcdb6ae9a85..d8af35cbf3a89 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -229,7 +229,7 @@ namespace { private: struct Register { Register() = default; - Register(unsigned R, unsigned S) : Reg(R), Sub(S) {} + Register(llvm::Register R, unsigned S) : Reg(R), Sub(S) {} Register(const MachineOperand &Op) : Reg(Op.getReg()), Sub(Op.getSubReg()) {} Register &operator=(const MachineOperand &Op) { @@ -1573,7 +1573,7 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { // No compounds are available. It is not clear whether we should // even process such extenders where the initializer cannot be // a single instruction, but do it for now. - unsigned TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + llvm::Register TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); BuildMI(MBB, At, dl, HII->get(Hexagon::S2_asl_i_r), TmpR) .add(MachineOperand(Ex.Rs)) .addImm(Ex.S); From d6f8f56da04b2e975110c4a5ae15f00a22164e62 Mon Sep 17 00:00:00 2001 From: Lorenzo Chelini Date: Mon, 24 Jan 2022 09:07:20 +0100 Subject: [PATCH 336/946] [MLIR][Presburger] Silence -Wdangling-else warning (NFC) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc suggests explicit braces to avoid ambiguous ‘else’. 
--- mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp index 5d1cfb4c6e781..cf68c92650548 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp @@ -636,8 +636,9 @@ static void checkDivisionRepresentation( // denominator for a division is zero, we ignore its dividend. EXPECT_TRUE(dividends.size() == expectedDividends.size()); for (unsigned i = 0, e = dividends.size(); i < e; ++i) - if (denominators[i] != 0) + if (denominators[i] != 0) { EXPECT_TRUE(expectedDividends[i] == dividends[i]); + } } TEST(IntegerPolyhedronTest, computeLocalReprSimple) { From 81793bd276afefea0e525307676181478fc614c9 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Mon, 24 Jan 2022 09:28:11 +0100 Subject: [PATCH 337/946] [clang-format] Assert Line->First and State.NextToken->Previous. NFC. Cf. 
scan-build reports: * https://llvm.org/reports/scan-build/report-FormatToken.cpp-precomputeFormattingInfos-35-93e1e1.html#EndPath * https://llvm.org/reports/scan-build/report-ContinuationIndenter.cpp-addTokenOnCurrentLine-15-dfdc6d.html#EndPath --- clang/lib/Format/ContinuationIndenter.cpp | 9 +++++++-- clang/lib/Format/FormatToken.cpp | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 28f13c06e3088..b66584652bc82 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -543,13 +543,15 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, bool DryRun, unsigned ExtraSpaces) { const FormatToken &Current = *State.NextToken; + assert(State.NextToken->Previous); + const FormatToken &Previous = *State.NextToken->Previous; assert(!State.Stack.empty()); State.NoContinuation = false; if ((Current.is(TT_ImplicitStringLiteral) && - (Current.Previous->Tok.getIdentifierInfo() == nullptr || - Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == + (Previous.Tok.getIdentifierInfo() == nullptr || + Previous.Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_not_keyword))) { unsigned EndColumn = SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd()); @@ -579,7 +581,9 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned ExtraSpaces) { FormatToken &Current = *State.NextToken; + assert(State.NextToken->Previous); const FormatToken &Previous = *State.NextToken->Previous; + if (Current.is(tok::equal) && (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) && State.Stack.back().VariablePos == 0) { @@ -775,6 +779,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, unsigned ContinuationIndenter::addTokenOnNewLine(LineState 
&State, bool DryRun) { FormatToken &Current = *State.NextToken; + assert(State.NextToken->Previous); const FormatToken &Previous = *State.NextToken->Previous; // Extra penalty that needs to be added because of the way certain line diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index def5663d04498..59d6f29bb54d2 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -189,6 +189,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { bool HasSeparatingComment = false; for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { + assert(ItemBegin); // Skip comments on their own line. while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { ItemBegin = ItemBegin->Next; From 9aaa74aeeff37b85e29369287ee94773e699b8b7 Mon Sep 17 00:00:00 2001 From: "Chenbing.Zheng" Date: Mon, 24 Jan 2022 08:48:39 +0000 Subject: [PATCH 338/946] [RISCV] Add patterns of SET[U]LT_VI for SETCC forms This patch optimizes "li a0, 5 vmsgt[u].vx v10, v8, a0" -> "vmsgt[u].vi v10, v8, 5" Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D118014 --- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 ++ .../RISCV/rvv/fixed-vectors-int-setcc.ll | 22 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 0ac959d79a024..7b556174bbf0c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -820,6 +820,8 @@ foreach vti = AllIntegerVectors in { defm : VPatIntegerSetCCVL_VI_Swappable; defm : VPatIntegerSetCCVL_VI_Swappable; defm : VPatIntegerSetCCVL_VI_Swappable; + defm : VPatIntegerSetCCVL_VI_Swappable; + defm : VPatIntegerSetCCVL_VI_Swappable; defm : VPatIntegerSetCCVL_VIPlus1; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll index 9d0fb925a46e2..403fc6d7f43ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll @@ -553,7 +553,7 @@ define void @setgt_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vx v12, v8, zero +; CHECK-NEXT: vmsgt.vi v12, v8, 0 ; CHECK-NEXT: vsm.v v12, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, <64 x i8>* %x @@ -564,6 +564,23 @@ define void @setgt_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) { ret void } +define void @setgt_vi_v64i8_nonzero(<64 x i8>* %x, <64 x i1>* %z) { +; CHECK-LABEL: setgt_vi_v64i8_nonzero: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmsgt.vi v12, v8, 5 +; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: ret + %a = load <64 x i8>, <64 x i8>* %x + %b = insertelement <64 x i8> undef, i8 5, i32 0 + %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer + %d = icmp sgt <64 x i8> %a, %c + store <64 x i1> %d, <64 x i1>* %z + ret void +} + define void @setlt_vi_v128i8(<128 x i8>* %x, <128 x i1>* %z) { ; CHECK-LABEL: setlt_vi_v128i8: ; CHECK: # %bb.0: @@ -619,8 +636,7 @@ define void @setugt_vi_v32i8(<32 x i8>* %x, <32 x i1>* %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a0, 5 -; CHECK-NEXT: vmsgtu.vx v10, v8, a0 +; CHECK-NEXT: vmsgtu.vi v10, v8, 5 ; CHECK-NEXT: vsm.v v10, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x From ba845787b3fdd03380b8651d6ce11afeac9d6bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Tue, 4 Jan 2022 13:38:10 +0100 Subject: [PATCH 339/946] [clang][sema] Add missing diagnostic parameter The test case otherwise fails an assertion in Diagnostic::getArgKind(). 
Differential Revision: https://reviews.llvm.org/D116595 --- clang/lib/Sema/SemaModule.cpp | 2 +- clang/test/Modules/cxx20-export-import.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 clang/test/Modules/cxx20-export-import.cpp diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 996063f83e946..747734f2d0ff0 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -395,7 +395,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, // [module.interface]p1: // An export-declaration shall inhabit a namespace scope and appear in the // purview of a module interface unit. - Diag(ExportLoc, diag::err_export_not_in_module_interface); + Diag(ExportLoc, diag::err_export_not_in_module_interface) << 0; } return Import; diff --git a/clang/test/Modules/cxx20-export-import.cpp b/clang/test/Modules/cxx20-export-import.cpp new file mode 100644 index 0000000000000..a2620bd600649 --- /dev/null +++ b/clang/test/Modules/cxx20-export-import.cpp @@ -0,0 +1,3 @@ + +// RUN: %clang_cc1 -std=c++20 -fmodules -fmodules-cache-path=%t -fimplicit-module-maps -I%S/Inputs -verify %s +export import dummy; // expected-error {{export declaration can only be used within a module interface unit after the module declaration}} From 3ad6de31c0cf5064867e6f9bf99e27e0b5c4128d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Fri, 21 Jan 2022 09:48:43 +0100 Subject: [PATCH 340/946] [clang][tests] Fix a c++/libc++ -stdlib value typo "c++" is not usually a valid value for -stdlib. 
Differential Revision: https://reviews.llvm.org/D117862 --- clang/test/Driver/wasm-toolchain.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp index 18ebddc2093bf..df11324f2024b 100644 --- a/clang/test/Driver/wasm-toolchain.cpp +++ b/clang/test/Driver/wasm-toolchain.cpp @@ -14,35 +14,35 @@ // A basic C++ link command-line with unknown OS. -// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=c++ %s 2>&1 \ +// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK %s // LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ link command-line with optimization with unknown OS. -// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=c++ 2>&1 \ +// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT %s // LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ link command-line with known OS. 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=c++ %s 2>&1 \ +// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN %s // LINK_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ link command-line with optimization with known OS. -// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=c++ 2>&1 \ +// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN %s // LINK_OPT_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" // A basic C++ compile command-line with known OS. -// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=c++ %s 2>&1 \ +// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=COMPILE %s // COMPILE: clang{{.*}}" "-cc1" // COMPILE: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]" From d29e319263de17516f50cd46edbf1e62c1289dd4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 12:48:31 +0100 Subject: [PATCH 341/946] [OpaquePtrs] Add getNonOpaquePointerElementType() method (NFC) This method is intended for use in places that cannot be reached with opaque pointers, or part of deprecated methods. This makes it easier to see that some uses of getPointerElementType() don't need further action. 
Differential Revision: https://reviews.llvm.org/D117870 --- llvm/include/llvm/IR/Type.h | 9 +++++++ llvm/lib/AsmParser/LLParser.cpp | 12 ++++----- llvm/lib/IR/Core.cpp | 25 ++++++++----------- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 4 +-- llvm/lib/Transforms/Scalar/SROA.cpp | 6 +++-- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index c899c46d40554..98c97375ad7b2 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -366,7 +366,16 @@ class Type { return ContainedTys[0]; } + /// This method is deprecated without replacement. Pointer element types are + /// not available with opaque pointers. Type *getPointerElementType() const { + return getNonOpaquePointerElementType(); + } + + /// Only use this method in code that is not reachable with opaque pointers, + /// or part of deprecated methods that will be removed as part of the opaque + /// pointers transition. + Type *getNonOpaquePointerElementType() const { assert(getTypeID() == PointerTyID); assert(NumContainedTys && "Attempting to get element type of opaque pointer"); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 18c1c31e101dc..cec4ffd82f818 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1410,14 +1410,14 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) { nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace()); - if (auto *FT = dyn_cast(PTy->getPointerElementType())) + Type *ElemTy = PTy->getNonOpaquePointerElementType(); + if (auto *FT = dyn_cast(ElemTy)) return Function::Create(FT, GlobalValue::ExternalWeakLinkage, PTy->getAddressSpace(), "", M); else - return new GlobalVariable(*M, PTy->getPointerElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, "", - nullptr, GlobalVariable::NotThreadLocal, - PTy->getAddressSpace()); + return new GlobalVariable( + *M, ElemTy, false, 
GlobalValue::ExternalWeakLinkage, nullptr, "", + nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace()); } Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty, @@ -5602,7 +5602,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) { if (FRVI != ForwardRefVals.end()) { FwdFn = FRVI->second.first; if (!FwdFn->getType()->isOpaque()) { - if (!FwdFn->getType()->getPointerElementType()->isFunctionTy()) + if (!FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy()) return error(FRVI->second.second, "invalid forward reference to " "function as global value!"); if (FwdFn->getType() != PFT) diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 9415e35a8f512..3f899471843fc 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -1691,8 +1691,7 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, ArrayRef IdxList(unwrap(ConstantIndices, NumIndices), NumIndices); Constant *Val = unwrap(ConstantVal); - Type *Ty = - cast(Val->getType()->getScalarType())->getElementType(); + Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType(); return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList)); } @@ -1710,8 +1709,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, ArrayRef IdxList(unwrap(ConstantIndices, NumIndices), NumIndices); Constant *Val = unwrap(ConstantVal); - Type *Ty = - cast(Val->getType()->getScalarType())->getElementType(); + Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType(); return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList)); } @@ -2278,7 +2276,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name) { auto *PTy = cast(unwrap(Ty)); - return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + return wrap(GlobalAlias::create(PTy->getNonOpaquePointerElementType(), 
+ PTy->getAddressSpace(), GlobalValue::ExternalLinkage, Name, unwrap(Aliasee), unwrap(M))); } @@ -3218,7 +3217,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, const char *Name) { Value *V = unwrap(Fn); FunctionType *FnT = - cast(cast(V->getType())->getElementType()); + cast(V->getType()->getNonOpaquePointerElementType()); return wrap( unwrap(B)->CreateInvoke(FnT, unwrap(Fn), unwrap(Then), unwrap(Catch), @@ -3590,7 +3589,8 @@ LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal, Value *V = unwrap(PointerVal); PointerType *Ty = cast(V->getType()); - return wrap(unwrap(B)->CreateLoad(Ty->getElementType(), V, Name)); + return wrap( + unwrap(B)->CreateLoad(Ty->getNonOpaquePointerElementType(), V, Name)); } LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty, @@ -3692,8 +3692,7 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); Value *Val = unwrap(Pointer); - Type *Ty = - cast(Val->getType()->getScalarType())->getElementType(); + Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType(); return wrap(unwrap(B)->CreateGEP(Ty, Val, IdxList, Name)); } @@ -3709,8 +3708,7 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); Value *Val = unwrap(Pointer); - Type *Ty = - cast(Val->getType()->getScalarType())->getElementType(); + Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType(); return wrap(unwrap(B)->CreateInBoundsGEP(Ty, Val, IdxList, Name)); } @@ -3725,8 +3723,7 @@ LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, unsigned Idx, const char *Name) { Value *Val = unwrap(Pointer); - Type *Ty = - cast(Val->getType()->getScalarType())->getElementType(); + Type *Ty = 
Val->getType()->getScalarType()->getNonOpaquePointerElementType(); return wrap(unwrap(B)->CreateStructGEP(Ty, Val, Idx, Name)); } @@ -3947,7 +3944,7 @@ LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, const char *Name) { Value *V = unwrap(Fn); FunctionType *FnT = - cast(cast(V->getType())->getElementType()); + cast(V->getType()->getNonOpaquePointerElementType()); return wrap(unwrap(B)->CreateCall(FnT, unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Name)); diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index a8123aee319ef..965a146c143fa 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -679,8 +679,8 @@ static void checkAsyncFuncPointer(const Instruction *I, Value *V) { if (AsyncFuncPtrAddr->getType()->isOpaquePointerTy()) return; - auto *StructTy = - cast(AsyncFuncPtrAddr->getType()->getPointerElementType()); + auto *StructTy = cast( + AsyncFuncPtrAddr->getType()->getNonOpaquePointerElementType()); if (StructTy->isOpaque() || !StructTy->isPacked() || StructTy->getNumElements() != 2 || !StructTy->getElementType(0)->isIntegerTy(32) || diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 1b0290cf5709d..2ed87ce6295b3 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1437,8 +1437,10 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, if (Indices.size() == 1 && cast(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(), - BasePtr, Indices, NamePrefix + "sroa_idx"); + // buildGEP() is only called for non-opaque pointers. 
+ return IRB.CreateInBoundsGEP( + BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices, + NamePrefix + "sroa_idx"); } /// Get a natural GEP off of the BasePtr walking through Ty toward From 67346b43e0ed28047b3a4e40ea18d2218febcbf8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 12:01:18 +0100 Subject: [PATCH 342/946] [Attributor] Use MemoryLocation to get pointer operand and accessed type (NFCI) This relies on existing APIs and avoids accessing the pointer element type. The alternative would be to extend getPointerOperand() to also return the accessed type, but I figured going through MemoryLocation would be cleaner. Differential Revision: https://reviews.llvm.org/D117868 --- .../Transforms/IPO/AttributorAttributes.cpp | 75 +++++++------------ 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index d0e13dc269385..76420783b2d15 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -417,12 +417,10 @@ const Value *stripAndAccumulateMinimalOffsets( AttributorAnalysis); } -static const Value *getMinimalBaseOfAccessPointerOperand( - Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I, - int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { - const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false); - if (!Ptr) - return nullptr; +static const Value * +getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA, + const Value *Ptr, int64_t &BytesOffset, + const DataLayout &DL, bool AllowNonInbounds = false) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); const Value *Base = stripAndAccumulateMinimalOffsets( A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); @@ -431,18 +429,6 @@ static const Value *getMinimalBaseOfAccessPointerOperand( return Base; } -static const Value 
* -getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, - const DataLayout &DL, - bool AllowNonInbounds = false) { - const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false); - if (!Ptr) - return nullptr; - - return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, - AllowNonInbounds); -} - /// Clamp the information known for all returned values of a function /// (identified by \p QueryingAA) into \p S. template @@ -2151,31 +2137,26 @@ static int64_t getKnownNonNullAndDerefBytesForUse( return DerefAA.getKnownDereferenceableBytes(); } + Optional Loc = MemoryLocation::getOrNone(I); + if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile()) + return 0; + int64_t Offset; const Value *Base = - getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL); - if (Base) { - if (Base == &AssociatedValue && - getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - int64_t DerefBytes = - (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset; - - IsNonNull |= !NullPointerIsDefined; - return std::max(int64_t(0), DerefBytes); - } + getMinimalBaseOfPointer(A, QueryingAA, Loc->Ptr, Offset, DL); + if (Base && Base == &AssociatedValue) { + int64_t DerefBytes = Loc->Size.getValue() + Offset; + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); } /// Corner case when an offset is 0. 
- Base = getBasePointerOfAccessPointerOperand(I, Offset, DL, - /*AllowNonInbounds*/ true); - if (Base) { - if (Offset == 0 && Base == &AssociatedValue && - getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - int64_t DerefBytes = - (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); - IsNonNull |= !NullPointerIsDefined; - return std::max(int64_t(0), DerefBytes); - } + Base = GetPointerBaseWithConstantOffset(Loc->Ptr, Offset, DL, + /*AllowNonInbounds*/ true); + if (Base && Base == &AssociatedValue && Offset == 0) { + int64_t DerefBytes = Loc->Size.getValue(); + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); } return 0; @@ -4083,17 +4064,15 @@ struct AADereferenceableImpl : AADereferenceable { if (!UseV->getType()->isPointerTy()) return; - Type *PtrTy = UseV->getType(); - const DataLayout &DL = A.getDataLayout(); + Optional Loc = MemoryLocation::getOrNone(I); + if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile()) + return; + int64_t Offset; - if (const Value *Base = getBasePointerOfAccessPointerOperand( - I, Offset, DL, /*AllowNonInbounds*/ true)) { - if (Base == &getAssociatedValue() && - getPointerOperand(I, /* AllowVolatile */ false) == UseV) { - uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType()); - State.addAccessedBytes(Offset, Size); - } - } + const Value *Base = GetPointerBaseWithConstantOffset( + Loc->Ptr, Offset, A.getDataLayout(), /*AllowNonInbounds*/ true); + if (Base && Base == &getAssociatedValue()) + State.addAccessedBytes(Offset, Loc->Size.getValue()); } /// See followUsesInMBEC From 7ccacaf4428d1712029594184baa6f617a51c340 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 11 Jan 2022 13:16:03 +0000 Subject: [PATCH 343/946] [flang][examples] Add missing CMake dependencies Currently, everything that includes "flang/Parser/parse-tree.h" in Flang depends on the `acc_gen` and `omp_gen` CMake targets (these targets generate include files that are used in
"parse-tree.h"). The examples in Flang do use this header file and hence also depend on `acc_gen`/`omp_gen`. This patch updates relevant CMake scripts accordingly. I've also taken the liberty to rename some of the example files so that their names follow LLVM's coding guidelines. Differential Revision: https://reviews.llvm.org/D117016 --- flang/examples/CMakeLists.txt | 2 +- flang/examples/FlangOmpReport/CMakeLists.txt | 9 +++++++++ .../FlangOmpReport.cpp} | 2 +- .../FlangOmpReportVisitor.cpp} | 2 +- .../FlangOmpReportVisitor.h} | 0 .../requirements.txt | 0 .../yaml_summarizer.py | 0 flang/examples/PrintFlangFunctionNames/CMakeLists.txt | 7 +++++-- flang/examples/flang-omp-report-plugin/CMakeLists.txt | 6 ------ 9 files changed, 17 insertions(+), 11 deletions(-) create mode 100644 flang/examples/FlangOmpReport/CMakeLists.txt rename flang/examples/{flang-omp-report-plugin/flang-omp-report.cpp => FlangOmpReport/FlangOmpReport.cpp} (98%) rename flang/examples/{flang-omp-report-plugin/flang-omp-report-visitor.cpp => FlangOmpReport/FlangOmpReportVisitor.cpp} (99%) rename flang/examples/{flang-omp-report-plugin/flang-omp-report-visitor.h => FlangOmpReport/FlangOmpReportVisitor.h} (100%) rename flang/examples/{flang-omp-report-plugin => FlangOmpReport}/requirements.txt (100%) rename flang/examples/{flang-omp-report-plugin => FlangOmpReport}/yaml_summarizer.py (100%) delete mode 100644 flang/examples/flang-omp-report-plugin/CMakeLists.txt diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index f9b656ce86fe1..b0a78eeaa4eaa 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -12,4 +12,4 @@ target_link_libraries(external-hello-world ) add_subdirectory(PrintFlangFunctionNames) -add_subdirectory(flang-omp-report-plugin) +add_subdirectory(FlangOmpReport) diff --git a/flang/examples/FlangOmpReport/CMakeLists.txt b/flang/examples/FlangOmpReport/CMakeLists.txt new file mode 100644 index 0000000000000..aebebe92d6b30 ---
/dev/null +++ b/flang/examples/FlangOmpReport/CMakeLists.txt @@ -0,0 +1,9 @@ +add_llvm_library(flangOmpReport + MODULE + FlangOmpReport.cpp + FlangOmpReportVisitor.cpp + + DEPENDS + acc_gen + omp_gen +) diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp b/flang/examples/FlangOmpReport/FlangOmpReport.cpp similarity index 98% rename from flang/examples/flang-omp-report-plugin/flang-omp-report.cpp rename to flang/examples/FlangOmpReport/FlangOmpReport.cpp index 9ee8eb1a80cbd..0fa7582561d27 100644 --- a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReport.cpp @@ -15,7 +15,7 @@ // //===----------------------------------------------------------------------===// -#include "flang-omp-report-visitor.h" +#include "FlangOmpReportVisitor.h" #include "flang/Frontend/CompilerInstance.h" #include "flang/Frontend/FrontendActions.h" diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp similarity index 99% rename from flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp rename to flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp index 32dcef25fedab..a0c3b194bfd31 100644 --- a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "flang-omp-report-visitor.h" +#include "FlangOmpReportVisitor.h" #include "llvm/ADT/StringExtras.h" namespace Fortran { diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h similarity index 100% rename from flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h rename to flang/examples/FlangOmpReport/FlangOmpReportVisitor.h diff --git a/flang/examples/flang-omp-report-plugin/requirements.txt 
b/flang/examples/FlangOmpReport/requirements.txt similarity index 100% rename from flang/examples/flang-omp-report-plugin/requirements.txt rename to flang/examples/FlangOmpReport/requirements.txt diff --git a/flang/examples/flang-omp-report-plugin/yaml_summarizer.py b/flang/examples/FlangOmpReport/yaml_summarizer.py similarity index 100% rename from flang/examples/flang-omp-report-plugin/yaml_summarizer.py rename to flang/examples/FlangOmpReport/yaml_summarizer.py diff --git a/flang/examples/PrintFlangFunctionNames/CMakeLists.txt b/flang/examples/PrintFlangFunctionNames/CMakeLists.txt index 6b107b4e1ea53..490f2ea895163 100644 --- a/flang/examples/PrintFlangFunctionNames/CMakeLists.txt +++ b/flang/examples/PrintFlangFunctionNames/CMakeLists.txt @@ -1,7 +1,10 @@ # TODO: Note that this is currently only available on Linux. # On Windows, we would also have to specify e.g. `PLUGIN_TOOL`. -add_llvm_library( - flangPrintFunctionNames +add_llvm_library(flangPrintFunctionNames MODULE PrintFlangFunctionNames.cpp + + DEPENDS + acc_gen + omp_gen ) diff --git a/flang/examples/flang-omp-report-plugin/CMakeLists.txt b/flang/examples/flang-omp-report-plugin/CMakeLists.txt deleted file mode 100644 index 993ba320d7570..0000000000000 --- a/flang/examples/flang-omp-report-plugin/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library( - flangOmpReport - MODULE - flang-omp-report.cpp - flang-omp-report-visitor.cpp -) From 4f8fdf78279f0cb298dc0dc215ee56b0342235ee Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 19 Jan 2022 14:40:22 +0000 Subject: [PATCH 344/946] [ISEL] Canonicalise constant splats to RHS. SelectionDAG::getNode() canonicalises constants to the RHS if the operation is commutative, but it doesn't do so for constant splat vectors. Doing this early helps making certain folds on vector types, simplifying the code required for target DAGCombines that are enabled before Type legalization. 
Somewhat to my surprise, DAGCombine doesn't seem to traverse the DAG in a post-order DFS, so at the time of doing some custom fold where the input is a MUL, DAGCombiner::visitMUL hasn't yet reordered the constant splat to the RHS. This patch leads to a few improvements, but also a few minor regressions, which I traced down to D46492. When I tried reverting this change to see if the changes were still necessary, I ran into some segfaults. Not sure if there is some latent bug there. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117794 --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 ++++---- ...-masked-merge-vector-variablemask-const.ll | 8 ++-- llvm/test/CodeGen/PowerPC/combine-fneg.ll | 8 ++-- .../CodeGen/PowerPC/repeated-fp-divisors.ll | 4 +- llvm/test/CodeGen/X86/dpbusd_const.ll | 17 +++---- llvm/test/CodeGen/X86/extractelement-fp.ll | 8 ++-- llvm/test/CodeGen/X86/fp-round.ll | 14 +++--- llvm/test/CodeGen/X86/fp128-cast.ll | 2 +- ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 16 +++---- llvm/test/CodeGen/X86/pr43509.ll | 8 ++-- ...-masked-merge-vector-variablemask-const.ll | 47 ++++++++++--------- 11 files changed, 75 insertions(+), 76 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 52c69feebaf7e..7ca6f9aa4cf0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5610,22 +5610,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getOpcode() != ISD::DELETED_NODE && N2.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - ConstantSDNode *N1C = dyn_cast(N1); - ConstantSDNode *N2C = dyn_cast(N2); - ConstantFPSDNode *N1CFP = dyn_cast(N1); - ConstantFPSDNode *N2CFP = dyn_cast(N2); - // Canonicalize constant to RHS if commutative. 
if (TLI->isCommutativeBinOp(Opcode)) { - if (N1C && !N2C) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } else if (N1CFP && !N2CFP) { - std::swap(N1CFP, N2CFP); + bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); + bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); + bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) std::swap(N1, N2); - } } + ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N2C = dyn_cast(N2); + switch (Opcode) { default: break; case ISD::TokenFactor: diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll index 2e385fdd6f25f..aa0b7e14afc56 100644 --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll @@ -126,7 +126,8 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> % define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: in_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b +; CHECK-NEXT: bic v0.16b, v2.16b, v1.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %n0 = xor <4 x i32> , %y ; %x %n1 = and <4 x i32> %n0, %mask @@ -152,8 +153,9 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: in_constant_mone_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: and v0.16b, v1.16b, v2.16b -; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v1.16b +; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %notmask = xor <4 x i32> %mask, %n0 = xor <4 x i32> , %y ; %x diff --git 
a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll index 1124fbd22a0e5..771c05f184a04 100644 --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) { ; CHECK-NEXT: xvredp 2, 0 ; CHECK-NEXT: xxswapd 1, 1 ; CHECK-NEXT: xxlor 3, 1, 1 -; CHECK-NEXT: xvmaddadp 3, 0, 2 -; CHECK-NEXT: xvnmsubadp 2, 2, 3 -; CHECK-NEXT: xvmaddadp 1, 0, 2 -; CHECK-NEXT: xvmsubadp 2, 2, 1 +; CHECK-NEXT: xvnmsubadp 3, 0, 2 +; CHECK-NEXT: xvmaddadp 2, 2, 3 +; CHECK-NEXT: xvnmsubadp 1, 0, 2 +; CHECK-NEXT: xvnmaddadp 2, 2, 1 ; CHECK-NEXT: xvmuldp 34, 34, 2 ; CHECK-NEXT: xvmuldp 35, 35, 2 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll index 2b93974263286..1185737fb0c96 100644 --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -36,9 +36,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) { ; CHECK-NEXT: lvx 4, 0, 3 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: xvresp 1, 0 -; CHECK-NEXT: xvnmsubasp 35, 0, 1 +; CHECK-NEXT: xvmaddasp 35, 0, 1 ; CHECK-NEXT: xvmulsp 0, 34, 36 -; CHECK-NEXT: xvmaddasp 1, 1, 35 +; CHECK-NEXT: xvnmsubasp 1, 1, 35 ; CHECK-NEXT: xvmulsp 34, 0, 1 ; CHECK-NEXT: blr %ins = insertelement <4 x float> undef, float %a, i32 0 diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll index aa780fe3b94ad..b0ffb23c9ced3 100644 --- a/llvm/test/CodeGen/X86/dpbusd_const.ll +++ b/llvm/test/CodeGen/X86/dpbusd_const.ll @@ -68,8 +68,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) { ; AVXVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0] 
-; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1 +; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; AVXVNNI-NEXT: vmovd %xmm1, %eax ; AVXVNNI-NEXT: addl %edi, %eax ; AVXVNNI-NEXT: retq @@ -80,10 +79,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) { ; AVX512VNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] -; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0] -; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2 -; AVX512VNNI-NEXT: vmovd %xmm2, %eax +; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512VNNI-NEXT: vmovd %xmm1, %eax ; AVX512VNNI-NEXT: addl %edi, %eax ; AVX512VNNI-NEXT: vzeroupper ; AVX512VNNI-NEXT: retq @@ -92,10 +90,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) { ; AVX512VLVNNI: # %bb.0: # %entry ; AVX512VLVNNI-NEXT: vpmovdb %xmm0, %xmm0 ; AVX512VLVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0] -; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2 -; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax +; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax ; AVX512VLVNNI-NEXT: addl %edi, %eax ; AVX512VLVNNI-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll index 8ff73d5a7ffa8..c398fb8c74cb8 100644 --- a/llvm/test/CodeGen/X86/extractelement-fp.ll +++ b/llvm/test/CodeGen/X86/extractelement-fp.ll @@ -1070,7 +1070,7 @@ define float @round_v4f32(<4 x float> %x) nounwind { ; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; 
X64-NEXT: vandps %xmm1, %xmm0, %xmm1 ; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] -; X64-NEXT: vorps %xmm1, %xmm2, %xmm1 +; X64-NEXT: vorps %xmm2, %xmm1, %xmm1 ; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; X64-NEXT: retq @@ -1081,7 +1081,7 @@ define float @round_v4f32(<4 x float> %x) nounwind { ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; X86-NEXT: vandps %xmm1, %xmm0, %xmm1 ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] -; X86-NEXT: vorps %xmm1, %xmm2, %xmm1 +; X86-NEXT: vorps %xmm2, %xmm1, %xmm1 ; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) @@ -1099,7 +1099,7 @@ define double @round_v4f64(<4 x double> %x) nounwind { ; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X64-NEXT: # xmm2 = mem[0,0] -; X64-NEXT: vorpd %xmm1, %xmm2, %xmm1 +; X64-NEXT: vorpd %xmm2, %xmm1, %xmm1 ; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; X64-NEXT: vzeroupper @@ -1114,7 +1114,7 @@ define double @round_v4f64(<4 x double> %x) nounwind { ; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; X86-NEXT: # xmm2 = mem[0,0] -; X86-NEXT: vorpd %xmm1, %xmm2, %xmm1 +; X86-NEXT: vorpd %xmm2, %xmm1, %xmm1 ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; X86-NEXT: vmovsd %xmm0, (%esp) diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll index 7798ab682d41e..955501544ff55 100644 --- a/llvm/test/CodeGen/X86/fp-round.ll +++ b/llvm/test/CodeGen/X86/fp-round.ll @@ -41,7 +41,7 @@ define half @round_f16(half %h) { ; AVX1-NEXT: callq ___extendhfsf2 ; AVX1-NEXT: vandps 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] -; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: callq ___truncsfhf2 @@ -94,7 +94,7 @@ define float @round_f32(float %x) { ; AVX1: ## %bb.0: ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] -; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -130,7 +130,7 @@ define double @round_f64(double %x) { ; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1] ; AVX1-NEXT: ## xmm2 = mem[0,0] -; AVX1-NEXT: vorpd %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vorpd %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -521,11 +521,11 @@ define <16 x float> @round_v16f32(<16 x float> %x) { ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm3 ; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1] -; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3 +; AVX1-NEXT: vorps %ymm4, %ymm3, %ymm3 ; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0 ; AVX1-NEXT: vroundps $11, %ymm0, %ymm0 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm2 -; AVX1-NEXT: vorps %ymm2, %ymm4, %ymm2 +; AVX1-NEXT: vorps %ymm4, %ymm2, %ymm2 ; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vroundps $11, %ymm1, %ymm1 ; AVX1-NEXT: retq @@ -620,11 +620,11 @@ define <8 x double> @round_v8f64(<8 x double> %x) { ; 
AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0] ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm3 ; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1] -; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3 +; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3 ; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0 ; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0 ; AVX1-NEXT: vandpd %ymm2, %ymm1, %ymm2 -; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2 +; AVX1-NEXT: vorpd %ymm4, %ymm2, %ymm2 ; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ; AVX1-NEXT: vroundpd $11, %ymm1, %ymm1 ; AVX1-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index 500cb0c677ff5..530ae967e1c07 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1326,7 +1326,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind { ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf] ; X64-AVX-NEXT: # xmm1 = mem[0,0] -; X64-AVX-NEXT: vorps %xmm0, %xmm1, %xmm0 +; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: callq __extenddftf2@PLT ; X64-AVX-NEXT: addq $8, %rsp ; X64-AVX-NEXT: .LBB26_2: # %cleanup diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll index 8a452ddc06b62..54d74c3c86d8e 100644 --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -465,9 +465,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE2-NEXT: pmuludq %xmm1, 
%xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-SSE2-NEXT: pand %xmm1, %xmm0 @@ -491,9 +491,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X64-SSE2-NEXT: pand %xmm1, %xmm0 @@ -611,9 +611,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-SSE2-NEXT: pand %xmm1, %xmm0 @@ -637,9 +637,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = 
xmm3[0,2,2,3] ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X64-SSE2-NEXT: pand %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr43509.ll b/llvm/test/CodeGen/X86/pr43509.ll index e2c3affd952bb..87ddad03e9c45 100644 --- a/llvm/test/CodeGen/X86/pr43509.ll +++ b/llvm/test/CodeGen/X86/pr43509.ll @@ -4,12 +4,10 @@ define <8 x i8> @foo(<8 x float> %arg) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0 -; CHECK-NEXT: vpmovm2b %k0, %xmm1 -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vcmpltps %ymm2, %ymm0, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k1 +; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1} ; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z} -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq bb: diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll index 2d6c4dc829ed9..705d655939c30 100644 --- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -336,23 +336,26 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32 ; CHECK-SSE1-LABEL: in_constant_mone_vary: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 +; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 -; CHECK-SSE1-NEXT: orps %xmm0, %xmm1 +; CHECK-SSE1-NEXT: andnps (%rcx), %xmm1 +; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0 -; CHECK-SSE2-NEXT: orps (%rdx), %xmm0 +; 
CHECK-SSE2-NEXT: movaps (%rsi), %xmm1 +; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0 +; CHECK-SSE2-NEXT: andnps (%rdx), %xmm0 +; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_mone_vary: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0 -; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0 +; CHECK-XOP-NEXT: vandnps (%rdx), %xmm0, %xmm1 +; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 @@ -408,30 +411,32 @@ define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 -; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] -; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: andps (%rdx), %xmm0 -; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 -; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) +; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0 +; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1 +; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE1-NEXT: movaps %xmm0, %xmm2 +; CHECK-SSE1-NEXT: andnps %xmm1, %xmm2 +; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2 +; CHECK-SSE1-NEXT: movaps %xmm2, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0 -; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1 -; CHECK-SSE2-NEXT: pand (%rsi), %xmm0 -; CHECK-SSE2-NEXT: por %xmm1, %xmm0 +; CHECK-SSE2-NEXT: movdqa (%rsi), %xmm1 +; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; CHECK-SSE2-NEXT: pxor (%rdx), %xmm2 +; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0 +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0 +; 
CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0 -; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0 +; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm1 +; CHECK-XOP-NEXT: vpandn %xmm1, %xmm0, %xmm1 +; CHECK-XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 From e7c9a6cae09d99388d8384ca7c0fb5b24b353975 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 17 Jan 2022 15:48:01 +0100 Subject: [PATCH 345/946] [SDAG] Don't move DBG_VALUE instructions after insertion point during scheduling (PR53243) EmitSchedule() shouldn't be touching instructions after the provided insertion point. The change introduced in D83561 performs a scan to the end of the block, and thus may move unrelated instructions. In particular, this ends up moving instructions that have been produced by FastISel and will later be deleted. Moving them means that more instructions than intended are removed. Fix this by stopping the iteration when the insertion point is reached. Fixes https://github.com/llvm/llvm-project/issues/53243. 
Differential Revision: https://reviews.llvm.org/D117489 --- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 7 ++-- .../CodeGen/X86/pr53243-tail-call-fastisel.ll | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index bec240d6c4d49..403f345738998 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -1057,12 +1057,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { "first terminator cannot be a debug value"); for (MachineInstr &MI : make_early_inc_range( make_range(std::next(FirstTerm), InsertBB->end()))) { + // Only scan up to insertion point. + if (&MI == InsertPos) + break; + if (!MI.isDebugValue()) continue; - if (&MI == InsertPos) - InsertPos = std::prev(InsertPos->getIterator()); - // The DBG_VALUE was referencing a value produced by a terminator. By // moving the DBG_VALUE, the referenced value also needs invalidating. 
MI.getOperand(0).ChangeToRegister(0, false); diff --git a/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll b/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll new file mode 100644 index 0000000000000..333eff8fb0081 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s + +define void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp set_state@PLT # TAILCALL + tail call void @set_state() + call void @llvm.dbg.value(metadata i64 0, metadata !10, metadata !DIExpression()), !dbg !16 + ret void +} + +declare void @set_state() + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nofree nosync nounwind readnone speculatable willreturn } + +!llvm.module.flags = !{!0} +!llvm.dbg.cu = !{!1} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !2, producer: "clang LLVM (rustc version 1.60.0-nightly (ec4bcaac4 2022-01-15))", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3) +!2 = !DIFile(filename: "src/lib.rs/@/bug.63e521cd-cgu.0", directory: "/tmp/rust-bug") +!3 = !{!4} +!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Option", file: !5, baseType: !6, size: 8, align: 8, flags: DIFlagEnumClass, elements: !7) +!5 = !DIFile(filename: "", directory: "") +!6 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned) +!7 = !{!8, !9} +!8 = !DIEnumerator(name: "None", value: 0) +!9 = !DIEnumerator(name: "Some", value: 1) +!10 = !DILocalVariable(name: "msg", arg: 2, scope: !11, file: !12, line: 689, type: !6) +!11 = distinct !DISubprogram(name: "expect<()>", linkageName: "_ZN4core6option15Option$LT$T$GT$6expect17h9a574c18f194c213E", scope: !4, file: !12, line: 689, type: !13, scopeLine: 
689, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !15, retainedNodes: !15) +!12 = !DIFile(filename: "/rustc/ec4bcaac450279b029f3480b8b8f1b82ab36a5eb/library/core/src/option.rs", directory: "", checksumkind: CSK_MD5, checksum: "4120c8557937a0772190a676ec193800") +!13 = !DISubroutineType(types: !14) +!14 = !{null, !4} +!15 = !{} +!16 = !DILocation(line: 0, scope: !11) From 0d1308a7b77c9ed87386c22a728a6c97e2fb4887 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 21 Jan 2022 10:32:21 +0100 Subject: [PATCH 346/946] [AArch64][GlobalISel] Support returned argument with multiple registers The call lowering code assumed that a returned argument could only consist of one register. Pass an ArrayRef instead of Register to make sure that all parts get assigned. Fixes https://github.com/llvm/llvm-project/issues/53315. Differential Revision: https://reviews.llvm.org/D117866 --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 14 +++++------ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 11 ++++---- .../AArch64/GISel/AArch64CallLowering.cpp | 2 +- .../CodeGen/AArch64/pr53315-returned-i128.ll | 25 +++++++++++++++++++ 4 files changed, 39 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 82c125993ec3d..3a4b3ee18e1bc 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -388,12 +388,12 @@ class CallLowering { /// \p Handler to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. 
- bool determineAndHandleAssignments(ValueHandler &Handler, - ValueAssigner &Assigner, - SmallVectorImpl &Args, - MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, bool IsVarArg, - Register ThisReturnReg = Register()) const; + bool + determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner, + SmallVectorImpl &Args, + MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, bool IsVarArg, + ArrayRef ThisReturnRegs = None) const; /// Use \p Handler to insert code to handle the argument/return values /// represented by \p Args. It's expected determineAssignments previously @@ -402,7 +402,7 @@ class CallLowering { CCState &CCState, SmallVectorImpl &ArgLocs, MachineIRBuilder &MIRBuilder, - Register ThisReturnReg = Register()) const; + ArrayRef ThisReturnRegs = None) const; /// Check whether parameters to a call that are passed in callee saved /// registers are the same as from the calling function. This needs to be diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index d061664e8c5d1..486eff4dc7100 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -509,7 +509,8 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, bool CallLowering::determineAndHandleAssignments( ValueHandler &Handler, ValueAssigner &Assigner, SmallVectorImpl &Args, MachineIRBuilder &MIRBuilder, - CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const { + CallingConv::ID CallConv, bool IsVarArg, + ArrayRef ThisReturnRegs) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); SmallVector ArgLocs; @@ -519,7 +520,7 @@ bool CallLowering::determineAndHandleAssignments( return false; return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder, - ThisReturnReg); + ThisReturnRegs); } static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) { @@ -596,7 +597,7 @@ bool 
CallLowering::handleAssignments(ValueHandler &Handler, CCState &CCInfo, SmallVectorImpl &ArgLocs, MachineIRBuilder &MIRBuilder, - Register ThisReturnReg) const { + ArrayRef ThisReturnRegs) const { MachineFunction &MF = MIRBuilder.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); @@ -740,10 +741,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler, assert(!VA.needsCustom() && "custom loc should have been handled already"); - if (i == 0 && ThisReturnReg.isValid() && + if (i == 0 && !ThisReturnRegs.empty() && Handler.isIncomingArgumentHandler() && isTypeIsValidForThisReturn(ValVT)) { - Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA); + Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA); continue; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index ac08ee8ae8dd0..677e7a6684d56 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1179,7 +1179,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!determineAndHandleAssignments( UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs, MIRBuilder, Info.CallConv, Info.IsVarArg, - UsingReturnedArg ? OutArgs[0].Regs[0] : Register())) + UsingReturnedArg ? 
makeArrayRef(OutArgs[0].Regs) : None)) return false; } diff --git a/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll new file mode 100644 index 0000000000000..0418720231288 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +define void @test() nounwind { +; CHECK-LABEL: test: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl returns_arg +; CHECK-NEXT: ldr x1, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl accepts_arg +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %x = call i128 @returns_arg(i128 0) + call void @accepts_arg(i128 %x) + ret void +} + +declare i128 @returns_arg(i128 returned) +declare void @accepts_arg(i128) From a08447d0de5d85d915318ffd9ef3404363d02c64 Mon Sep 17 00:00:00 2001 From: Peter Smith Date: Fri, 21 Jan 2022 16:19:22 +0000 Subject: [PATCH 347/946] [LLD][ELF][AArch64] Update test with incorrect REQUIRES line [NFC] D54759 introduced aarch64-combined-dynrel.s and aarch64-combined-dynrel-ifunc.s . Unfortunately the requires line at the top was AArch64 instead of aarch64 which means they were never run. Update the tests to use aarch64 and fix to match current lld output. 
Differential Revision: https://reviews.llvm.org/D117896 --- lld/test/ELF/aarch64-combined-dynrel-ifunc.s | 5 +++-- lld/test/ELF/aarch64-combined-dynrel.s | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lld/test/ELF/aarch64-combined-dynrel-ifunc.s b/lld/test/ELF/aarch64-combined-dynrel-ifunc.s index 5e84b03d7b2e7..fa0f10eb8941b 100644 --- a/lld/test/ELF/aarch64-combined-dynrel-ifunc.s +++ b/lld/test/ELF/aarch64-combined-dynrel-ifunc.s @@ -1,4 +1,4 @@ -// REQUIRES: AArch64 +// REQUIRES: aarch64 // RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/shared.s -o %t-lib.o // RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t.o // RUN: ld.lld %t-lib.o --shared -o %t.so @@ -42,10 +42,11 @@ main: // CHECK-NEXT: Type: SHT_RELA // CHECK-NEXT: Flags [ // CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_INFO_LINK // CHECK-NEXT: ] // CHECK-NEXT: Address: // CHECK-NEXT: Offset: // CHECK-NEXT: Size: 72 // CHECK: 0x0000000000000008 RELASZ 72 -// CHECK: 0x0000000000000002 PLTRELSZ 48 +// CHECK: 0x0000000000000002 PLTRELSZ 24 diff --git a/lld/test/ELF/aarch64-combined-dynrel.s b/lld/test/ELF/aarch64-combined-dynrel.s index 438c2509906e6..d9c88054d3abd 100644 --- a/lld/test/ELF/aarch64-combined-dynrel.s +++ b/lld/test/ELF/aarch64-combined-dynrel.s @@ -1,4 +1,4 @@ -// REQUIRES: AArch64 +// REQUIRES: aarch64 // RUN: llvm-mc --triple=aarch64-linux-gnu -filetype=obj -o %t.o %s // RUN: echo "SECTIONS { \ // RUN: .text : { *(.text) } \ @@ -32,6 +32,7 @@ _start: // CHECK-NEXT: Type: SHT_RELA // CHECK-NEXT: Flags [ // CHECK-NEXT: SHF_ALLOC +// CHECK-NEXT: SHF_INFO_LINK // CHECK-NEXT: ] // CHECK-NEXT: Address: // CHECK-NEXT: Offset: From 906ebd5830e6053b50c52bf098e3586b567e8499 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 21 Jan 2022 17:03:03 +0000 Subject: [PATCH 348/946] [AMDGPU][GlobalISel] Regenerate checks in inst-select-*ext.mir --- .../AMDGPU/GlobalISel/inst-select-anyext.mir | 56 ++++++++++--------- .../AMDGPU/GlobalISel/inst-select-sext.mir 
| 36 ++++++------ .../AMDGPU/GlobalISel/inst-select-zext.mir | 36 ++++++------ 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir index dcad0a85e8e0e..3ae9735d11b90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -13,7 +13,7 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -22,7 +22,7 @@ body: | ... --- -name: anyext_sgpr_s32_to_sgpr_s64 +name: anyext_sgpr_s32_to_sgpr_s64 legalized: true regBankSelected: true tracksRegLiveness: true @@ -32,10 +32,11 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64 ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s64) = G_ANYEXT %0 S_ENDPGM 0, implicit %1 @@ -43,7 +44,7 @@ body: | ... 
--- -name: anyext_sgpr_s16_to_sgpr_s64 +name: anyext_sgpr_s16_to_sgpr_s64 legalized: true regBankSelected: true tracksRegLiveness: true @@ -53,10 +54,11 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64 ; GCN: liveins: $sgpr0 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ANYEXT %1 @@ -65,7 +67,7 @@ body: | ... --- -name: anyext_vgpr_s32_to_vgpr_s64 +name: anyext_vgpr_s32_to_vgpr_s64 legalized: true regBankSelected: true tracksRegLiveness: true @@ -75,10 +77,11 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64 ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s64) = G_ANYEXT %0 S_ENDPGM 0, implicit %1 @@ -86,7 +89,7 @@ body: | ... 
--- -name: anyext_vgpr_s16_to_vgpr_s64 +name: anyext_vgpr_s16_to_vgpr_s64 legalized: true regBankSelected: true tracksRegLiveness: true @@ -96,10 +99,11 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64 ; GCN: liveins: $vgpr0 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s64) = G_ANYEXT %1 @@ -134,7 +138,7 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -152,7 +156,7 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: $vgpr0 = COPY [[COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -170,7 +174,7 @@ body: | ; GCN-LABEL: name: anyext_sgpr_s1_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 @@ -188,7 +192,7 @@ body: | ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: $vgpr0 = COPY [[COPY]] + ; GCN-NEXT: $vgpr0 = COPY [[COPY]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ANYEXT %1 @@ -209,7 +213,7 @@ body: | ; GCN-LABEL: name: 
anyext_regclass_sgpr_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: $sgpr0 = COPY [[COPY]] + ; GCN-NEXT: $sgpr0 = COPY [[COPY]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ANYEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir index d642dc0ce6124..ef7be3f7cd4a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -12,8 +12,8 @@ body: | ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -31,10 +31,10 @@ body: | ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s64 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -52,8 +52,8 @@ body: | ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] - ; GCN: $sgpr0 = COPY 
[[S_SEXT_I32_I16_]] + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]] + ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 @@ -72,10 +72,10 @@ body: | ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s64 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_SEXT %1 @@ -109,8 +109,8 @@ body: | ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec - ; GCN: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -128,8 +128,8 @@ body: | ; GCN-LABEL: name: sext_vgpr_s16_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN: $vgpr0 = COPY [[V_BFE_I32_e64_]] + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_SEXT %1 @@ -148,8 +148,8 @@ body: | ; 
GCN-LABEL: name: sext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc - ; GCN: $sgpr0 = COPY [[S_BFE_I32_]] + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_SEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir index 50b3306e92ed5..c8d8468689762 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -12,8 +12,8 @@ body: | ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -31,10 +31,10 @@ body: | ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s64 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc - ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s1) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -52,8 +52,8 @@ body: 
| ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc - ; GCN: $sgpr0 = COPY [[S_BFE_U32_]] + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 @@ -72,10 +72,10 @@ body: | ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s64 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 - ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc - ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ZEXT %1 @@ -109,8 +109,8 @@ body: | ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec - ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]] + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e32_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s1) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -128,8 +128,8 @@ body: | ; GCN-LABEL: name: zext_vgpr_s16_to_vgpr_s32 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec - ; GCN: $vgpr0 = COPY [[V_BFE_U32_e64_]] + ; GCN-NEXT: 
[[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC %0 %2:vgpr(s32) = G_ZEXT %1 @@ -148,8 +148,8 @@ body: | ; GCN-LABEL: name: zext_sgpr_reg_class_s1_to_sgpr_s32 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc - ; GCN: $sgpr0 = COPY [[S_AND_B32_]] + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sreg_32(s1) = G_TRUNC %0 %2:sgpr(s32) = G_ZEXT %1 From aa50b93e7cf926dec5dd69920e6f48906ea8ad25 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 21 Jan 2022 17:18:03 +0000 Subject: [PATCH 349/946] [AMDGPU][GlobalISel] Add more sign/zero/any-extension tests Add s1 to s16 cases, and for sgprs s1 to s64 and s32 to s64. --- .../AMDGPU/GlobalISel/inst-select-anyext.mir | 60 ++++++++++++++++++ .../AMDGPU/GlobalISel/inst-select-sext.mir | 63 +++++++++++++++++++ .../AMDGPU/GlobalISel/inst-select-zext.mir | 63 +++++++++++++++++++ 3 files changed, 186 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir index 3ae9735d11b90..cde4df501dfe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -129,6 +129,26 @@ body: | --- +name: anyext_sgpr_s1_to_sgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:sgpr(s16) = G_ANYEXT %1 + %3:sgpr(s32) = G_ZEXT %2 + $sgpr0 = COPY %3 +... 
+ +--- + name: anyext_sgpr_s1_to_sgpr_s32 legalized: true regBankSelected: true @@ -147,6 +167,46 @@ body: | --- +name: anyext_sgpr_s1_to_sgpr_s64 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s64 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:sgpr(s64) = G_ANYEXT %1 + $sgpr0_sgpr1 = COPY %2 +... + +--- + +name: anyext_vgpr_s1_to_vgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0 + %2:vgpr(s16) = G_ANYEXT %1 + %3:vgpr(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... 
+ +--- + name: anyext_vgpr_s1_to_vgpr_s32 legalized: true regBankSelected: true diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir index ef7be3f7cd4a8..0e7e12f27f71d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir @@ -3,6 +3,27 @@ --- +name: sext_sgpr_s1_to_sgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc + ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_BFE_I32_]], 1048576, implicit-def $scc + ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:sgpr(s16) = G_SEXT %1 + %3:sgpr(s32) = G_ZEXT %2 + $sgpr0 = COPY %3 +... + +--- + name: sext_sgpr_s1_to_sgpr_s32 legalized: true regBankSelected: true @@ -83,6 +104,27 @@ body: | ... +--- + +name: sext_sgpr_s32_to_sgpr_s64 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s64) = G_SEXT %0 + $sgpr0_sgpr1 = COPY %1 + +... 
+ # --- # name: sext_vcc_s1_to_vgpr_s32 @@ -100,6 +142,27 @@ body: | --- +name: sext_vgpr_s1_to_vgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec + ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_BFE_I32_e64_]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0 + %2:vgpr(s16) = G_SEXT %1 + %3:vgpr(s32) = G_ZEXT %2 + $vgpr0 = COPY %3 +... + +--- + name: sext_vgpr_s1_to_vgpr_s32 legalized: true regBankSelected: true diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir index c8d8468689762..821d05f1f03af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir @@ -3,6 +3,27 @@ --- +name: zext_sgpr_s1_to_sgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc + ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]] + ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s1) = G_TRUNC %0 + %2:sgpr(s16) = G_ZEXT %1 + %3:sgpr(s32) = G_SEXT %2 + $sgpr0 = COPY %3 +... + +--- + name: zext_sgpr_s1_to_sgpr_s32 legalized: true regBankSelected: true @@ -83,6 +104,27 @@ body: | ... 
+--- + +name: zext_sgpr_s32_to_sgpr_s64 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: zext_sgpr_s32_to_sgpr_s64 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 2097152, implicit-def $scc + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s64) = G_ZEXT %0 + $sgpr0_sgpr1 = COPY %1 + +... + # --- # name: zext_vcc_s1_to_vgpr_s32 @@ -100,6 +142,27 @@ body: | --- +name: zext_vgpr_s1_to_vgpr_s16 +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s16 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s1) = G_TRUNC %0 + %2:vgpr(s16) = G_ZEXT %1 + %3:vgpr(s32) = G_SEXT %2 + $vgpr0 = COPY %3 +... + +--- + name: zext_vgpr_s1_to_vgpr_s32 legalized: true regBankSelected: true From 7d19566c3bfb3efacb629d18839e2d85761156ab Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 13 Jan 2022 16:54:04 +0000 Subject: [PATCH 350/946] [lldb] Ignore non-address bits in "memory find" arguments This removes the non-address bits before we try to use the addresses. Meaning that when results are shown, those results won't show non-address bits either. This follows what "memory read" has done. On the grounds that non-address bits are a property of a pointer, not the memory pointed to. I've added testing and merged the find and read tests into one file. 
Note that there are no API side changes because "memory find" does not have an equivalent API call. Reviewed By: omjavaid Differential Revision: https://reviews.llvm.org/D117299 --- lldb/source/Commands/CommandObjectMemory.cpp | 6 +++ .../Makefile | 0 .../TestAArch64LinuxTaggedMemoryAccess.py} | 40 ++++++++++++++++--- .../main.c | 8 +++- 4 files changed, 46 insertions(+), 8 deletions(-) rename lldb/test/API/linux/aarch64/{tagged_memory_read => tagged_memory_access}/Makefile (100%) rename lldb/test/API/linux/aarch64/{tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py => tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py} (62%) rename lldb/test/API/linux/aarch64/{tagged_memory_read => tagged_memory_access}/main.c (80%) diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index 0b5f39bc7a8f4..f5e2cb4ed44ea 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -1032,6 +1032,12 @@ class CommandObjectMemoryFind : public CommandObjectParsed { return false; } + ABISP abi = m_exe_ctx.GetProcessPtr()->GetABI(); + if (abi) { + low_addr = abi->FixDataAddress(low_addr); + high_addr = abi->FixDataAddress(high_addr); + } + if (high_addr <= low_addr) { result.AppendError( "starting address must be smaller than ending address"); diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_access/Makefile similarity index 100% rename from lldb/test/API/linux/aarch64/tagged_memory_read/Makefile rename to lldb/test/API/linux/aarch64/tagged_memory_access/Makefile diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py b/lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py similarity index 62% rename from lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py rename to 
lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py index 2f55b951a7548..3cc0d70a3ce34 100644 --- a/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py +++ b/lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py @@ -1,6 +1,9 @@ """ -Test that "memory read" removes non address bits from -memory read arguments. +Test that "memory read" and "memory find" remove non address bits from +address arguments. + +These tests use the top byte ignore feature of AArch64. Which Linux +always enables. """ @@ -17,10 +20,7 @@ class AArch64LinuxTaggedMemoryReadTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True - # AArch64 Linux always enables top byte ignore - @skipUnlessArch("aarch64") - @skipUnlessPlatform(["linux"]) - def test_tagged_memory_read(self): + def setup_test(self): self.build() self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) @@ -37,6 +37,11 @@ def test_tagged_memory_read(self): substrs=['stopped', 'stop reason = breakpoint']) + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_tagged_memory_read(self): + self.setup_test() + # If we do not remove non address bits, this can fail in two ways. # 1. We attempt to read much more than 16 bytes, probably more than # the default 1024 byte read size. Which will error. @@ -53,3 +58,26 @@ def test_tagged_memory_read(self): # Would fail if we don't remove non address bits because 0x56... > 0x34... self.expect("memory read ptr2 ptr1+16", patterns=[tagged_addr_pattern], matching=False) self.expect("memory read", patterns=[tagged_addr_pattern], matching=False) + + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_tagged_memory_find(self): + self.setup_test() + + # If memory find doesn't remove non-address bits one of two + # things happen. + # 1. It tries to search a gigantic amount of memory. 
+ # We're not going to test for this because a failure + # would take a very long time and perhaps even find the + # target value randomly. + # 2. It thinks high address <= low address, which we check below. + + self.runCmd("memory find -s '?' ptr2 ptr1+32") + + self.assertTrue(self.res.Succeeded()) + out = self.res.GetOutput() + # memory find does not fail when it doesn't find the data. + # First check we actually got something. + self.assertRegexpMatches(out, "data found at location: 0x[0-9A-Fa-f]+") + # Then that the location found does not display the tag bits. + self.assertNotRegexpMatches(out, "data found at location: 0x(34|56)[0-9A-Fa-f]+") diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/main.c b/lldb/test/API/linux/aarch64/tagged_memory_access/main.c similarity index 80% rename from lldb/test/API/linux/aarch64/tagged_memory_read/main.c rename to lldb/test/API/linux/aarch64/tagged_memory_access/main.c index 72ee30cef7869..3dd064b00bd31 100644 --- a/lldb/test/API/linux/aarch64/tagged_memory_read/main.c +++ b/lldb/test/API/linux/aarch64/tagged_memory_access/main.c @@ -5,11 +5,15 @@ static char *set_non_address_bits(char *ptr, size_t tag) { return (char *)((size_t)ptr | (tag << 56)); } -int main(int argc, char const *argv[]) { - char buf[32]; +// Global to zero init +char buf[32]; +int main(int argc, char const *argv[]) { char *ptr1 = set_non_address_bits(buf, 0x34); char *ptr2 = set_non_address_bits(buf, 0x56); + // Target value for "memory find" + buf[15] = '?'; + return 0; // Set break point at this line. } From 022600334dcb914d24230c6659487b2744af702b Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Tue, 11 Jan 2022 13:16:03 +0000 Subject: [PATCH 351/946] [flang] Update the description of `!fir.coordinate_of` This change was suggested in one of the comments for https://reviews.llvm.org/D115333. 
Basically, the following usage is valid, but the current wording suggests otherwise: ``` %1 = fir.coordinate_of %a, %k : (!fir.ref>, index) -> !fir.ref> ``` A test is also added to better document this particular case. Differential revision: https://reviews.llvm.org/D115929 --- .../include/flang/Optimizer/Dialect/FIROps.td | 2 +- flang/test/Fir/convert-to-llvm.fir | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index cd4d77eab521b..a484d60fa3b95 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -1633,7 +1633,7 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> { Compute the internal coordinate address starting from a boxed value or unboxed memory reference. Returns a memory reference. When computing the coordinate of an array element, the rank of the array must be known and - the number of indexing expressions must equal the rank of the array. + the number of indexing expressions must not exceed the rank of the array. This operation will apply the access map from a boxed value implicitly. 
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 9b4961a974a47..e5256629062fb 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -2471,11 +2471,11 @@ func @coordinate_array_known_size_1d(%arg0: !fir.ref>, %arg // ----- -func @coordinate_array_known_size_2d(%arg0: !fir.ref>, %arg1 : index, %arg2 : index) { +func @coordinate_array_known_size_2d_get_i32(%arg0: !fir.ref>, %arg1 : index, %arg2 : index) { %q = fir.coordinate_of %arg0, %arg1, %arg2 : (!fir.ref>, index, index) -> !fir.ref return } -// CHECK-LABEL: llvm.func @coordinate_array_known_size_2d( +// CHECK-LABEL: llvm.func @coordinate_array_known_size_2d_get_i32( // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr>>, // CHECK-SAME: %[[VAL_1:.*]]: i64, // CHECK-SAME: %[[VAL_2:.*]]: i64) { @@ -2486,6 +2486,20 @@ func @coordinate_array_known_size_2d(%arg0: !fir.ref>, // ----- +func @coordinate_array_known_size_2d_get_array(%arg0: !fir.ref>, %arg1 : index) { + %q = fir.coordinate_of %arg0, %arg1 : (!fir.ref>, index) -> !fir.ref> + return +} +// CHECK-LABEL: llvm.func @coordinate_array_known_size_2d_get_array( +// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr>>, +// CHECK-SAME: %[[VAL_1:.*]]: i64) { +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_0]][%[[VAL_2]], %[[VAL_1]]] : (!llvm.ptr>>, i64, i64) -> !llvm.ptr> +// CHECK: llvm.return +// CHECK: } + +// ----- + // 5.2. `fir.derived` func @coordinate_ref_derived(%arg0: !fir.ref>) { %idx = fir.field_index field_2, !fir.type From 912af6b570d6f70e107e4ddf54bc85cb8b63cc70 Mon Sep 17 00:00:00 2001 From: Abinav Puthan Purayil Date: Mon, 24 Jan 2022 16:24:54 +0530 Subject: [PATCH 352/946] [AMDGPU][GlobalISel] Remove the post ':' part of vreg operands in fsh combine tests. 
--- .../CodeGen/AMDGPU/GlobalISel/combine-fsh.mir | 50 +++++++++---------- .../CodeGen/AMDGPU/GlobalISel/combine-rot.mir | 50 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir index ad93f1bf4d39e..a1eabb487448e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir @@ -20,10 +20,10 @@ body: | %b:_(s32) = COPY $vgpr1 %amt:_(s32) = COPY $vgpr2 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %b, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr3 = COPY %or ... @@ -46,11 +46,11 @@ body: | %b:_(<2 x s32>) = COPY $vgpr2_vgpr3 %amt:_(<2 x s32>) = COPY $vgpr4_vgpr5 %scalar_bw:_(s32) = G_CONSTANT i32 32 - %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw(s32), %scalar_bw(s32) - %shl:_(<2 x s32>) = G_SHL %a:_, %amt:_(<2 x s32>) - %sub:_(<2 x s32>) = G_SUB %bw:_, %amt:_ - %lshr:_(<2 x s32>) = G_LSHR %b:_, %sub:_(<2 x s32>) - %or:_(<2 x s32>) = G_OR %shl:_, %lshr:_ + %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw + %shl:_(<2 x s32>) = G_SHL %a, %amt + %sub:_(<2 x s32>) = G_SUB %bw, %amt + %lshr:_(<2 x s32>) = G_LSHR %b, %sub + %or:_(<2 x s32>) = G_OR %shl, %lshr $vgpr6_vgpr7 = COPY %or ... @@ -73,10 +73,10 @@ body: | %b:_(s32) = COPY $vgpr1 %amt:_(s32) = COPY $vgpr2 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32) - %or:_(s32) = G_OR %lshr:_, %shl:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %b, %sub + %or:_(s32) = G_OR %lshr, %shl $vgpr3 = COPY %or ... 
@@ -99,10 +99,10 @@ body: | %b:_(s32) = COPY $vgpr1 %amt:_(s32) = COPY $vgpr2 %bw:_(s32) = G_CONSTANT i32 32 - %lshr:_(s32) = G_LSHR %b:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %shl:_(s32) = G_SHL %a:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %lshr:_(s32) = G_LSHR %b, %amt + %sub:_(s32) = G_SUB %bw, %amt + %shl:_(s32) = G_SHL %a, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr3 = COPY %or ... @@ -182,10 +182,10 @@ body: | %b:_(s32) = COPY $vgpr1 %amt:_(s32) = COPY $vgpr2 %bw:_(s32) = G_CONSTANT i32 31 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %b, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr3 = COPY %or ... @@ -214,9 +214,9 @@ body: | %amt:_(s32) = COPY $vgpr2 %amt1:_(s32) = COPY $vgpr3 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt1:_ - %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt1 + %lshr:_(s32) = G_LSHR %b, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr4 = COPY %or ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir index 2649ee4bdf72a..e83bcbc293d51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir @@ -18,10 +18,10 @@ body: | %a:_(s32) = COPY $vgpr0 %amt:_(s32) = COPY $vgpr1 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %a, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr2 = COPY %or ... 
@@ -42,11 +42,11 @@ body: | %a:_(<2 x s32>) = COPY $vgpr0_vgpr1 %amt:_(<2 x s32>) = COPY $vgpr2_vgpr3 %scalar_bw:_(s32) = G_CONSTANT i32 32 - %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw(s32), %scalar_bw(s32) - %shl:_(<2 x s32>) = G_SHL %a:_, %amt:_(<2 x s32>) - %sub:_(<2 x s32>) = G_SUB %bw:_, %amt:_ - %lshr:_(<2 x s32>) = G_LSHR %a:_, %sub:_(<2 x s32>) - %or:_(<2 x s32>) = G_OR %shl:_, %lshr:_ + %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw + %shl:_(<2 x s32>) = G_SHL %a, %amt + %sub:_(<2 x s32>) = G_SUB %bw, %amt + %lshr:_(<2 x s32>) = G_LSHR %a, %sub + %or:_(<2 x s32>) = G_OR %shl, %lshr $vgpr4_vgpr5 = COPY %or ... @@ -67,10 +67,10 @@ body: | %a:_(s32) = COPY $vgpr0 %amt:_(s32) = COPY $vgpr1 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32) - %or:_(s32) = G_OR %lshr:_, %shl:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %a, %sub + %or:_(s32) = G_OR %lshr, %shl $vgpr2 = COPY %or ... @@ -91,10 +91,10 @@ body: | %a:_(s32) = COPY $vgpr0 %amt:_(s32) = COPY $vgpr1 %bw:_(s32) = G_CONSTANT i32 32 - %lshr:_(s32) = G_LSHR %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %shl:_(s32) = G_SHL %a:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %lshr:_(s32) = G_LSHR %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %shl:_(s32) = G_SHL %a, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr2 = COPY %or ... @@ -169,10 +169,10 @@ body: | %a:_(s32) = COPY $vgpr0 %amt:_(s32) = COPY $vgpr1 %bw:_(s32) = G_CONSTANT i32 31 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt:_ - %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt + %lshr:_(s32) = G_LSHR %a, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr2 = COPY %or ... 
@@ -199,9 +199,9 @@ body: | %amt:_(s32) = COPY $vgpr1 %amt1:_(s32) = COPY $vgpr2 %bw:_(s32) = G_CONSTANT i32 32 - %shl:_(s32) = G_SHL %a:_, %amt:_(s32) - %sub:_(s32) = G_SUB %bw:_, %amt1:_ - %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32) - %or:_(s32) = G_OR %shl:_, %lshr:_ + %shl:_(s32) = G_SHL %a, %amt + %sub:_(s32) = G_SUB %bw, %amt1 + %lshr:_(s32) = G_LSHR %a, %sub + %or:_(s32) = G_OR %shl, %lshr $vgpr3 = COPY %or ... From 577a6dc9a1864fcc0e938052beae2a9b5362367a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 24 Jan 2022 11:08:13 +0000 Subject: [PATCH 353/946] [X86] getVectorMaskingNode - fix indentation. NFC. clang-format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 618b97a2e8dbd..10fdcc3d6ca81 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25785,9 +25785,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { + SDValue PreservedSrc, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); unsigned OpcodeSelect = ISD::VSELECT; From e7926e8d972e8129d4b64e10a38719b066a526be Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 20 Jan 2022 14:46:28 +0000 Subject: [PATCH 354/946] [RISCV] Match VF variants for masked VFRDIV/VFRSUB This patch follows up on D117697 to help the simple binary operations behave similarly in the presence of masks. 
It also enables CGP sinking support for vp.fdiv and vp.fsub intrinsics, now that VFRDIV and VFRSUB are consistently matched with a LHS splat for masked and unmasked variants. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117783 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 ++-- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 11 ++++- .../RISCV/rvv/fixed-vectors-vfrdiv-vp.ll | 48 +++++-------------- .../RISCV/rvv/fixed-vectors-vfrsub-vp.ll | 48 +++++-------------- .../CodeGen/RISCV/rvv/sink-splat-operands.ll | 22 ++++----- 5 files changed, 46 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 411191343cf04..5073fe66c15a9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1272,9 +1272,7 @@ bool RISCVTargetLowering::shouldSinkOperands( case Intrinsic::vp_or: case Intrinsic::vp_xor: case Intrinsic::vp_fadd: - case Intrinsic::vp_fsub: case Intrinsic::vp_fmul: - case Intrinsic::vp_fdiv: case Intrinsic::vp_shl: case Intrinsic::vp_lshr: case Intrinsic::vp_ashr: @@ -1283,9 +1281,11 @@ bool RISCVTargetLowering::shouldSinkOperands( case Intrinsic::vp_urem: case Intrinsic::vp_srem: return Operand == 1; - // ... the one exception is vp.sub which has explicit patterns for both - // LHS and RHS (as vrsub). + // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have + // explicit patterns for both LHS and RHS (as 'vr' versions). 
case Intrinsic::vp_sub: + case Intrinsic::vp_fsub: + case Intrinsic::vp_fdiv: return Operand == 0 || Operand == 1; default: return false; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 7b556174bbf0c..9e47cb80349bf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -404,7 +404,7 @@ multiclass VPatBinaryFPVL_VV_VF { } multiclass VPatBinaryFPVL_R_VF { - foreach fvti = AllFloatVectors in + foreach fvti = AllFloatVectors in { def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), fvti.RegClass:$rs1, (fvti.Mask true_mask), @@ -412,6 +412,15 @@ multiclass VPatBinaryFPVL_R_VF { (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), + fvti.RegClass:$rs1, + (fvti.Mask V0), + VLOpFrag)), + (!cast(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; + } } multiclass VPatIntegerSetCCVL_VV @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfrdiv_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> undef, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> undef, <2 x i32> zeroinitializer @@ -39,10 +37,8 @@ declare <4 x half> @llvm.vp.fdiv.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfrdiv_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, 
i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> undef, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> undef, <4 x i32> zeroinitializer @@ -69,10 +65,8 @@ declare <8 x half> @llvm.vp.fdiv.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfrdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> undef, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> undef, <8 x i32> zeroinitializer @@ -99,10 +93,8 @@ declare <16 x half> @llvm.vp.fdiv.v16f16(<16 x half>, <16 x half>, <16 x i1>, i3 define <16 x half> @vfrdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> undef, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> undef, <16 x i32> zeroinitializer @@ -129,10 +121,8 @@ declare <2 x float> @llvm.vp.fdiv.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32) define <2 x float> @vfrdiv_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; 
CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> undef, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer @@ -159,10 +149,8 @@ declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) define <4 x float> @vfrdiv_vf_v4f32(<4 x float> %va, float %b, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> undef, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> undef, <4 x i32> zeroinitializer @@ -189,10 +177,8 @@ declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) define <8 x float> @vfrdiv_vf_v8f32(<8 x float> %va, float %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> undef, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> undef, <8 x i32> zeroinitializer @@ -219,10 +205,8 @@ declare <16 x float> @llvm.vp.fdiv.v16f32(<16 x float>, <16 x float>, <16 x i1>, define <16 x float> @vfrdiv_vf_v16f32(<16 x float> %va, float %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, 
v12, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> undef, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> undef, <16 x i32> zeroinitializer @@ -249,10 +233,8 @@ declare <2 x double> @llvm.vp.fdiv.v2f64(<2 x double>, <2 x double>, <2 x i1>, i define <2 x double> @vfrdiv_vf_v2f64(<2 x double> %va, double %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> undef, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> undef, <2 x i32> zeroinitializer @@ -279,10 +261,8 @@ declare <4 x double> @llvm.vp.fdiv.v4f64(<4 x double>, <4 x double>, <4 x i1>, i define <4 x double> @vfrdiv_vf_v4f64(<4 x double> %va, double %b, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> undef, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> undef, <4 x i32> zeroinitializer @@ -309,10 +289,8 @@ declare <8 x double> @llvm.vp.fdiv.v8f64(<8 x double>, <8 x double>, <8 x i1>, i define <8 x double> @vfrdiv_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = 
insertelement <8 x double> undef, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> undef, <8 x i32> zeroinitializer @@ -339,10 +317,8 @@ declare <16 x double> @llvm.vp.fdiv.v16f64(<16 x double>, <16 x double>, <16 x i define <16 x double> @vfrdiv_vf_v16f64(<16 x double> %va, double %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrdiv_vf_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfdiv.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> undef, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> undef, <16 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll index bdf7c6601d8d8..dfb436437e765 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll @@ -9,10 +9,8 @@ declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfrsub_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> undef, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> undef, <2 x i32> zeroinitializer @@ -39,10 +37,8 @@ declare <4 x half> @llvm.vp.fsub.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfrsub_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: 
vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> undef, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> undef, <4 x i32> zeroinitializer @@ -69,10 +65,8 @@ declare <8 x half> @llvm.vp.fsub.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfrsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> undef, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> undef, <8 x i32> zeroinitializer @@ -99,10 +93,8 @@ declare <16 x half> @llvm.vp.fsub.v16f16(<16 x half>, <16 x half>, <16 x i1>, i3 define <16 x half> @vfrsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> undef, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> undef, <16 x i32> zeroinitializer @@ -129,10 +121,8 @@ declare <2 x float> @llvm.vp.fsub.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32) define <2 x float> @vfrsub_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: 
vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> undef, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer @@ -159,10 +149,8 @@ declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) define <4 x float> @vfrsub_vf_v4f32(<4 x float> %va, float %b, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> undef, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> undef, <4 x i32> zeroinitializer @@ -189,10 +177,8 @@ declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) define <8 x float> @vfrsub_vf_v8f32(<8 x float> %va, float %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> undef, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> undef, <8 x i32> zeroinitializer @@ -219,10 +205,8 @@ declare <16 x float> @llvm.vp.fsub.v16f32(<16 x float>, <16 x float>, <16 x i1>, define <16 x float> @vfrsub_vf_v16f32(<16 x float> %va, float %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> undef, float %b, i32 
0 %vb = shufflevector <16 x float> %elt.head, <16 x float> undef, <16 x i32> zeroinitializer @@ -249,10 +233,8 @@ declare <2 x double> @llvm.vp.fsub.v2f64(<2 x double>, <2 x double>, <2 x i1>, i define <2 x double> @vfrsub_vf_v2f64(<2 x double> %va, double %b, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v9, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vfsub.vv v8, v9, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> undef, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> undef, <2 x i32> zeroinitializer @@ -279,10 +261,8 @@ declare <4 x double> @llvm.vp.fsub.v4f64(<4 x double>, <4 x double>, <4 x i1>, i define <4 x double> @vfrsub_vf_v4f64(<4 x double> %va, double %b, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vfmv.v.f v10, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vfsub.vv v8, v10, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> undef, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> undef, <4 x i32> zeroinitializer @@ -309,10 +289,8 @@ declare <8 x double> @llvm.vp.fsub.v8f64(<8 x double>, <8 x double>, <8 x i1>, i define <8 x double> @vfrsub_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vfsub.vv v8, v12, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> undef, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> undef, <8 x i32> 
zeroinitializer @@ -339,10 +317,8 @@ declare <16 x double> @llvm.vp.fsub.v16f64(<16 x double>, <16 x double>, <16 x i define <16 x double> @vfrsub_vf_v16f64(<16 x double> %va, double %b, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfrsub_vf_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu -; CHECK-NEXT: vfsub.vv v8, v16, v8, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> undef, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> undef, <16 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index e902e1cd577a8..5bfa79e02e437 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -3364,22 +3364,19 @@ for.cond.cleanup: ; preds = %vector.body ret void } -; FIXME: vfrdiv.vf doesn't match against masked instructions - define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_frdiv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB56_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vv v9, v8, v9, v0.t +; CHECK-NEXT: vfrdiv.vf v8, v8, ft0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: bnez a1, .LBB56_1 @@ -3490,22 +3487,19 @@ for.cond.cleanup: ; preds = %vector.body declare <4 x float> 
@llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32) -; FIXME: vfrsub.vf doesn't match against masked instructions - define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_frsub: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vfmv.v.f v8, ft0 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB59_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vv v9, v8, v9, v0.t +; CHECK-NEXT: vfrsub.vf v8, v8, ft0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: bnez a1, .LBB59_1 From af773a18181dc1a1e3846f518b2d44f2abbbdf87 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 18 Jan 2022 14:13:13 +0000 Subject: [PATCH 355/946] [RISCV][VP] Lower VP_MERGE to RVV instructions This patch adds lowering of the llvm.vp.merge.* intrinsic (ISD::VP_MERGE) to RVV vmerge/vfmerge instructions. It introduces a special pseudo form of vmerge which allows a tied merge operand, allowing us to specify the tail elements as being equal to the "on false" operand, using a tied-def constraint and a "tail undisturbed" policy. While this strategy allows us to often lower the intrinsic to just one instruction, it may be less efficient in fixed-vector types as the number of tail elements may extend far beyond the length of the fixed vector. Another strategy could be to use a vmerge/vfmerge instruction with an AVL equal to the length of the vector type, and manipulate the condition operand such that mask elements greater than the operation's EVL are false. 
I've also observed inefficient codegen in which our 'VF' patterns don't match raw floating-point SPLAT_VECTORs, which occur in scalable-vector code. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D117561 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 75 +- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 64 +- .../RISCV/rvv/fixed-vectors-vpmerge.ll | 953 ++++++++++++ llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 1280 +++++++++++++++++ 6 files changed, 2372 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5073fe66c15a9..7d224e3968545 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -521,12 +521,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, - ISD::VP_SELECT}; + ISD::VP_MERGE, ISD::VP_SELECT}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT}; + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, + ISD::VP_SELECT}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector @@ -3441,6 +3442,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerSET_ROUNDING(Op, DAG); case ISD::VP_SELECT: return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); + case ISD::VP_MERGE: + return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL); case ISD::VP_ADD: return 
lowerVPOp(Op, DAG, RISCVISD::ADD_VL); case ISD::VP_SUB: @@ -10087,6 +10090,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VWADDU_VL) NODE_NAME_CASE(SETCC_VL) NODE_NAME_CASE(VSELECT_VL) + NODE_NAME_CASE(VP_MERGE_VL) NODE_NAME_CASE(VMAND_VL) NODE_NAME_CASE(VMOR_VL) NODE_NAME_CASE(VMXOR_VL) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 23857f93e0159..58b7ec89f8758 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -253,6 +253,10 @@ enum NodeType : unsigned { // Vector select with an additional VL operand. This operation is unmasked. VSELECT_VL, + // Vector select with operand #2 (the value when the condition is false) tied + // to the destination and an additional VL operand. This operation is + // unmasked. + VP_MERGE_VL, // Mask binary operators. VMAND_VL, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a4e92c80ff140..798f848a50b7a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -579,10 +579,11 @@ class PseudoToVInst { !subst("_B64", "", !subst("_MASK", "", !subst("_TIED", "", + !subst("_TU", "", !subst("F16", "F", !subst("F32", "F", !subst("F64", "F", - !subst("Pseudo", "", PseudoInst)))))))))))))))))))); + !subst("Pseudo", "", PseudoInst))))))))))))))))))))); } // The destination vector register group for a masked vector instruction cannot @@ -928,6 +929,9 @@ class VPseudoBinaryNoMask(PseudoToVInst.VInst); } +// Special version of VPseudoBinaryNoMask where we pretend the first source is +// tied to the destination. +// This allows maskedoff and rs2 to be the same register. 
class VPseudoTiedBinaryNoMask : @@ -1079,6 +1083,30 @@ class VPseudoBinaryCarryIn : + Pseudo<(outs RetClass:$rd), + !if(CarryIn, + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, + ixlenimm:$sew), + (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; + let HasVLOp = 1; + let HasSEWOp = 1; + let HasMergeOp = 1; + let HasVecPolicyOp = 0; + let BaseInstr = !cast(PseudoToVInst.VInst); + let VLMul = MInfo.value; +} + class VPseudoTernaryNoMask; } +multiclass VPseudoTiedBinaryV_VM { + foreach m = MxList in + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; +} + multiclass VPseudoBinaryV_XM { foreach m = MxList in @@ -1751,13 +1789,29 @@ multiclass VPseudoBinaryV_XM; } +multiclass VPseudoTiedBinaryV_XM { + foreach m = MxList in + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; +} + multiclass VPseudoVMRG_FM { foreach f = FPList in - foreach m = f.MxList in + foreach m = f.MxList in { def "_V" # f.FX # "M_" # m.MX : VPseudoBinaryCarryIn.R, m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + // Tied version to allow codegen control over the tail elements + def "_V" # f.FX # "M_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; + } } multiclass VPseudoBinaryV_IM; } +multiclass VPseudoTiedBinaryV_IM { + foreach m = MxList in + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; +} + multiclass VPseudoUnaryVMV_V_X_I { foreach m = 
MxList in { let VLMul = m.value in { @@ -2104,6 +2168,13 @@ multiclass VPseudoVMRG_VM_XM_IM { Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; defm "" : VPseudoBinaryV_IM, Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } multiclass VPseudoVCALU_VM_XM_IM { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9e47cb80349bf..5cff16c32fe78 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -177,14 +177,13 @@ def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL", SDTCisSameNumEltsAs<0, 3>, SDTCisVT<4, XLenVT>]>>; -def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", - SDTypeProfile<1, 4, [SDTCisVec<0>, - SDTCisVec<1>, - SDTCisSameNumEltsAs<0, 1>, - SDTCVecEltisVT<1, i1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>, - SDTCisVT<4, XLenVT>]>>; +def SDT_RISCVSelect_VL : SDTypeProfile<1, 4, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT> +]>; + +def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>; +def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>; def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, @@ -976,6 +975,30 @@ foreach vti = AllIntegerVectors in { VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + vti.RegClass:$rs1, + vti.RegClass:$rs2, + 
VLOpFrag)), + (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + + def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0), + (SplatPat_simm5 simm5:$rs1), + vti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU") + vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; } // 12.16. Vector Integer Move Instructions @@ -1223,6 +1246,31 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + fvti.RegClass:$rs1, + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp fvti.ScalarRegClass:$rs1), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, + (fvti.Scalar fvti.ScalarRegClass:$rs1), + (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + + def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0), + (SplatFPOp (fvti.Scalar fpimm0)), + fvti.RegClass:$rs2, + VLOpFrag)), + (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU") + fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + GPR:$vl, fvti.Log2SEW)>; + // 14.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. 
def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll new file mode 100644 index 0000000000000..e0cc13db7f118 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -0,0 +1,953 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) + +define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x 
i8> poison, i8 2, i32 0 + %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) + +define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 + %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) + +define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: 
vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 2, i32 0 + %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) + +define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0 
+ %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 + %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) + +define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = 
insertelement <2 x i16> poison, i16 2, i32 0 + %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) + +define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 + %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) + +define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i16: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 + %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) + +define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, 
m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 + %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) + +define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpmerge_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 + %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) + +define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 + %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x 
i32>, i32) + +define <8 x i32> @vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 + %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> 
@vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 + %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) + +define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, 
v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 + %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) + +define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: 
ret +; +; RV64-LABEL: vpmerge_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 + %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) + +define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v8i64: +; RV64: 
# %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 + %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32) + +define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: 
vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 + %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl) + ret <16 x i64> %v +} + +declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) + +define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x half> poison, half %a, i32 0 + %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer + %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl) + ret <2 x half> %v +} + +declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x 
half>, i32) + +define <4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x half> poison, half %a, i32 0 + %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer + %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl) + ret <4 x half> %v +} + +declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) + +define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} + +define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x half> poison, half %a, i32 0 + %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer + %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl) + ret <8 x half> %v +} 
+ +declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) + +define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x half> poison, half %a, i32 0 + %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer + %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl) + ret <16 x half> %v +} + +declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32) + +define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x float> poison, float %a, i32 0 + %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer 
+ %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl) + ret <2 x float> %v +} + +declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32) + +define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x float> poison, float %a, i32 0 + %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer + %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl) + ret <4 x float> %v +} + +declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32) + +define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x float> 
poison, float %a, i32 0 + %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer + %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl) + ret <8 x float> %v +} + +declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32) + +define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x float> poison, float %a, i32 0 + %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer + %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl) + ret <16 x float> %v +} + +declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32) + +define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v2f64: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x double> poison, double %a, i32 0 + %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer + %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl) + ret <2 x double> %v +} + +declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32) + +define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x double> poison, double %a, i32 0 + %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer + %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl) + ret <4 x double> %v +} + +declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32) + +define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x 
double> %v +} + +define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x double> poison, double %a, i32 0 + %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer + %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl) + ret <8 x double> %v +} + +declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32) + +define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} + +define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_v16f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x double> poison, double %a, i32 0 + %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer + %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl) + ret <16 x double> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll new file mode 100644 index 0000000000000..46ebd0a6acf50 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -0,0 +1,1280 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.merge.nxv1i8(, , , i32) + +define @vpmerge_vv_nxv1i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i8(, , , i32) + +define @vpmerge_vv_nxv2i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i8: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i8(, , , i32) + +define @vpmerge_vv_nxv4i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i8(, , , i32) + +define @vpmerge_vv_nxv8i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i8: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i8(, , , i32) + +define @vpmerge_vv_nxv16i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmerge.vim 
v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32i8(, , , i32) + +define @vpmerge_vv_nxv32i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv32i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv32i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv64i8(, , , i32) + +define @vpmerge_vv_nxv64i8( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv64i8(i8 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: 
ret + %elt.head = insertelement poison, i8 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv64i8( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i16(, , , i32) + +define @vpmerge_vv_nxv1i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i16(, , , i32) + +define @vpmerge_vv_nxv2i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; 
CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i16(, , , i32) + +define @vpmerge_vv_nxv4i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = 
insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i16(, , , i32) + +define @vpmerge_vv_nxv8i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i16(, , , i32) + +define @vpmerge_vv_nxv16i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement 
poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32i16(, , , i32) + +define @vpmerge_vv_nxv32i16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv32i16(i16 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv32i16( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i32(, , , i32) + +define @vpmerge_vv_nxv1i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm 
v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i32(, , , i32) + +define @vpmerge_vv_nxv2i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 
2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i32(, , , i32) + +define @vpmerge_vv_nxv4i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i32(, , , i32) + +define @vpmerge_vv_nxv8i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = 
shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16i32(, , , i32) + +define @vpmerge_vv_nxv16i32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv16i32(i32 %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv16i32( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1i64(, , , i32) + +define @vpmerge_vv_nxv1i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v 
v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv1i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v9, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv1i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2i64(, , , i32) + +define @vpmerge_vv_nxv2i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv2i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, 
ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v10, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv2i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4i64(, , , i32) + +define @vpmerge_vv_nxv4i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv4i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v12, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + 
%elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv4i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8i64(, , , i32) + +define @vpmerge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vx_nxv8i64(i64 %a, %vb, %m, i32 zeroext %evl) { +; RV32-LABEL: vpmerge_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vpmerge_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vi_nxv8i64( %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 
+; CHECK-NEXT: ret + %elt.head = insertelement poison, i64 2, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f16(, , , i32) + +define @vpmerge_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f16(, , , i32) + +define @vpmerge_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f16(, , , i32) + +define 
@vpmerge_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f16(, , , i32) + +define @vpmerge_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f16(, , , i32) + +define @vpmerge_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + 
%v = call @llvm.vp.merge.nxv16f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv32f16(, , , i32) + +define @vpmerge_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv32f16(half %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, half %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv32f16( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f32(, , , i32) + +define @vpmerge_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, 
mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f32(, , , i32) + +define @vpmerge_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f32(, , , i32) + +define @vpmerge_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv4f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, 
zeroinitializer + %v = call @llvm.vp.merge.nxv4f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f32(, , , i32) + +define @vpmerge_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv16f32(, , , i32) + +define @vpmerge_vv_nxv16f32( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv16f32(float %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, float %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv16f32( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv1f64(, , , i32) + +define @vpmerge_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv1f64: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv1f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vfmv.v.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv1f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv2f64(, , , i32) + +define @vpmerge_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv2f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; CHECK-NEXT: vfmv.v.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv2f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv4f64(, , , i32) + +define @vpmerge_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define 
@vpmerge_vf_nxv4f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv4f64( %m, %va, %vb, i32 %evl) + ret %v +} + +declare @llvm.vp.merge.nxv8f64(, , , i32) + +define @vpmerge_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +} + +define @vpmerge_vf_nxv8f64(double %a, %vb, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpmerge_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vfmv.v.f v16, fa0 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret + %elt.head = insertelement poison, double %a, i32 0 + %va = shufflevector %elt.head, poison, zeroinitializer + %v = call @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) + ret %v +} From 3e6be0241b31265953d82bad0164d3e0c24cf9d7 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 24 Jan 2022 11:16:34 +0000 Subject: [PATCH 356/946] [lldb] Update release notes with non-address bit handling changes This adds the "memory find" (https://reviews.llvm.org/D117299) and "memory tag" (https://reviews.llvm.org/D117672) commands and puts them all in one list. 
--- llvm/docs/ReleaseNotes.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 5825a6e81eb34..15f7428a7fde2 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -163,9 +163,14 @@ Changes to LLDB * A change in Clang's type printing has changed the way LLDB names array types (from ``int [N]`` to ``int[N]``) - LLDB pretty printer type name matching code may need to be updated to handle this. -* The ``memory read`` command now ignores non-address bits in start and end - addresses. In addition, non-address bits will not be shown in the addresses - in the output. +* The following commands now ignore non-address bits (e.g. AArch64 pointer + signatures) in address arguments. In addition, non-address bits will not + be shown in the output of the commands. + + * ``memory find`` + * ``memory read`` + * ``memory tag read`` + * ``memory tag write`` Changes to Sanitizers --------------------- From 12a499eb00e36bb0944c6b1f7f8721fd90a5bd8f Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Tue, 18 Jan 2022 19:48:35 +0100 Subject: [PATCH 357/946] Pre-commit test case for trunc+lshr+load folds This is a pre-commit of test cases relevant for D117406. @srl_load_narrowing1 is showing a pattern that could be folded into a more narrow load. @srl_load_narrowing2 is showing a similar pattern that happens to be optimized already, but that happens in two steps (first triggering a combine based on SRL and later another combine based on TRUNCATE). 
Differential Revision: https://reviews.llvm.org/D117588 --- llvm/test/CodeGen/X86/shift-folding.ll | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/llvm/test/CodeGen/X86/shift-folding.ll b/llvm/test/CodeGen/X86/shift-folding.ll index 539649b7cd476..cc03b4b9480a2 100644 --- a/llvm/test/CodeGen/X86/shift-folding.ll +++ b/llvm/test/CodeGen/X86/shift-folding.ll @@ -83,3 +83,30 @@ define i32 @overshift(i32 %a) { ret i32 %xor } +; Should be possible to adjust the pointer and narrow the load to 16 bits. +define i16 @srl_load_narrowing1(i32* %arg) { +; CHECK-LABEL: srl_load_narrowing1: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: shrl $8, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retl + %tmp1 = load i32, i32* %arg, align 1 + %tmp2 = lshr i32 %tmp1, 8 + %tmp3 = trunc i32 %tmp2 to i16 + ret i16 %tmp3 +} + +define i16 @srl_load_narrowing2(i32* %arg) { +; CHECK-LABEL: srl_load_narrowing2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl 3(%eax), %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retl + %tmp1 = load i32, i32* %arg, align 1 + %tmp2 = lshr i32 %tmp1, 24 + %tmp3 = trunc i32 %tmp2 to i16 + ret i16 %tmp3 +} From 46cacdbb21c221a4304c489cb4a1abbc51967bb1 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Tue, 18 Jan 2022 19:50:50 +0100 Subject: [PATCH 358/946] [DAGCombiner] Adjust some checks in DAGCombiner::reduceLoadWidth In code review for D117104 two slightly weird checks were found in DAGCombiner::reduceLoadWidth. They were typically checking if BitsA was a mulitple of BitsB by looking at (BitsA & (BitsB - 1)), but such a comparison actually only make sense if BitsB is a power of two. The checks were related to the code that attempted to shrink a load based on the fact that the loaded value would be right shifted. 
Afaict the legality of the value types is checked later (typically in isLegalNarrowLdSt), so the existing checks were both overly conservative and wrong whenever ExtVTBits wasn't a power of two. (The latter was a situation triggered by a number of lit tests, so we could not just assert on ExtVTBits being a power of two.) When attempting to simply remove the checks I found some problems that seem to have been guarded by the checks (maybe just out of luck). A typical example would be a pattern like this: t1 = load i96* ptr t2 = srl t1, 64 t3 = truncate t2 to i64 When DAGCombine is visiting the truncate, reduceLoadWidth is called attempting to narrow the load to 64 bits (ExtVT := MVT::i64). Then the SRL is detected and we set ShAmt to 64. In the past we've bailed out due to i96 not being a multiple of 64. If we simply remove that check then we would end up replacing the load with a new load that would read 64 bits but with a base pointer adjusted by 64 bits. So we would read 32 bits that weren't accessed by the original load. This patch will instead utilize the fact that the logical right shift can be folded away by using a zextload. Thus, the pattern above will now be combined into t3 = load i32* ptr+offset, zext to i64 Another case is shown in the X86/shift-folding.ll test case: t1 = load i32* ptr t2 = srl i32 t1, 8 t3 = truncate t2 to i16 In the past we bailed out due to the shift count (8) not being a multiple of 16.
Now the narrowing kicks in and we get t3 = load i16* ptr+offset Differential Revision: https://reviews.llvm.org/D117406 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++++++++++------- llvm/test/CodeGen/ARM/shift-combine.ll | 20 ++++----------- llvm/test/CodeGen/X86/shift-folding.ll | 4 +-- 3 files changed, 22 insertions(+), 27 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 861beee6386bf..bf4409e77a916 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12214,7 +12214,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). ShAmt = SRL1C->getZExtValue(); - if (ShAmt >= LN->getMemoryVT().getSizeInBits()) + uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits(); + if (ShAmt >= MemoryWidth) return SDValue(); // Because a SRL must be assumed to *need* to zero-extend the high bits @@ -12223,13 +12224,19 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { if (LN->getExtensionType() == ISD::SEXTLOAD) return SDValue(); - unsigned ExtVTBits = ExtVT.getScalarSizeInBits(); - // Is the shift amount a multiple of size of ExtVT? - if ((ShAmt & (ExtVTBits - 1)) != 0) - return SDValue(); - // Is the load width a multiple of size of ExtVT? - if ((SRL.getScalarValueSizeInBits() & (ExtVTBits - 1)) != 0) - return SDValue(); + // Avoid reading outside the memory accessed by the original load (could + // happened if we only adjust the load base pointer by ShAmt). Instead we + // try to narrow the load even further. The typical scenario here is: + // (i64 (truncate (i96 (srl (load x), 64)))) -> + // (i64 (truncate (i96 (zextload (load i32 + offset) from i32)))) + if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) { + // Don't replace sextload by zextload. 
+ if (ExtType == ISD::SEXTLOAD) + return SDValue(); + // Narrow the load. + ExtType = ISD::ZEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt); + } // If the SRL is only used by a masking AND, we may be able to adjust // the ExtVT to make the AND redundant. @@ -12241,7 +12248,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countTrailingOnes()); // If the mask is smaller, recompute the type. - if ((ExtVTBits > MaskedVT.getScalarSizeInBits()) && + if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) && TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) ExtVT = MaskedVT; } diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll index de1beb740bfbd..549d709f62237 100644 --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -302,9 +302,7 @@ define arm_aapcscc i32 @test_lshr_load64_4_unaligned(i64* %a) { ; ; CHECK-BE-LABEL: test_lshr_load64_4_unaligned: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: ldr r1, [r0] -; CHECK-BE-NEXT: ldrh r0, [r0, #4] -; CHECK-BE-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-BE-NEXT: ldr r0, [r0, #2] ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: test_lshr_load64_4_unaligned: @@ -341,9 +339,7 @@ define arm_aapcscc i32 @test_lshr_load64_1_lsb(i64* %a) { ; ; CHECK-BE-LABEL: test_lshr_load64_1_lsb: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: ldr r1, [r0] -; CHECK-BE-NEXT: ldrb r0, [r0, #4] -; CHECK-BE-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-BE-NEXT: ldr r0, [r0, #1] ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: test_lshr_load64_1_lsb: @@ -441,23 +437,17 @@ entry: define arm_aapcscc i32 @test_lshr_load4_fail(i64* %a) { ; CHECK-ARM-LABEL: test_lshr_load4_fail: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: ldrd r0, r1, [r0] -; CHECK-ARM-NEXT: lsr r0, r0, #8 -; CHECK-ARM-NEXT: orr r0, r0, r1, lsl #24 +; CHECK-ARM-NEXT: ldr r0, [r0, #1] ; CHECK-ARM-NEXT: bx lr 
; ; CHECK-BE-LABEL: test_lshr_load4_fail: ; CHECK-BE: @ %bb.0: @ %entry -; CHECK-BE-NEXT: ldrd r0, r1, [r0] -; CHECK-BE-NEXT: lsr r1, r1, #8 -; CHECK-BE-NEXT: orr r0, r1, r0, lsl #24 +; CHECK-BE-NEXT: ldr r0, [r0, #3] ; CHECK-BE-NEXT: bx lr ; ; CHECK-THUMB-LABEL: test_lshr_load4_fail: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: ldrd r0, r1, [r0] -; CHECK-THUMB-NEXT: lsrs r0, r0, #8 -; CHECK-THUMB-NEXT: orr.w r0, r0, r1, lsl #24 +; CHECK-THUMB-NEXT: ldr.w r0, [r0, #1] ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ALIGN-LABEL: test_lshr_load4_fail: diff --git a/llvm/test/CodeGen/X86/shift-folding.ll b/llvm/test/CodeGen/X86/shift-folding.ll index cc03b4b9480a2..bb59cdf504103 100644 --- a/llvm/test/CodeGen/X86/shift-folding.ll +++ b/llvm/test/CodeGen/X86/shift-folding.ll @@ -88,9 +88,7 @@ define i16 @srl_load_narrowing1(i32* %arg) { ; CHECK-LABEL: srl_load_narrowing1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl (%eax), %eax -; CHECK-NEXT: shrl $8, %eax -; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: movzwl 1(%eax), %eax ; CHECK-NEXT: retl %tmp1 = load i32, i32* %arg, align 1 %tmp2 = lshr i32 %tmp1, 8 From e5147f82e1cba6791252d8f44c1a014cd9ea7927 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 24 Jan 2022 11:15:45 +0000 Subject: [PATCH 359/946] [X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN) This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes: ``` __m256i test_mm256_abs_epi8(__m256i a) { // CHECK-LABEL: test_mm256_abs_epi8 // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false) return _mm256_abs_epi8(a); } ``` This requires us to add a `__v64qs` explicitly signed char vector type (we already have 
`__v16qs` and `__v32qs`). Differential Revision: https://reviews.llvm.org/D117791 --- clang/include/clang/Basic/BuiltinsX86.def | 12 ------------ clang/lib/CodeGen/CGBuiltin.cpp | 15 --------------- clang/lib/Headers/avx2intrin.h | 6 +++--- clang/lib/Headers/avx512bwintrin.h | 4 ++-- clang/lib/Headers/avx512fintrin.h | 8 ++++++-- clang/lib/Headers/avx512vlintrin.h | 4 ++-- clang/lib/Headers/tmmintrin.h | 6 +++--- clang/test/CodeGen/builtins-x86.c | 3 --- 8 files changed, 16 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index bc6208be45606..9b7c763b0c6c7 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") @@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") // AVX2 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") 
TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") @@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") @@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") @@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl") 
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a49c035002786..49f054ec1a982 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } - case X86::BI__builtin_ia32_pabsb128: - case X86::BI__builtin_ia32_pabsw128: - case X86::BI__builtin_ia32_pabsd128: - case X86::BI__builtin_ia32_pabsb256: - case X86::BI__builtin_ia32_pabsw256: - case X86::BI__builtin_ia32_pabsd256: - case X86::BI__builtin_ia32_pabsq128: - case X86::BI__builtin_ia32_pabsq256: - case X86::BI__builtin_ia32_pabsb512: - case X86::BI__builtin_ia32_pabsw512: - case X86::BI__builtin_ia32_pabsd512: - case X86::BI__builtin_ia32_pabsq512: { - Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); - } case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case X86::BI__builtin_ia32_pmaxsd128: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 5064c87c2bb19..c9ad74ce3fa42 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -26,19 +26,19 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { - return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); + return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { - return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); + return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { - return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); + return (__m256i)__builtin_elementwise_abs((__v8si)__a); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 6aee8aed84871..53319eb23011d 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi8 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); + return (__m512i)__builtin_elementwise_abs((__v64qs)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi16 (__m512i __A) { - return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); + return (__m512i)__builtin_elementwise_abs((__v32hi)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index df298640523b7..9b02a7cffc64d 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); +/* We need an explicitly signed variant for char. Note that this shouldn't + * appear in the interface though. 
*/ +typedef signed char __v64qs __attribute__((__vector_size__(64))); + typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); @@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { - return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); + return (__m512i)__builtin_elementwise_abs((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { - return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); + return (__m512i)__builtin_elementwise_abs((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 0519dba59081a..eddb99902e3d5 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { - return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); + return (__m128i)__builtin_elementwise_abs((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { - return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); + return (__m256i)__builtin_elementwise_abs((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index bcffa8187801c..cb9be2349de5a 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -53,7 +53,7 
@@ _mm_abs_pi8(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a) { - return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); + return (__m128i)__builtin_elementwise_abs((__v16qs)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a) { - return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); + return (__m128i)__builtin_elementwise_abs((__v8hi)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a) { - return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); + return (__m128i)__builtin_elementwise_abs((__v4si)__a); } /// Concatenates the two 128-bit integer vector operands, and diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 61b9d53c74f9d..bfcd30072fc1f 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -259,11 +259,8 @@ void f0() { tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s); tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i); tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i); - tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c); tmp_V8c = __builtin_ia32_pabsb(tmp_V8c); - tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s); tmp_V4s = __builtin_ia32_pabsw(tmp_V4s); - tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i); tmp_V2i = __builtin_ia32_pabsd(tmp_V2i); tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi); tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi); From b2499bf3e851c67ef623766b922de520de9235d5 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Mon, 24 Jan 2022 20:18:40 +0900 Subject: [PATCH 360/946] [mlir][bufferize][NFC] Refactor createAlloc function signature Pass a ValueRange instead of an ArrayRef for better compatibility. 
Also provide an additional function overload that automatically deallocates the buffer if specified. Differential Revision: https://reviews.llvm.org/D118025 --- .../IR/BufferizableOpInterface.h | 16 +++++++++--- .../IR/BufferizableOpInterface.cpp | 26 +++++++++++++++++-- .../Transforms/ComprehensiveBufferizePass.cpp | 2 +- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index f679a22fa7a6c..bbac6e59aeeb2 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -36,8 +36,8 @@ class BufferizationState; /// Options for ComprehensiveBufferize. struct BufferizationOptions { - using AllocationFn = std::function( - OpBuilder &, Location, MemRefType, ArrayRef)>; + using AllocationFn = std::function(OpBuilder &, Location, + MemRefType, ValueRange)>; using DeallocationFn = std::function; using MemCpyFn = @@ -298,15 +298,23 @@ UnrankedMemRefType getUnrankedMemRefType(Type elementType, MemRefType getDynamicMemRefType(RankedTensorType tensorType, unsigned addressSpace = 0); -/// Creates a memref allocation. +/// Creates a memref allocation with the given type and dynamic extents. FailureOr createAlloc(OpBuilder &b, Location loc, MemRefType type, - ArrayRef dynShape, + ValueRange dynShape, + const BufferizationOptions &options); + +/// Creates a memref allocation with the given type and dynamic extents. If +/// `createDealloc`, a deallocation op is inserted at the point where the +/// allocation goes out of scope. +FailureOr createAlloc(OpBuilder &b, Location loc, MemRefType type, + ValueRange dynShape, bool deallocMemref, const BufferizationOptions &options); /// Creates a memref allocation for the given shaped value. This function may /// perform additional optimizations such as buffer allocation hoisting. 
If /// `createDealloc`, a deallocation op is inserted at the point where the /// allocation goes out of scope. +// TODO: Allocation hoisting should be a cleanup pass. FailureOr createAlloc(OpBuilder &b, Location loc, Value shapedValue, bool deallocMemref, const BufferizationOptions &options); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index fb081d3d6c3cd..e565f41a39d5a 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -433,10 +433,10 @@ bufferization::createAlloc(OpBuilder &b, Location loc, Value shapedValue, return casted; } -/// Create a memref allocation. +/// Create a memref allocation with the given type and dynamic extents. FailureOr bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type, - ArrayRef dynShape, + ValueRange dynShape, const BufferizationOptions &options) { if (options.allocationFn) return (*options.allocationFn)(b, loc, type, dynShape); @@ -447,6 +447,28 @@ bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type, return allocated; } +/// Create a memref allocation with the given type and dynamic extents. May also +/// deallocate the memref again. +FailureOr +bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type, + ValueRange dynShape, bool deallocMemref, + const BufferizationOptions &options) { + OpBuilder::InsertionGuard g(b); + + FailureOr alloc = createAlloc(b, loc, type, dynShape, options); + if (failed(alloc)) + return failure(); + + if (deallocMemref) { + // Dealloc at the end of the block. + b.setInsertionPoint(alloc.getValue().getParentBlock()->getTerminator()); + if (failed(createDealloc(b, loc, *alloc, options))) + return failure(); + } + + return alloc; +} + /// Create a memref deallocation. 
LogicalResult bufferization::createDealloc(OpBuilder &b, Location loc, Value allocatedBuffer, diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 3c8b9c9606952..9409492e12dba 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -73,7 +73,7 @@ static void applyEnablingTransformations(ModuleOp moduleOp) { static FailureOr allocationFnUsingAlloca(OpBuilder &b, Location loc, MemRefType type, - ArrayRef dynShape) { + ValueRange dynShape) { Value allocated = b.create( loc, type, dynShape, b.getI64IntegerAttr(kBufferAlignments)); return allocated; From 3e50593b18840ab4508a25d0f761afb65535a38d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 24 Jan 2022 11:40:16 +0000 Subject: [PATCH 361/946] [X86] Remove `__builtin_ia32_pmax/min` intrinsics and use generic `__builtin_elementwise_max/min` D111985 added the generic `__builtin_elementwise_max` and `__builtin_elementwise_min` intrinsics with the same integer behaviour as the SSE/AVX instructions. This patch removes the `__builtin_ia32_pmax/min` intrinsics and just uses `__builtin_elementwise_max/min` - the existing tests see no changes: ``` __m256i test_mm256_max_epu32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_max_epu32 // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_max_epu32(a, b); } ``` This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).
Sibling patch to D117791 Differential Revision: https://reviews.llvm.org/D117798 --- clang/include/clang/Basic/BuiltinsX86.def | 48 --------------------- clang/lib/CodeGen/CGBuiltin.cpp | 52 ----------------------- clang/lib/Headers/avx2intrin.h | 24 +++++------ clang/lib/Headers/avx512bwintrin.h | 16 +++---- clang/lib/Headers/avx512fintrin.h | 16 +++---- clang/lib/Headers/avx512vlintrin.h | 16 +++---- clang/lib/Headers/emmintrin.h | 8 ++-- clang/lib/Headers/smmintrin.h | 16 +++---- clang/test/CodeGen/builtins-x86.c | 12 ------ 9 files changed, 48 insertions(+), 160 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 9b7c763b0c6c7..a8f5567248624 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -265,10 +265,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") @@ -377,14 +373,6 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", 
"ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1") @@ -580,18 +568,6 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2") 
-TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2") TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2") @@ -921,14 +897,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f") @@ -1047,14 +1015,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw" TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") 
-TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") @@ -1188,14 +1148,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", 
"ncV:256:", "avx512vl") TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 49f054ec1a982..4c68b20067b99 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14285,58 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops[0]); } } - case X86::BI__builtin_ia32_pmaxsb128: - case X86::BI__builtin_ia32_pmaxsw128: - case X86::BI__builtin_ia32_pmaxsd128: - case X86::BI__builtin_ia32_pmaxsq128: - case X86::BI__builtin_ia32_pmaxsb256: - case X86::BI__builtin_ia32_pmaxsw256: - case X86::BI__builtin_ia32_pmaxsd256: - case X86::BI__builtin_ia32_pmaxsq256: - case X86::BI__builtin_ia32_pmaxsb512: - case X86::BI__builtin_ia32_pmaxsw512: - case X86::BI__builtin_ia32_pmaxsd512: - case X86::BI__builtin_ia32_pmaxsq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax); - case X86::BI__builtin_ia32_pmaxub128: - case X86::BI__builtin_ia32_pmaxuw128: - case X86::BI__builtin_ia32_pmaxud128: - case X86::BI__builtin_ia32_pmaxuq128: - case X86::BI__builtin_ia32_pmaxub256: - case X86::BI__builtin_ia32_pmaxuw256: - case X86::BI__builtin_ia32_pmaxud256: - case X86::BI__builtin_ia32_pmaxuq256: - case X86::BI__builtin_ia32_pmaxub512: - case X86::BI__builtin_ia32_pmaxuw512: - case X86::BI__builtin_ia32_pmaxud512: - case X86::BI__builtin_ia32_pmaxuq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax); - case X86::BI__builtin_ia32_pminsb128: - case X86::BI__builtin_ia32_pminsw128: - case X86::BI__builtin_ia32_pminsd128: - case X86::BI__builtin_ia32_pminsq128: - case X86::BI__builtin_ia32_pminsb256: - case X86::BI__builtin_ia32_pminsw256: - case X86::BI__builtin_ia32_pminsd256: - case X86::BI__builtin_ia32_pminsq256: - case X86::BI__builtin_ia32_pminsb512: - case X86::BI__builtin_ia32_pminsw512: - case X86::BI__builtin_ia32_pminsd512: - case 
X86::BI__builtin_ia32_pminsq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin); - case X86::BI__builtin_ia32_pminub128: - case X86::BI__builtin_ia32_pminuw128: - case X86::BI__builtin_ia32_pminud128: - case X86::BI__builtin_ia32_pminuq128: - case X86::BI__builtin_ia32_pminub256: - case X86::BI__builtin_ia32_pminuw256: - case X86::BI__builtin_ia32_pminud256: - case X86::BI__builtin_ia32_pminuq256: - case X86::BI__builtin_ia32_pminub512: - case X86::BI__builtin_ia32_pminuw512: - case X86::BI__builtin_ia32_pminud512: - case X86::BI__builtin_ia32_pminuq512: - return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin); case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index c9ad74ce3fa42..e33514a60ff3e 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
_mm256_max_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); + return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); + return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b); } static __inline__ int __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 
53319eb23011d..522ef100bab1a 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); + return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); } static __inline__ 
__m512i __DEFAULT_FN_ATTRS512 @@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); + return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu8 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); + return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu16 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); + return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9b02a7cffc64d..8695aeb94de24 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1090,7 +1090,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B); + return 
(__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1325,7 +1325,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) static 
__inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index eddb99902e3d5..178c9dbc0e6ea 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); } static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 @@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 6e9c3032c21f7..4618b808efc48 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2375,7 +2375,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_elementwise_max((__v8hi)__a, 
(__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); + return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b); } /// Compares corresponding elements of two 128-bit signed [8 x i16] @@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); + return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b); } /// Compares corresponding elements of two 128-bit unsigned [16 x i8] @@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b) { - return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); + return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b); } /// Multiplies the corresponding elements of two signed [8 x i16] diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 710e55aaa1203..0df59c5fcc592 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i const *__V) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); + return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); + return (__m128i) 
__builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); + return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); + return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_max((__v4si) __V1, (__v4si) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2); } /// Compares the corresponding elements of two 128-bit vectors of @@ -801,7 
+801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32 (__m128i __V1, __m128i __V2) { - return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); + return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2); } /* SSE4 Insertion and Extraction from XMM Register Instructions. */ diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index bfcd30072fc1f..9eb5f2f5d149e 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -221,10 +221,6 @@ void f0() { tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c); tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c); - tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c); - tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s); tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s); tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i); tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s); @@ -455,14 +451,6 @@ void f0() { tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d); tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f); tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i); - tmp_V16c = __builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c); - tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i); - tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i); - tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s); - tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c); - tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i); - tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i); - tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s); tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i); tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16); tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, 
imm_i_0_16); From 3696c70e67d9b9e54307ef25077bae7a6f76636e Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Wed, 27 Oct 2021 11:49:00 -0700 Subject: [PATCH 362/946] [clang-tidy] Add `readability-container-contains` check This commit introduces a new check `readability-container-contains` which finds usages of `container.count()` and `container.find() != container.end()` and instead recommends the `container.contains()` method introduced in C++20. For containers which permit multiple entries per key (`multimap`, `multiset`, ...), `contains` is more efficient than `count` because `count` has to do unnecessary additional work. While this performance difference does not exist for containers with only a single entry per key (`map`, `unordered_map`, ...), `contains` still conveys the intent better. Reviewed By: xazax.hun, whisperity Differential Revision: http://reviews.llvm.org/D112646 --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../readability/ContainerContainsCheck.cpp | 144 +++++++++++ .../readability/ContainerContainsCheck.h | 40 +++ .../readability/ReadabilityTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 6 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/readability-container-contains.rst | 25 ++ .../readability-container-contains.cpp | 230 ++++++++++++++++++ 8 files changed, 450 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index 22ce8f62751ec..ea09b2193eb7c 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++
b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -7,6 +7,7 @@ add_clang_library(clangTidyReadabilityModule AvoidConstParamsInDecls.cpp BracesAroundStatementsCheck.cpp ConstReturnTypeCheck.cpp + ContainerContainsCheck.cpp ContainerDataPointerCheck.cpp ContainerSizeEmptyCheck.cpp ConvertMemberFunctionsToStatic.cpp diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp new file mode 100644 index 0000000000000..7a20480fb501c --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp @@ -0,0 +1,144 @@ +//===--- ContainerContainsCheck.cpp - clang-tidy --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ContainerContainsCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace readability { + +void ContainerContainsCheck::registerMatchers(MatchFinder *Finder) { + const auto SupportedContainers = hasType( + hasUnqualifiedDesugaredType(recordType(hasDeclaration(cxxRecordDecl( + hasAnyName("::std::set", "::std::unordered_set", "::std::map", + "::std::unordered_map", "::std::multiset", + "::std::unordered_multiset", "::std::multimap", + "::std::unordered_multimap")))))); + + const auto CountCall = + cxxMemberCallExpr(on(SupportedContainers), + callee(cxxMethodDecl(hasName("count"))), + argumentCountIs(1)) + .bind("call"); + + const auto FindCall = + cxxMemberCallExpr(on(SupportedContainers), + callee(cxxMethodDecl(hasName("find"))), + argumentCountIs(1)) + .bind("call"); + + const auto EndCall = cxxMemberCallExpr(on(SupportedContainers), + 
callee(cxxMethodDecl(hasName("end"))), + argumentCountIs(0)); + + const auto Literal0 = integerLiteral(equals(0)); + const auto Literal1 = integerLiteral(equals(1)); + + auto AddSimpleMatcher = [&](auto Matcher) { + Finder->addMatcher( + traverse(TK_IgnoreUnlessSpelledInSource, std::move(Matcher)), this); + }; + + // Find membership tests which use `count()`. + Finder->addMatcher(implicitCastExpr(hasImplicitDestinationType(booleanType()), + hasSourceExpression(CountCall)) + .bind("positiveComparison"), + this); + AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName("!="), hasRHS(Literal0)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal0), hasOperatorName("!="), hasRHS(CountCall)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName(">"), hasRHS(Literal0)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal0), hasOperatorName("<"), hasRHS(CountCall)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName(">="), hasRHS(Literal1)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal1), hasOperatorName("<="), hasRHS(CountCall)) + .bind("positiveComparison")); + + // Find inverted membership tests which use `count()`. 
+ AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName("=="), hasRHS(Literal0)) + .bind("negativeComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal0), hasOperatorName("=="), hasRHS(CountCall)) + .bind("negativeComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName("<="), hasRHS(Literal0)) + .bind("negativeComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal0), hasOperatorName(">="), hasRHS(CountCall)) + .bind("negativeComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(CountCall), hasOperatorName("<"), hasRHS(Literal1)) + .bind("negativeComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(Literal1), hasOperatorName(">"), hasRHS(CountCall)) + .bind("negativeComparison")); + + // Find membership tests based on `find() == end()`. + AddSimpleMatcher( + binaryOperator(hasLHS(FindCall), hasOperatorName("!="), hasRHS(EndCall)) + .bind("positiveComparison")); + AddSimpleMatcher( + binaryOperator(hasLHS(FindCall), hasOperatorName("=="), hasRHS(EndCall)) + .bind("negativeComparison")); +} + +void ContainerContainsCheck::check(const MatchFinder::MatchResult &Result) { + // Extract the information about the match + const auto *Call = Result.Nodes.getNodeAs("call"); + const auto *PositiveComparison = + Result.Nodes.getNodeAs("positiveComparison"); + const auto *NegativeComparison = + Result.Nodes.getNodeAs("negativeComparison"); + assert( + !PositiveComparison || + !NegativeComparison && + "only one of PositiveComparison or NegativeComparison should be set"); + bool Negated = NegativeComparison != nullptr; + const auto *Comparison = Negated ? NegativeComparison : PositiveComparison; + + // Diagnose the issue. + auto Diag = + diag(Call->getExprLoc(), "use 'contains' to check for membership"); + + // Don't fix it if it's in a macro invocation. Leave fixing it to the user. 
+ SourceLocation FuncCallLoc = Comparison->getEndLoc(); + if (!FuncCallLoc.isValid() || FuncCallLoc.isMacroID()) + return; + + // Create the fix it. + const auto *Member = cast(Call->getCallee()); + Diag << FixItHint::CreateReplacement( + Member->getMemberNameInfo().getSourceRange(), "contains"); + SourceLocation ComparisonBegin = Comparison->getSourceRange().getBegin(); + SourceLocation ComparisonEnd = Comparison->getSourceRange().getEnd(); + SourceLocation CallBegin = Call->getSourceRange().getBegin(); + SourceLocation CallEnd = Call->getSourceRange().getEnd(); + Diag << FixItHint::CreateReplacement( + CharSourceRange::getCharRange(ComparisonBegin, CallBegin), + Negated ? "!" : ""); + Diag << FixItHint::CreateRemoval(CharSourceRange::getCharRange( + CallEnd.getLocWithOffset(1), ComparisonEnd.getLocWithOffset(1))); +} + +} // namespace readability +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h new file mode 100644 index 0000000000000..0c2705d437797 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h @@ -0,0 +1,40 @@ +//===--- ContainerContainsCheck.h - clang-tidy ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace readability { + +/// Finds usages of `container.count()` and `find() == end()` which should be +/// replaced by a call to the `container.contains()` method introduced in C++20. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/readability-container-contains.html +class ContainerContainsCheck : public ClangTidyCheck { +public: + ContainerContainsCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) final; + void check(const ast_matchers::MatchFinder::MatchResult &Result) final; + +protected: + bool isLanguageVersionSupported(const LangOptions &LO) const final { + return LO.CPlusPlus20; + } +}; + +} // namespace readability +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index b0493d43ff318..6bbef6b7fa07c 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -12,6 +12,7 @@ #include "AvoidConstParamsInDecls.h" #include "BracesAroundStatementsCheck.h" #include "ConstReturnTypeCheck.h" +#include "ContainerContainsCheck.h" #include "ContainerDataPointerCheck.h" #include "ContainerSizeEmptyCheck.h" #include "ConvertMemberFunctionsToStatic.h" @@ -64,6 +65,8 @@ class ReadabilityModule : public ClangTidyModule { 
"readability-braces-around-statements"); CheckFactories.registerCheck( "readability-const-return-type"); + CheckFactories.registerCheck( + "readability-container-contains"); CheckFactories.registerCheck( "readability-container-data-pointer"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 060af42521552..ba0e530b7fec4 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -118,6 +118,12 @@ New checks Reports identifier with unicode right-to-left characters. +- New :doc:`readability-container-contains + ` check. + + Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should + be replaced by a call to the ``container.contains()`` method introduced in C++20. + - New :doc:`readability-container-data-pointer ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 5878345bdfcfd..fcf661a406959 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -290,6 +290,7 @@ Clang-Tidy Checks `readability-avoid-const-params-in-decls `_, "Yes" `readability-braces-around-statements `_, "Yes" `readability-const-return-type `_, "Yes" + `readability-container-contains `_, "Yes" `readability-container-data-pointer `_, "Yes" `readability-container-size-empty `_, "Yes" `readability-convert-member-functions-to-static `_, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst new file mode 100644 index 0000000000000..07d1e352d3b1b --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst @@ -0,0 +1,25 @@ +.. 
title:: clang-tidy - readability-container-contains + +readability-container-contains +============================== + +Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should be replaced by a call to the ``container.contains()`` method introduced in C++20. + +Whether an element is contained inside a container should be checked with ``contains`` instead of ``count``/``find`` because ``contains`` conveys the intent more clearly. Furthermore, for containers which permit multiple entries per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than ``count`` because ``count`` has to do unnecessary additional work. + +Examples: + +=========================================== ============================== +Initial expression Result +------------------------------------------- ------------------------------ +``myMap.find(x) == myMap.end()`` ``!myMap.contains(x)`` +``myMap.find(x) != myMap.end()`` ``myMap.contains(x)`` +``if (myMap.count(x))`` ``if (myMap.contains(x))`` +``bool exists = myMap.count(x)`` ``bool exists = myMap.contains(x)`` +``bool exists = myMap.count(x) > 0`` ``bool exists = myMap.contains(x)`` +``bool exists = myMap.count(x) >= 1`` ``bool exists = myMap.contains(x)`` +``bool missing = myMap.count(x) == 0`` ``bool missing = !myMap.contains(x)`` +=========================================== ============================== + +This check applies to ``std::set``, ``std::unordered_set``, ``std::map``, ``std::unordered_map`` and the corresponding multi-key variants. +It is only active for C++20 and later, as the ``contains`` method was only added in C++20.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp new file mode 100644 index 0000000000000..c4ea1e27e63e6 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp @@ -0,0 +1,230 @@ +// RUN: %check_clang_tidy -std=c++20 %s readability-container-contains %t + +// Some *very* simplified versions of `map` etc. +namespace std { + +template +struct map { + unsigned count(const Key &K) const; + bool contains(const Key &K) const; + void *find(const Key &K); + void *end(); +}; + +template +struct set { + unsigned count(const Key &K) const; + bool contains(const Key &K) const; +}; + +template +struct unordered_set { + unsigned count(const Key &K) const; + bool contains(const Key &K) const; +}; + +template +struct multimap { + unsigned count(const Key &K) const; + bool contains(const Key &K) const; +}; + +} // namespace std + +// Check that we detect various common ways to check for membership +int testDifferentCheckTypes(std::map &MyMap) { + if (MyMap.count(0)) + // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MyMap.contains(0)) + return 1; + bool C1 = MyMap.count(1); + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C1 = MyMap.contains(1); + auto C2 = static_cast(MyMap.count(1)); + // CHECK-MESSAGES: :[[@LINE-1]]:37: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C2 = static_cast(MyMap.contains(1)); + auto C3 = MyMap.count(2) != 0; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C3 = MyMap.contains(2); + auto C4 = MyMap.count(3) > 0; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 
'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C4 = MyMap.contains(3); + auto C5 = MyMap.count(4) >= 1; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C5 = MyMap.contains(4); + auto C6 = MyMap.find(5) != MyMap.end(); + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C6 = MyMap.contains(5); + return C1 + C2 + C3 + C4 + C5 + C6; +} + +// Check that we detect various common ways to check for non-membership +int testNegativeChecks(std::map &MyMap) { + bool C1 = !MyMap.count(-1); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C1 = !MyMap.contains(-1); + auto C2 = MyMap.count(-2) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C2 = !MyMap.contains(-2); + auto C3 = MyMap.count(-3) <= 0; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C3 = !MyMap.contains(-3); + auto C4 = MyMap.count(-4) < 1; + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C4 = !MyMap.contains(-4); + auto C5 = MyMap.find(-5) == MyMap.end(); + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: auto C5 = !MyMap.contains(-5); + return C1 + C2 + C3 + C4 + C5; +} + +// Check for various types +int testDifferentTypes(std::map &M, std::unordered_set &US, std::set &S, std::multimap &MM) { + bool C1 = M.count(1001); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: use 'contains' to check for membership 
[readability-container-contains] + // CHECK-FIXES: bool C1 = M.contains(1001); + bool C2 = US.count(1002); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C2 = US.contains(1002); + bool C3 = S.count(1003); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C3 = S.contains(1003); + bool C4 = MM.count(1004); + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C4 = MM.contains(1004); + return C1 + C2 + C3 + C4; +} + +// The check detects all kind of `const`, reference, rvalue-reference and value types. +int testQualifiedTypes(std::map ValueM, std::map &RefM, const std::map &ConstRefM, std::map &&RValueM) { + bool C1 = ValueM.count(2001); + // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C1 = ValueM.contains(2001); + bool C2 = RefM.count(2002); + // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C2 = RefM.contains(2002); + bool C3 = ConstRefM.count(2003); + // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C3 = ConstRefM.contains(2003); + bool C4 = RValueM.count(2004); + // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C4 = RValueM.contains(2004); + return C1 + C2 + C3 + C4; +} + +// This is effectively a membership check, as the result is implicitly casted +// to `bool`. 
+bool returnContains(std::map &M) { + return M.count(42); + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: return M.contains(42); +} + +// This returns the actual count and should not be rewritten +int actualCount(std::multimap &M) { + return M.count(21); + // NO-WARNING. + // CHECK-FIXES: return M.count(21); +} + +// Check that we are not confused by aliases +namespace s2 = std; +using MyMapT = s2::map; +int typeAliases(MyMapT &MyMap) { + bool C1 = MyMap.count(99); + // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: bool C1 = MyMap.contains(99); + return C1; +} + +// Check that the tests also trigger for a local variable and not only for +// function arguments. +bool localVar() { + using namespace std; + map LocalM; + return LocalM.count(42); + // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: return LocalM.contains(42); +} + +// Check various usages of an actual `count` which isn't rewritten +int nonRewrittenCount(std::multimap &MyMap) { + // This is an actual test if we have at least 2 usages. Shouldn't be rewritten. + bool C1 = MyMap.count(1) >= 2; + // NO-WARNING. + // CHECK-FIXES: bool C1 = MyMap.count(1) >= 2; + + // "< 0" makes little sense and is always `false`. Still, let's ensure we + // don't accidentally rewrite it to 'contains'. + bool C2 = MyMap.count(2) < 0; + // NO-WARNING. + // CHECK-FIXES: bool C2 = MyMap.count(2) < 0; + + // The `count` is used in some more complicated formula. + bool C3 = MyMap.count(1) + MyMap.count(2) * 2 + MyMap.count(3) / 3 >= 20; + // NO-WARNING. + // CHECK-FIXES: bool C3 = MyMap.count(1) + MyMap.count(2) * 2 + MyMap.count(3) / 3 >= 20; + + // This could theoretically be rewritten into a 'contains' after removig the + // `4` on both sides of the comparison. 
For the time being, we don't detect + // this case. + bool C4 = MyMap.count(1) + 4 > 4; + // NO-WARNING. + // CHECK-FIXES: bool C4 = MyMap.count(1) + 4 > 4; + + return C1 + C2 + C3 + C4; +} + +// We don't want to rewrite if the `contains` call is from a macro expansion +int testMacroExpansion(std::unordered_set &MySet) { +#define COUNT_ONES(SET) SET.count(1) + // Rewriting the macro would break the code + // CHECK-FIXES: #define COUNT_ONES(SET) SET.count(1) + // We still want to warn the user even if we don't offer a fixit + if (COUNT_ONES(MySet)) { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-MESSAGES: note: expanded from macro 'COUNT_ONES' + return COUNT_ONES(MySet); + } +#undef COUNT_ONES +#define COUNT_ONES count(1) + // Rewriting the macro would break the code + // CHECK-FIXES: #define COUNT_ONES count(1) + // We still want to warn the user even if we don't offer a fixit + if (MySet.COUNT_ONES) { + // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-MESSAGES: note: expanded from macro 'COUNT_ONES' + return MySet.COUNT_ONES; + } +#undef COUNT_ONES +#define MY_SET MySet + // CHECK-FIXES: #define MY_SET MySet + // We still want to rewrite one of the two calls to `count` + if (MY_SET.count(1)) { + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: use 'contains' to check for membership [readability-container-contains] + // CHECK-FIXES: if (MY_SET.contains(1)) { + return MY_SET.count(1); + } +#undef MY_SET + return 0; +} + +// The following map has the same interface like `std::map`. +template +struct CustomMap { + unsigned count(const Key &K) const; + bool contains(const Key &K) const; + void *find(const Key &K); + void *end(); +}; + +// The clang-tidy check is currently hard-coded against the `std::` containers +// and hence won't annotate the following instance. 
We might change this in the +// future and also detect the following case. +void *testDifferentCheckTypes(CustomMap &MyMap) { + if (MyMap.count(0)) + // NO-WARNING. + // CHECK-FIXES: if (MyMap.count(0)) + return nullptr; + return MyMap.find(2); +} From e4074432d5bf5c295f96eeed27c5b693f5b3bf16 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 24 Jan 2022 11:57:23 +0000 Subject: [PATCH 363/946] [X86] Remove avx512f integer and/or/xor/min/max reduction intrinsics and use generic equivalents None of these have any reordering issues, and they still emit the same reduction intrinsics without any change in the existing test coverage: llvm-project\clang\test\CodeGen\X86\avx512-reduceIntrin.c llvm-project\clang\test\CodeGen\X86\avx512-reduceMinMaxIntrin.c Differential Revision: https://reviews.llvm.org/D117881 --- clang/include/clang/Basic/BuiltinsX86.def | 12 ------ clang/lib/CodeGen/CGBuiltin.cpp | 36 ----------------- clang/lib/Headers/avx512fintrin.h | 48 +++++++++++------------ 3 files changed, 24 insertions(+), 72 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index a8f5567248624..0669a96b942b3 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2015,8 +2015,6 @@ TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f" // generic reduction intrinsics TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16") @@ -2039,16 
+2037,6 @@ TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl") TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f") TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_or_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_smax_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_smax_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_smin_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_smin_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_umax_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_umax_q512, "OiV8Oi", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_umin_d512, "iV16i", "ncV:512:", "avx512f") -TARGET_BUILTIN(__builtin_ia32_reduce_umin_q512, "OiV8Oi", "ncV:512:", "avx512f") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, "vvC*UiUi", "n", "mwaitx") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4c68b20067b99..cd35e7cbe76f7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14365,12 +14365,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0]}); } - case X86::BI__builtin_ia32_reduce_and_d512: - case X86::BI__builtin_ia32_reduce_and_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: case X86::BI__builtin_ia32_reduce_fadd_ph512: 
@@ -14417,36 +14411,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0]}); } - case X86::BI__builtin_ia32_reduce_or_d512: - case X86::BI__builtin_ia32_reduce_or_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_or, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } - case X86::BI__builtin_ia32_reduce_smax_d512: - case X86::BI__builtin_ia32_reduce_smax_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_smax, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } - case X86::BI__builtin_ia32_reduce_smin_d512: - case X86::BI__builtin_ia32_reduce_smin_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_smin, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } - case X86::BI__builtin_ia32_reduce_umax_d512: - case X86::BI__builtin_ia32_reduce_umax_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_umax, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } - case X86::BI__builtin_ia32_reduce_umin_d512: - case X86::BI__builtin_ia32_reduce_umin_q512: { - Function *F = - CGM.getIntrinsic(Intrinsic::vector_reduce_umin, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } // 3DNow! 
case X86::BI__builtin_ia32_pswapdsf: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 8695aeb94de24..50e0e287d9fc7 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -9324,11 +9324,11 @@ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512 } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { - return __builtin_ia32_reduce_and_q512(__W); + return __builtin_reduce_and((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { - return __builtin_ia32_reduce_or_q512(__W); + return __builtin_reduce_or((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 @@ -9346,13 +9346,13 @@ _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W); - return __builtin_ia32_reduce_and_q512(__W); + return __builtin_reduce_and((__v8di)__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); - return __builtin_ia32_reduce_or_q512(__W); + return __builtin_reduce_or((__v8di)__W); } // -0.0 is used to ignore the start value since it is the neutral value of @@ -9390,12 +9390,12 @@ _mm512_reduce_mul_epi32(__m512i __W) { static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W) { - return __builtin_ia32_reduce_and_d512((__v16si)__W); + return __builtin_reduce_and((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W) { - return __builtin_ia32_reduce_or_d512((__v16si)__W); + return __builtin_reduce_or((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 @@ -9413,13 +9413,13 @@ _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { static __inline__ int 
__DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W); - return __builtin_ia32_reduce_and_d512((__v16si)__W); + return __builtin_reduce_and((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); - return __builtin_ia32_reduce_or_d512((__v16si)__W); + return __builtin_reduce_or((__v16si)__W); } static __inline__ float __DEFAULT_FN_ATTRS512 @@ -9446,89 +9446,89 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { - return __builtin_ia32_reduce_smax_q512(__V); + return __builtin_reduce_max((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V) { - return __builtin_ia32_reduce_umax_q512(__V); + return __builtin_reduce_max((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V) { - return __builtin_ia32_reduce_smin_q512(__V); + return __builtin_reduce_min((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V) { - return __builtin_ia32_reduce_umin_q512(__V); + return __builtin_reduce_min((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); - return __builtin_ia32_reduce_smax_q512(__V); + return __builtin_reduce_max((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_maskz_mov_epi64(__M, __V); - return __builtin_ia32_reduce_umax_q512(__V); + return __builtin_reduce_max((__v8du)__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i 
__V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); - return __builtin_ia32_reduce_smin_q512(__V); + return __builtin_reduce_min((__v8di)__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V); - return __builtin_ia32_reduce_umin_q512(__V); + return __builtin_reduce_min((__v8du)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { - return __builtin_ia32_reduce_smax_d512((__v16si)__V); + return __builtin_reduce_max((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V) { - return __builtin_ia32_reduce_umax_d512((__v16si)__V); + return __builtin_reduce_max((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V) { - return __builtin_ia32_reduce_smin_d512((__v16si)__V); + return __builtin_reduce_min((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V) { - return __builtin_ia32_reduce_umin_d512((__v16si)__V); + return __builtin_reduce_min((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); - return __builtin_ia32_reduce_smax_d512((__v16si)__V); + return __builtin_reduce_max((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_maskz_mov_epi32(__M, __V); - return __builtin_ia32_reduce_umax_d512((__v16si)__V); + return __builtin_reduce_max((__v16su)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); - return __builtin_ia32_reduce_smin_d512((__v16si)__V); + return 
__builtin_reduce_min((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V); - return __builtin_ia32_reduce_umin_d512((__v16si)__V); + return __builtin_reduce_min((__v16su)__V); } static __inline__ double __DEFAULT_FN_ATTRS512 From 8082ab2fc391cb88ab142d3ce8f63e6eb8641a23 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 24 Jan 2022 10:41:51 +0000 Subject: [PATCH 364/946] [LoopVectorize] Support epilogue vectorisation of loops with reductions isCandidateForEpilogueVectorization will currently return false for loops which contain reductions. This patch removes this restriction and makes the following changes to support epilogue vectorisation with reductions: - `fixReduction`: If fixReduction is being called during vectorisation of the epilogue, the phi node it creates will need to additionally carry incoming values from the middle block of the main loop. - `createEpilogueVectorizedLoopSkeleton`: The incoming values of the phi created by fixReduction are updated after the vec.epilog.iter.check block is added. The phi is also moved to the preheader of the epilogue. - `processLoop`: The start value of any VPReductionPHIRecipes are updated before vectorising the epilogue loop. The getResumeInstr function added to the ILV will return the resume instruction associated with the recurrence descriptor. 
Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D116928 --- .../Transforms/Vectorize/LoopVectorize.cpp | 79 ++- .../sve-epilog-vect-inloop-reductions.ll | 121 ++++ .../AArch64/sve-epilog-vect-reductions.ll | 121 ++++ .../sve-epilog-vect-strict-reductions.ll | 116 ++++ .../X86/invariant-store-vectorization.ll | 106 ++-- .../Transforms/LoopVectorize/X86/pr35432.ll | 2 +- .../Transforms/LoopVectorize/X86/pr42674.ll | 2 +- .../epilog-vectorization-reductions.ll | 529 ++++++++++++++++++ ...ptimal-epilog-vectorization-limitations.ll | 33 -- 9 files changed, 1030 insertions(+), 79 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll create mode 100644 llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7b90dcff7bc1e..d11f4146b5905 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -557,6 +557,10 @@ class InnerLoopVectorizer { /// vector of instructions. void addMetadata(ArrayRef To, Instruction *From); + // Returns the resume value (bc.merge.rdx) for a reduction as + // generated by fixReduction. + PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc); + protected: friend class LoopVectorizationPlanner; @@ -823,6 +827,11 @@ class InnerLoopVectorizer { /// Structure to hold information about generated runtime checks, responsible /// for cleaning the checks, if vectorization turns out unprofitable. GeneratedRTChecks &RTChecks; + + // Holds the resume values for reductions in the loops, used to set the + // correct start value of reduction PHIs when vectorizing the epilogue. 
+ SmallMapVector + ReductionResumeValues; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -1218,6 +1227,14 @@ void InnerLoopVectorizer::addMetadata(ArrayRef To, } } +PHINode *InnerLoopVectorizer::getReductionResumeValue( + const RecurrenceDescriptor &RdxDesc) { + auto It = ReductionResumeValues.find(&RdxDesc); + assert(It != ReductionResumeValues.end() && + "Expected to find a resume value for the reduction."); + return It->second; +} + namespace llvm { // Loop vectorization cost-model hints how the scalar epilogue loop should be @@ -4287,13 +4304,29 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, : Builder.CreateZExt(ReducedPartRdx, PhiTy); } + PHINode *ResumePhi = + dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); + // Create a phi node that merges control-flow from the backedge-taken check // block and the middle block. PHINode *BCBlockPhi = PHINode::Create(PhiTy, 2, "bc.merge.rdx", LoopScalarPreHeader->getTerminator()); - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) - BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]); - BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); + + // If we are fixing reductions in the epilogue loop then we should already + // have created a bc.merge.rdx Phi after the main vector body. Ensure that + // we carry over the incoming values correctly. + for (auto *Incoming : predecessors(LoopScalarPreHeader)) { + if (Incoming == LoopMiddleBlock) + BCBlockPhi->addIncoming(ReducedPartRdx, Incoming); + else if (ResumePhi && llvm::is_contained(ResumePhi->blocks(), Incoming)) + BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), + Incoming); + else + BCBlockPhi->addIncoming(ReductionStartValue, Incoming); + } + + // Set the resume value for this reduction + ReductionResumeValues.insert({&RdxDesc, BCBlockPhi}); // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. 
@@ -5811,10 +5844,8 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization( const Loop &L, ElementCount VF) const { // Cross iteration phis such as reductions need special handling and are // currently unsupported. - if (any_of(L.getHeader()->phis(), [&](PHINode &Phi) { - return Legal->isFirstOrderRecurrence(&Phi) || - Legal->isReductionVariable(&Phi); - })) + if (any_of(L.getHeader()->phis(), + [&](PHINode &Phi) { return Legal->isFirstOrderRecurrence(&Phi); })) return false; // Phis with uses outside of the loop require special handling and are @@ -8248,6 +8279,25 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() { LoopBypassBlocks.push_back(EPI.MemSafetyCheck); LoopBypassBlocks.push_back(EPI.EpilogueIterationCountCheck); + // The vec.epilog.iter.check block may contain Phi nodes from reductions which + // merge control-flow from the latch block and the middle block. Update the + // incoming values here and move the Phi into the preheader. + SmallVector PhisInBlock; + for (PHINode &Phi : VecEpilogueIterationCountCheck->phis()) + PhisInBlock.push_back(&Phi); + + for (PHINode *Phi : PhisInBlock) { + Phi->replaceIncomingBlockWith( + VecEpilogueIterationCountCheck->getSinglePredecessor(), + VecEpilogueIterationCountCheck); + Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck); + if (EPI.SCEVSafetyCheck) + Phi->removeIncomingValue(EPI.SCEVSafetyCheck); + if (EPI.MemSafetyCheck) + Phi->removeIncomingValue(EPI.MemSafetyCheck); + Phi->moveBefore(LoopVectorPreHeader->getFirstNonPHI()); + } + // Generate a resume induction for the vector epilogue and put it in the // vector epilogue preheader Type *IdxTy = Legal->getWidestInductionType(); @@ -10567,6 +10617,21 @@ bool LoopVectorizePass::processLoop(Loop *L) { Checks); VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF); + + // Ensure that the start values for any VPReductionPHIRecipes are + // updated before vectorising the epilogue loop. 
+ VPBasicBlock *Header = BestEpiPlan.getEntry()->getEntryBasicBlock(); + for (VPRecipeBase &R : Header->phis()) { + if (auto *ReductionPhi = dyn_cast(&R)) { + if (auto *Resume = MainILV.getReductionResumeValue( + ReductionPhi->getRecurrenceDescriptor())) { + VPValue *StartVal = new VPValue(Resume); + BestEpiPlan.addExternalDef(StartVal); + ReductionPhi->setOperand(0, StartVal); + } + } + } + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT); ++LoopsEpilogueVectorized; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll new file mode 100644 index 0000000000000..b8f941a7a4481 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -prefer-inloop-reductions -S | FileCheck %s + +; +; In-loop integer and reduction +; +define i64 @int_reduction_and(i64* noalias nocapture %a, i64 %N) { +; CHECK-LABEL: @int_reduction_and( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], 
[[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64* [[TMP16]] to * +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64( [[WIDE_LOAD3]]) +; CHECK-NEXT: [[TMP21]] = and i64 [[TMP20]], [[VEC_PHI2]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = and i64 [[TMP21]], [[TMP19]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX8]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD10]]) +; CHECK-NEXT: [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI9]] +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; 
CHECK-NEXT: br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; CHECK-NEXT: [[L3:%.*]] = load i64, i64* [[L2]], align 4 +; CHECK-NEXT: [[AND]] = and i64 [[RDX]], [[L3]] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[AND_LCSSA4:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[AND_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[AND_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %rdx = phi i64 [ %and, %for.body ], [ 1, %entry ] + %l2 = getelementptr inbounds i64, i64* %a, i64 %iv + %l3 = load i64, i64* %l2 + %and = and i64 %rdx, %l3 + 
%iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i64 %and +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.interleave.count", i32 2} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll new file mode 100644 index 0000000000000..e3e219cc1601a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -S | FileCheck %s + +; +; Integer reduction with interleaving & a start value of 5 +; +define i64 @int_reduction_add(i64* %a, i64 %N) { +; CHECK-LABEL: @int_reduction_add( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i64 5, i32 0), 
[[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64* [[TMP16]] to * +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18]] = add [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP19]] = add [[WIDE_LOAD3]], [[VEC_PHI2]] +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add [[TMP19]], [[TMP18]] +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: 
vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX8]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29]] = add <2 x i64> [[WIDE_LOAD10]], [[VEC_PHI9]] +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2 +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]] +; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP29]]) +; CHECK-NEXT: 
[[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]] +; CHECK-NEXT: br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add i64 [[TMP32]], [[SUM]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[ADD_LCSSA4:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum = phi i64 [ 5, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv + %0 = load i64, i64* %arrayidx + %add = add i64 %0, %sum + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop 
!0 + +for.end: + ret i64 %add +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.interleave.count", i32 2} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll new file mode 100644 index 0000000000000..a32a6723e83bf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -S | FileCheck %s + +; +; Strict fadd reduction with interleaving +; +define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) { +; CHECK-LABEL: @fadd_strict( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; 
CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP12]] to * +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to * +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load , * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], [[WIDE_LOAD2]]) +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label 
[[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]] +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX7]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <2 x float>* +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI8]], <2 x float> [[WIDE_LOAD9]]) +; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi 
float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd float [[TMP29]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[ADD_LCSSA3:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0xFFFFFFFFE0000000, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %iv + %0 = load float, float* %arrayidx, align 4 + %add = fadd float %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret float %add +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.interleave.count", i32 2} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll 
b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index ab5003f1b74fd..91ab21df4b273 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -9,11 +9,11 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b) { ; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction( -; CHECK-NEXT: entry: +; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32 ; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 64 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1 ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) @@ -21,59 +21,91 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[SMAX6]], 64 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775744 ; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 16 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 32 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 48 ; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0 ; CHECK-NEXT: [[TMP8]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP9]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]] -; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI8]], [[WIDE_LOAD11]] -; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI9]], [[WIDE_LOAD12]] +; CHECK-NEXT: [[TMP9]] = add <16 x i32> [[VEC_PHI8]], [[WIDE_LOAD11]] +; CHECK-NEXT: [[TMP10]] = add <16 x i32> [[VEC_PHI9]], [[WIDE_LOAD12]] +; CHECK-NEXT: [[TMP11]] = add <16 x i32> [[VEC_PHI10]], [[WIDE_LOAD13]] ; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP9]], [[TMP8]] -; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX13]] -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX14]]) +; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX15:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX14]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX15]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 
[ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX6]], 56 +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[SMAX16:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) +; CHECK-NEXT: [[N_VEC18:%.*]] = and i64 [[SMAX16]], 9223372036854775800 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> , i32 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI21:%.*]] = phi <8 x i32> [ [[TMP14]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX20]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>* +; CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 8 +; CHECK-NEXT: [[TMP17]] = add <8 x i32> [[VEC_PHI21]], [[WIDE_LOAD22]] +; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 +; CHECK-NEXT: [[INDEX_NEXT23]] = add nuw i64 [[INDEX20]], 8 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT23]], [[N_VEC18]] +; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: 
vec.epilog.middle.block: +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]]) +; CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC18]] +; CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i32 [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[T0:%.*]] = phi i32 [ [[T3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[T0:%.*]] = phi i32 [ [[T3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX24]], [[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]] ; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[T1]], align 8 ; CHECK-NEXT: [[T3]] = add i32 [[T0]], [[T2]] ; CHECK-NEXT: store i32 [[NTRUNC]], i32* [[A]], align 4 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[T3_LCSSA:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ 
[[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[T4:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[T3_LCSSA]], [[FOR_END_LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[T4]] ; entry: @@ -129,14 +161,14 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !8, !noalias !11 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !10, !noalias !13 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !11 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT9]], <16 x i32*> [[BROADCAST_SPLAT11]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !11 +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP3]], align 4, !alias.scope !10, !noalias !13 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT9]], <16 x i32*> [[BROADCAST_SPLAT11]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !13 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label 
[[VEC_EPILOG_ITER_CHECK:%.*]] @@ -166,7 +198,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT21]], <8 x i32*> [[BROADCAST_SPLAT23]], i32 4, <8 x i1> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX16]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC14]] -; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[SMAX12]], [[N_VEC14]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -186,7 +218,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b, ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -257,17 +289,17 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !17, !noalias !20 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !18, !noalias !21 ; CHECK-NEXT: 
[[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>* -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20 +; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !18, !noalias !21 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>* -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !23 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !24 +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !25, !noalias !24 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -300,7 +332,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 
4, <8 x i1> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT35]] = add nuw i64 [[INDEX26]], 8 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC24]] -; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N25:%.*]] = icmp eq i64 [[SMAX22]], [[N_VEC24]] ; CHECK-NEXT: br i1 [[CMP_N25]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] @@ -322,7 +354,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* ; CHECK: latch: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 4bd408ccd4036..ebf9b5467b582 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -89,7 +89,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ 
[[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll index bb50d0997e58e..7516c055ab732 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll @@ -8,7 +8,7 @@ ; the vector loop was dead code leaving only a scalar remainder. define zeroext i8 @sum() { ; CHECK-LABEL: @sum( -; CHECK-NEXT: entry: +; CHECK-NEXT: iter.check: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll new file mode 100644 index 0000000000000..57ddff65ff90a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -0,0 +1,529 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S | FileCheck %s + +; +; Integer reduction with a start value of 5 +; +define i64 @int_reduction_add(i64* %a, i64 %N) { +; CHECK-LABEL: @int_reduction_add( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[TMP7:%.*]] = 
insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <4 x i64>* +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, <4 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[WIDE_LOAD8]], [[VEC_PHI7]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP12]]) +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX10]], 
[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add i64 [[TMP15]], [[SUM]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[ADD_LCSSA2:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum = phi i64 [ 5, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv + %0 = load i64, i64* %arrayidx + %add = add i64 %0, %sum + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret i64 %add +} + +; +; Floating point max reduction +; +define float @fp_reduction_max(float* noalias %a, i64 %N) { +; CHECK-LABEL: @fp_reduction_max( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: 
[[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI7]], [[WIDE_LOAD8]] +; CHECK-NEXT: [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI7]], <4 x float> [[WIDE_LOAD8]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP13]]) +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
[[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[C0:%.*]] = fcmp fast ogt float [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[V0]] = select fast i1 [[C0]], float [[RESULT_08]], float [[L0]] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[V0_LCSSA2:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[V0_LCSSA2]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret float [[V0_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %v0, %for.body ], [ 0.000000e+00, %entry ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv + %l0 = load float, float* %arrayidx + %c0 = fcmp fast ogt float %result.08, %l0 + %v0 = select fast i1 %c0, float %result.08, float %l0 + %iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %v0 +} + +; +; Extension is required before the reduction 
operation & result is truncated +; +define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) { +; CHECK-LABEL: @reduction_or_trunc( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP9]] = zext <4 x i16> [[TMP8]] to <4 x i32> +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP11]] to i32 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; 
CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[INDEX3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI4]], +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP18]], align 2 +; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD5]] to <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX3]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 256 +; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16> +; CHECK-NEXT: [[TMP23]] = zext <4 x i16> [[TMP22]] to <4 x i32> +; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP24:%.*]] = trunc <4 x i32> [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> 
[[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = zext i16 [[TMP25]] to i32 +; CHECK-NEXT: [[CMP_N2:%.*]] = icmp eq i32 256, 256 +; CHECK-NEXT: br i1 [[CMP_N2]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[IV]] +; CHECK-NEXT: [[LOAD:%.*]] = load i16, i16* [[GEP]], align 2 +; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[LOAD]] to i32 +; CHECK-NEXT: [[XOR]] = or i32 [[SUM_02]], [[EXT]] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[XOR_LCSSA1:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[XOR_LCSSA1]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[XOR_LCSSA]] to i16 +; CHECK-NEXT: ret i16 [[RET]] +; +entry: + br label %for.body + +for.body: + %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ] + %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %sum.02 = and i32 %sum.02p, 65535 + %gep = 
getelementptr inbounds i16, i16* %ptr, i32 %iv + %load = load i16, i16* %gep + %ext = zext i16 %load to i32 + %xor = or i32 %sum.02, %ext + %iv.next = add i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + %ret = trunc i32 %xor to i16 + ret i16 %ret +} + +; +; More than one reduction in the loop +; +define float @multiple_fp_rdx(float* %A, i64 %N) { +; CHECK-LABEL: @multiple_fp_rdx( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = fadd fast <4 x float> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq 
i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.500000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX3:%.*]] = phi float [ 1.000000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF6:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC7:%.*]] = sub i64 [[N]], [[N_MOD_VF6]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> , float [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x 
float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX9]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15]] = fadd fast <4 x float> [[VEC_PHI11]], [[WIDE_LOAD12]] +; CHECK-NEXT: [[TMP16]] = fmul fast <4 x float> [[VEC_PHI10]], [[WIDE_LOAD12]] +; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX9]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP16]]) +; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC7]] +; CHECK-NEXT: br i1 [[CMP_N8]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd fast float [[SUM]], [[TMP20]] +; CHECK-NEXT: [[MUL]] = fmul fast float [[PROD]], [[TMP20]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[ADD_LCSSA5:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[MUL_LCSSA4:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA5]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[MUL_LCSSA4]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[MUL_LCSSA]], [[ADD_LCSSA]] +; CHECK-NEXT: ret float [[DIV]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %prod = phi float [ 15.000000e+00, %entry ], [ %mul, %for.body ] + %sum = phi float [ 10.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %iv + %0 = load float, float* %arrayidx + %add = fadd fast float %sum, %0 + %mul = fmul fast float %prod, %0 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.end, 
label %for.body + +for.end: + %div = fdiv float %mul, %add + ret float %div +} + +; +; Start value of the reduction is a Phi node from the outer loop +; +define i32 @reduction_phi_start_val(i32* %A, i64 %N) { +; CHECK-LABEL: @reduction_phi_start_val( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[ITER_CHECK:%.*]] +; CHECK: iter.check: +; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[FOR_COND:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[START_SUM:%.*]] = phi i32 [ [[SUB_LCSSA:%.*]], [[FOR_COND]] ], [ 5, [[ENTRY]] ] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[START_SUM]], i32 0 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; 
CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START_SUM]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX6]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13]] = sub <4 x i32> [[VEC_PHI7]], [[WIDE_LOAD8]] +; CHECK-NEXT: [[INDEX_NEXT9]] = 
add nuw i64 [[INDEX6]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) +; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_COND_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SUB]] = sub nsw i32 [[SUM]], [[LOAD]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: for.cond.loopexit: +; CHECK-NEXT: [[SUB_LCSSA2:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.cond: +; CHECK-NEXT: [[SUB_LCSSA]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[SUB_LCSSA2]], [[FOR_COND_LOOPEXIT]] ] +; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 
+; CHECK-NEXT: [[OUTER_EXITCOND_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[OUTER_EXITCOND_NOT]], label [[FOR_END:%.*]], label [[ITER_CHECK]] +; CHECK: for.end: +; CHECK-NEXT: [[SUB_LCSSA_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[FOR_COND]] ] +; CHECK-NEXT: ret i32 [[SUB_LCSSA_LCSSA]] +; +entry: + br label %for.cond.preheader + +for.cond.preheader: + %outer.iv = phi i64 [ %outer.iv.next, %for.cond ], [ 0, %entry ] + %start.sum = phi i32 [ %sub, %for.cond ], [ 5, %entry ] + br label %for.body + +for.body: + %iv = phi i64 [ 0, %for.cond.preheader ], [ %iv.next, %for.body ] + %sum = phi i32 [ %start.sum, %for.cond.preheader ], [ %sub, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv + %load = load i32, i32* %arrayidx, align 4 + %sub = sub nsw i32 %sum, %load + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond, label %for.body + +for.cond: + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %outer.exitcond.not = icmp eq i64 %outer.iv.next, %N + br i1 %outer.exitcond.not, label %for.end, label %for.cond.preheader + +for.end: + ret i32 %sub +} diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll index c4a8f0ded86b7..b34c317f18d77 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll @@ -3,39 +3,6 @@ target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" -; Currently we cannot handle reduction loops. -; CHECK: LV: Checking a loop in "f1" -; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate. 
- -define signext i32 @f1(i8* noalias %A, i32 signext %n) { -entry: - %cmp1 = icmp sgt i32 %n, 0 - br i1 %cmp1, label %for.body.preheader, label %for.end - -for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %n to i64 - br label %for.body - -for.body: ; preds = %for.body.preheader, %for.body - %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] - %sum.02 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 - %conv = zext i8 %0 to i32 - %add = add nuw nsw i32 %sum.02, %conv - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond, label %for.body, label %for.end.loopexit - -for.end.loopexit: ; preds = %for.body - %add.lcssa = phi i32 [ %add, %for.body ] - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ] - ret i32 %sum.0.lcssa -} - ; Currently we cannot handle live-out variables that are recurrences. ; CHECK: LV: Checking a loop in "f2" ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate. 
From 54f1d950667cac9d1cbf549c72972bcb487b6fa7 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 24 Jan 2022 12:06:49 +0000 Subject: [PATCH 365/946] [gn build] Port 3696c70e67d9 --- .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 1e24a52be4725..d5743e8b209e0 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -15,6 +15,7 @@ static_library("readability") { "AvoidConstParamsInDecls.cpp", "BracesAroundStatementsCheck.cpp", "ConstReturnTypeCheck.cpp", + "ContainerContainsCheck.cpp", "ContainerDataPointerCheck.cpp", "ContainerSizeEmptyCheck.cpp", "ConvertMemberFunctionsToStatic.cpp", From b7f69b8d46502adbe7007a89bdf6ea6b6c8f95f9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 24 Jan 2022 11:27:27 +0000 Subject: [PATCH 366/946] [LV] Name values and blocks in same induction tests (NFC). This reduces the churn in the test in future updates due to numbering changes. 
--- .../Transforms/LoopVectorize/induction.ll | 138 ++++++++++-------- 1 file changed, 78 insertions(+), 60 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index de2d405a6da95..4eff30b1fc331 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2860,6 +2860,7 @@ for.end: define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK-LABEL: @i8_loop( +; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2881,18 +2882,19 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[TMP6:%.*]] -; CHECK: 6: +; CHECK: loop: ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ] ; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ] ; CHECK-NEXT: [[TMP7]] = and i32 [[A_0]], 4 ; CHECK-NEXT: [[TMP8]] = add i8 [[B_0]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP29:![0-9]+]] -; CHECK: 10: +; CHECK: exit: ; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; ; IND-LABEL: @i8_loop( +; IND-NEXT: entry: ; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2905,12 +2907,13 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; IND-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: br label [[TMP2:%.*]] -; IND: 2: +; IND: loop: ; IND-NEXT: br i1 undef, 
label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] -; IND: 3: +; IND: exit: ; IND-NEXT: ret i32 0 ; ; UNROLL-LABEL: @i8_loop( +; UNROLL-NEXT: entry: ; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2923,12 +2926,13 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[TMP2:%.*]] -; UNROLL: 2: +; UNROLL: loop: ; UNROLL-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] -; UNROLL: 3: +; UNROLL: exit: ; UNROLL-NEXT: ret i32 0 ; ; UNROLL-NO-IC-LABEL: @i8_loop( +; UNROLL-NO-IC-NEXT: entry: ; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2954,18 +2958,19 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: br label [[TMP8:%.*]] -; UNROLL-NO-IC: 8: +; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ] ; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ] ; UNROLL-NO-IC-NEXT: [[TMP9]] = and i32 [[A_0]], 4 ; UNROLL-NO-IC-NEXT: [[TMP10]] = add i8 [[B_0]], -1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP29:![0-9]+]] -; UNROLL-NO-IC: 12: +; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[DOTLCSSA]] ; ; INTERLEAVE-LABEL: @i8_loop( +; INTERLEAVE-NEXT: entry: ; 
INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -2978,28 +2983,30 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; INTERLEAVE-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: br label [[TMP2:%.*]] -; INTERLEAVE: 2: +; INTERLEAVE: loop: ; INTERLEAVE-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] -; INTERLEAVE: 3: +; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret i32 0 ; - br label %1 +entry: + br label %loop -;
+Matcher<VarDecl>isConstinit +
Matches constinit variable declarations.
+
+Given:
+  constinit int foo = 42;
+  constinit const char* bar = "bar";
+  int baz = 42;
+  [[clang::require_constant_initialization]] int xyz = 42;
+varDecl(isConstinit())
+  matches the declaration of `foo` and `bar`, but not `baz` and `xyz`.
+
+ + Matcher<VarDecl>isDefinition
Matches if a declaration has a body attached.
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4fe037741256f..5cd2896de54d5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -331,6 +331,8 @@ AST Matchers
   underlying type.
 - Added the ``isConsteval`` matcher to match ``consteval`` function
   declarations as well as `if consteval` and `if ! consteval` statements.
+- Added the ``isConstinit`` matcher to match ``constinit`` variable
+  declarations.
 
 clang-format
 ------------
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index c934b708cb96c..86bd44091b593 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -5211,6 +5211,23 @@ AST_POLYMORPHIC_MATCHER(isConstexpr,
   return Node.isConstexpr();
 }
 
+/// Matches constinit variable declarations.
+///
+/// Given:
+/// \code
+///   constinit int foo = 42;
+///   constinit const char* bar = "bar";
+///   int baz = 42;
+///   [[clang::require_constant_initialization]] int xyz = 42;
+/// \endcode
+/// varDecl(isConstinit())
+///   matches the declaration of `foo` and `bar`, but not `baz` and `xyz`.
+AST_MATCHER(VarDecl, isConstinit) {
+  if (const auto *CIA = Node.getAttr<ConstInitAttr>())
+    return CIA->isConstinit();
+  return false;
+}
+
 /// Matches selection statements with initializer.
 ///
 /// Given:
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 2210c5413cc5a..47db6b51966ac 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -406,6 +406,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isConstQualified);
   REGISTER_MATCHER(isConsteval);
   REGISTER_MATCHER(isConstexpr);
+  REGISTER_MATCHER(isConstinit);
   REGISTER_MATCHER(isCopyAssignmentOperator);
   REGISTER_MATCHER(isCopyConstructor);
   REGISTER_MATCHER(isDefaultConstructor);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index 51946e1430cf6..d1c9790401f02 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -1841,6 +1841,25 @@ TEST_P(ASTMatchersTest, IsConstexpr_MatchesIfConstexpr) {
       notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConstexpr())));
 }
 
+TEST_P(ASTMatchersTest, IsConstinit) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("constinit int foo = 1;",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(matches("extern constinit int foo;",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(matches("constinit const char* foo = \"bar\";",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(
+      notMatches("[[clang::require_constant_initialization]] int foo = 1;",
+                 varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(notMatches("constexpr int foo = 1;",
+                         varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(notMatches("static inline int foo = 1;",
+                         varDecl(hasName("foo"), isConstinit())));
+}
+
 TEST_P(ASTMatchersTest, HasInitStatement_MatchesSelectionInitializers) {
   EXPECT_TRUE(notMatches("void baz() { if (1 > 0) {} }",
                          ifStmt(hasInitStatement(anything()))));

From a0d5e938fe9c1fd6ca492a91cdc3d841aa03fc0d Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Mon, 24 Jan 2022 14:40:51 +0100
Subject: [PATCH 379/946] Add missing include llvm/ADT/STLExtras

---
 clang-tools-extra/clang-tidy/GlobList.cpp   | 1 +
 llvm/include/llvm/ADT/CoalescingBitVector.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/GlobList.cpp b/clang-tools-extra/clang-tidy/GlobList.cpp
index b594d943cc075..fe41feef38abf 100644
--- a/clang-tools-extra/clang-tidy/GlobList.cpp
+++ b/clang-tools-extra/clang-tidy/GlobList.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GlobList.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 
 namespace clang {
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 18803ecf209f4..82e2e1a9f9e25 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -15,6 +15,7 @@
 #define LLVM_ADT_COALESCINGBITVECTOR_H
 
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Debug.h"

From f6ac8088b0e890765974fee5f5820a340736f9bf Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer 
Date: Mon, 24 Jan 2022 11:41:06 +0000
Subject: [PATCH 380/946] [LoopFlatten] Added comments about usage of various
 Loop APIs. NFC.

---
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 81 ++++++++++++++++------
 1 file changed, 61 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 9b2463b3cd357..4d9578934d9e6 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -10,10 +10,13 @@
 //
 // The intention is to optimise loop nests like this, which together access an
 // array linearly:
+//
 //   for (int i = 0; i < N; ++i)
 //     for (int j = 0; j < M; ++j)
 //       f(A[i*M+j]);
+//
 // into one loop:
+//
 //   for (int i = 0; i < (N*M); ++i)
 //     f(A[i]);
 //
@@ -22,7 +25,27 @@
 // expression like i*M+j. If they had any other uses, we would have to insert a
 // div/mod to reconstruct the original values, so this wouldn't be profitable.
 //
-// We also need to prove that N*M will not overflow.
+// We also need to prove that N*M will not overflow. The preferred solution is
+// to widen the IV, which avoids overflow checks, so that is tried first. If
+// the IV cannot be widened, then we try to determine that this new tripcount
+// expression won't overflow.
+//
+// Q: Does LoopFlatten use SCEV?
+// Short answer: Yes and no.
+//
+// Long answer:
+// For this transformation to be valid, we require all uses of the induction
+// variables to be linear expressions of the form i*M+j. The different Loop
+// APIs are used to get some loop components like the induction variable,
+// compare statement, etc. In addition, we do some pattern matching to find the
+// linear expressions and other loop components like the loop increment. The
+// latter are examples of expressions that do use the induction variable, but
+// are safe to ignore when we check all uses to be of the form i*M+j. We keep
+// track of all of this in bookkeeping struct FlattenInfo.
+// We assume the loops to be canonical, i.e. starting at 0 and incrementing by
+// 1. This makes RHS of the compare the loop tripcount (with the right
+// predicate). We use SCEV to then sanity check that this tripcount matches
+// with the tripcount as computed by SCEV.
 //
 //===----------------------------------------------------------------------===//
 
@@ -75,30 +98,48 @@ static cl::opt
             cl::desc("Widen the loop induction variables, if possible, so "
                      "overflow checks won't reject flattening"));
 
+// We require all uses of both induction variables to match this pattern:
+//
+//   (OuterPHI * InnerTripCount) + InnerPHI
+//
+// I.e., it needs to be a linear expression of the induction variables and the
+// inner loop trip count. We keep track of all different expressions on which
+// checks will be performed in this bookkeeping struct.
+//
 struct FlattenInfo {
-  Loop *OuterLoop = nullptr;
+  Loop *OuterLoop = nullptr;  // The loop pair to be flattened.
   Loop *InnerLoop = nullptr;
-  // These PHINodes correspond to loop induction variables, which are expected
-  // to start at zero and increment by one on each loop.
-  PHINode *InnerInductionPHI = nullptr;
-  PHINode *OuterInductionPHI = nullptr;
-  Value *InnerTripCount = nullptr;
-  Value *OuterTripCount = nullptr;
-  BinaryOperator *InnerIncrement = nullptr;
-  BinaryOperator *OuterIncrement = nullptr;
-  BranchInst *InnerBranch = nullptr;
-  BranchInst *OuterBranch = nullptr;
-  SmallPtrSet LinearIVUses;
+
+  PHINode *InnerInductionPHI = nullptr; // These PHINodes correspond to loop
+  PHINode *OuterInductionPHI = nullptr; // induction variables, which are
+                                        // expected to start at zero and
+                                        // increment by one on each loop.
+
+  Value *InnerTripCount = nullptr; // The product of these two tripcounts
+  Value *OuterTripCount = nullptr; // will be the new flattened loop
+                                   // tripcount. Also used to recognise a
+                                   // linear expression that will be replaced.
+
+  SmallPtrSet<Value *, 4> LinearIVUses;  // Contains the linear expressions
+                                         // of the form i*M+j that will be
+                                         // replaced.
+
+  BinaryOperator *InnerIncrement = nullptr;  // Uses of induction variables in
+  BinaryOperator *OuterIncrement = nullptr;  // loop control statements that
+  BranchInst *InnerBranch = nullptr;         // are safe to ignore.
+
+  BranchInst *OuterBranch = nullptr; // The instruction that needs to be
+                                     // updated with new tripcount.
+
   SmallPtrSet InnerPHIsToTransform;
 
-  // Whether this holds the flatten info before or after widening.
-  bool Widened = false;
+  bool Widened = false; // Whether this holds the flatten info before or after
+                        // widening.
 
-  // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
-  // been applied. This bookkeeping is used so we can skip some checks on these
-  // phi nodes.
-  PHINode *NarrowInnerInductionPHI = nullptr;
-  PHINode *NarrowOuterInductionPHI = nullptr;
+  PHINode *NarrowInnerInductionPHI = nullptr; // Holds the old/narrow induction
+  PHINode *NarrowOuterInductionPHI = nullptr; // phis, i.e. the Phis before IV
+                                              // widening was applied. Used to
+                                              // skip checks on phi nodes.
 
   FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
 

From ada6d78a7802f8057f1ab7cee0bed25f91fcc4b4 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer 
Date: Mon, 24 Jan 2022 12:54:16 +0000
Subject: [PATCH 381/946] [LoopFlatten] Address FIXME about
 getTripCountFromExitCount. NFC.

Together with the previous commit which mainly documents better LoopFlatten's
overall strategy, this addresses a concern added as a FIXME comment in D110587;
the code refactoring (NFC) introduces functions (also for the SCEV usage) to
make this clearer.
---
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 351 ++++++++++++---------
 1 file changed, 199 insertions(+), 152 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 4d9578934d9e6..c46db4e63bfee 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -149,6 +149,118 @@ struct FlattenInfo {
       return false;
     return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
   }
+  bool isInnerLoopIncrement(User *U) {
+    return InnerIncrement == U;
+  }
+  bool isOuterLoopIncrement(User *U) {
+    return OuterIncrement == U;
+  }
+  bool isInnerLoopTest(User *U) {
+    return InnerBranch->getCondition() == U;
+  }
+
+  bool checkOuterInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    for (User *U : OuterInductionPHI->users()) {
+      if (isOuterLoopIncrement(U))
+        continue;
+
+      auto IsValidOuterPHIUses = [&] (User *U) -> bool {
+        LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
+        if (!ValidOuterPHIUses.count(U)) {
+          LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+          return false;
+        }
+        LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+        return true;
+      };
+
+      if (auto *V = dyn_cast<TruncInst>(U)) {
+        for (auto *K : V->users()) {
+          if (!IsValidOuterPHIUses(K))
+            return false;
+        }
+        continue;
+      }
+
+      if (!IsValidOuterPHIUses(U))
+        return false;
+    }
+    return true;
+  }
+
+  bool matchLinearIVUser(User *U, Value *InnerTripCount,
+                         SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
+    Value *MatchedMul = nullptr;
+    Value *MatchedItCount = nullptr;
+
+    bool IsAdd = match(U, m_c_Add(m_Specific(InnerInductionPHI),
+                                  m_Value(MatchedMul))) &&
+                 match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
+                                           m_Value(MatchedItCount)));
+
+    // Matches the same pattern as above, except it also looks for truncs
+    // on the phi, which can be the result of widening the induction variables.
+    bool IsAddTrunc =
+        match(U, m_c_Add(m_Trunc(m_Specific(InnerInductionPHI)),
+                         m_Value(MatchedMul))) &&
+        match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
+                                  m_Value(MatchedItCount)));
+
+    if (!MatchedItCount)
+      return false;
+
+    // Look through extends if the IV has been widened.
+    if (Widened &&
+        (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
+      assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
+             "Unexpected type mismatch in types after widening");
+      MatchedItCount = isa<SExtInst>(MatchedItCount)
+                           ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
+                           : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
+    }
+
+    if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
+      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+      ValidOuterPHIUses.insert(MatchedMul);
+      LinearIVUses.insert(U);
+      return true;
+    }
+
+    LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+    return false;
+  }
+
+  bool checkInnerInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    Value *SExtInnerTripCount = InnerTripCount;
+    if (Widened &&
+        (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
+      SExtInnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
+
+    for (User *U : InnerInductionPHI->users()) {
+      if (isInnerLoopIncrement(U))
+        continue;
+
+      // After widening the IVs, a trunc instruction might have been introduced,
+      // so look through truncs.
+      if (isa<TruncInst>(U)) {
+        if (!U->hasOneUse())
+          return false;
+        U = *U->user_begin();
+      }
+
+      // If the use is in the compare (which is also the condition of the inner
+      // branch) then the compare has been altered by another transformation e.g
+      // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
+      // a constant. Ignore this use as the compare gets removed later anyway.
+      if (isInnerLoopTest(U))
+        continue;
+
+      if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses))
+        return false;
+    }
+    return true;
+  }
 };
 
 static bool
@@ -162,6 +274,77 @@ setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
   return true;
 }
 
+// Given the RHS of the loop latch compare instruction, verify with SCEV
+// that this is indeed the loop tripcount.
+// TODO: This used to be a straightforward check but has grown to be quite
+// complicated now. It is therefore worth revisiting what the additional
+// benefits are of this (compared to relying on canonical loops and pattern
+// matching).
+static bool verifyTripCount(Value *RHS, Loop *L,
+     SmallPtrSetImpl<Instruction *> &IterationInstructions,
+    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
+    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+    LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
+    return false;
+  }
+
+  // The Extend=false flag is used for getTripCountFromExitCount as we want
+  // to verify and match it with the pattern matched tripcount. Please note
+  // that overflow checks are performed in checkOverflow, but we first try to
+  // avoid them by widening the IV.
+  const SCEV *SCEVTripCount =
+      SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);
+
+  const SCEV *SCEVRHS = SE->getSCEV(RHS);
+  if (SCEVRHS == SCEVTripCount)
+    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
+  if (ConstantRHS) {
+    const SCEV *BackedgeTCExt = nullptr;
+    if (IsWidened) {
+      const SCEV *SCEVTripCountExt;
+      // Find the extended backedge taken count and extended trip count using
+      // SCEV. One of these should now match the RHS of the compare.
+      BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
+      SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+      if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
+        LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+        return false;
+      }
+    }
+    // If the RHS of the compare is equal to the backedge taken count we need
+    // to add one to get the trip count.
+    if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
+      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
+      Value *NewRHS = ConstantInt::get(
+          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+      return setLoopComponents(NewRHS, TripCount, Increment,
+                               IterationInstructions);
+    }
+    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+  }
+  // If the RHS isn't a constant then check that the reason it doesn't match
+  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
+  // (and take the trip count to be the RHS).
+  if (!IsWidened) {
+    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+    return false;
+  }
+  auto *TripCountInst = dyn_cast<Instruction>(RHS);
+  if (!TripCountInst) {
+    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+    return false;
+  }
+  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
+      SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
+    LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
+    return false;
+  }
+  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+}
+
 // Finds the induction variable, increment and trip count for a simple loop that
 // we can flatten.
 static bool findLoopComponents(
@@ -238,63 +421,9 @@ static bool findLoopComponents(
   // another transformation has changed the compare (e.g. icmp ult %inc,
   // tripcount -> icmp ult %j, tripcount-1), or both.
   Value *RHS = Compare->getOperand(1);
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa(BackedgeTakenCount)) {
-    LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
-    return false;
-  }
-  // The use of the Extend=false flag on getTripCountFromExitCount was added
-  // during a refactoring to preserve existing behavior.  However, there's
-  // nothing obvious in the surrounding code when handles the overflow case.
-  // FIXME: audit code to establish whether there's a latent bug here.
-  const SCEV *SCEVTripCount =
-      SE->getTripCountFromExitCount(BackedgeTakenCount, false);
-  const SCEV *SCEVRHS = SE->getSCEV(RHS);
-  if (SCEVRHS == SCEVTripCount)
-    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
-  ConstantInt *ConstantRHS = dyn_cast(RHS);
-  if (ConstantRHS) {
-    const SCEV *BackedgeTCExt = nullptr;
-    if (IsWidened) {
-      const SCEV *SCEVTripCountExt;
-      // Find the extended backedge taken count and extended trip count using
-      // SCEV. One of these should now match the RHS of the compare.
-      BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
-      SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
-      if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
-        LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-        return false;
-      }
-    }
-    // If the RHS of the compare is equal to the backedge taken count we need
-    // to add one to get the trip count.
-    if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
-      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
-      Value *NewRHS = ConstantInt::get(
-          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
-      return setLoopComponents(NewRHS, TripCount, Increment,
-                               IterationInstructions);
-    }
-    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
-  }
-  // If the RHS isn't a constant then check that the reason it doesn't match
-  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
-  // (and take the trip count to be the RHS).
-  if (!IsWidened) {
-    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-    return false;
-  }
-  auto *TripCountInst = dyn_cast(RHS);
-  if (!TripCountInst) {
-    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-    return false;
-  }
-  if ((!isa(TripCountInst) && !isa(TripCountInst)) ||
-      SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
-    LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
-    return false;
-  }
-  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+
+  return verifyTripCount(RHS, L, IterationInstructions, InductionPHI, TripCount,
+                         Increment, BackBranch, SE, IsWidened);
 }
 
 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
@@ -440,108 +569,26 @@ checkOuterLoopInsts(FlattenInfo &FI,
   return true;
 }
 
-static bool checkIVUsers(FlattenInfo &FI) {
-  // We require all uses of both induction variables to match this pattern:
-  //
-  //   (OuterPHI * InnerTripCount) + InnerPHI
-  //
-  // Any uses of the induction variables not matching that pattern would
-  // require a div/mod to reconstruct in the flattened loop, so the
-  // transformation wouldn't be profitable.
-
-  Value *InnerTripCount = FI.InnerTripCount;
-  if (FI.Widened &&
-      (isa(InnerTripCount) || isa(InnerTripCount)))
-    InnerTripCount = cast(InnerTripCount)->getOperand(0);
 
+
+// We require all uses of both induction variables to match this pattern:
+//
+//   (OuterPHI * InnerTripCount) + InnerPHI
+//
+// Any uses of the induction variables not matching that pattern would
+// require a div/mod to reconstruct in the flattened loop, so the
+// transformation wouldn't be profitable.
+static bool checkIVUsers(FlattenInfo &FI) {
   // Check that all uses of the inner loop's induction variable match the
   // expected pattern, recording the uses of the outer IV.
   SmallPtrSet ValidOuterPHIUses;
-  for (User *U : FI.InnerInductionPHI->users()) {
-    if (U == FI.InnerIncrement)
-      continue;
-
-    // After widening the IVs, a trunc instruction might have been introduced,
-    // so look through truncs.
-    if (isa(U)) {
-      if (!U->hasOneUse())
-        return false;
-      U = *U->user_begin();
-    }
-
-    // If the use is in the compare (which is also the condition of the inner
-    // branch) then the compare has been altered by another transformation e.g
-    // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
-    // a constant. Ignore this use as the compare gets removed later anyway.
-    if (U == FI.InnerBranch->getCondition())
-      continue;
-
-    LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
-
-    Value *MatchedMul = nullptr;
-    Value *MatchedItCount = nullptr;
-    bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
-                                  m_Value(MatchedMul))) &&
-                 match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
-                                           m_Value(MatchedItCount)));
-
-    // Matches the same pattern as above, except it also looks for truncs
-    // on the phi, which can be the result of widening the induction variables.
-    bool IsAddTrunc =
-        match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
-                         m_Value(MatchedMul))) &&
-        match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
-                                  m_Value(MatchedItCount)));
-
-    if (!MatchedItCount)
-      return false;
-    // Look through extends if the IV has been widened.
-    if (FI.Widened &&
-        (isa(MatchedItCount) || isa(MatchedItCount))) {
-      assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
-             "Unexpected type mismatch in types after widening");
-      MatchedItCount = isa(MatchedItCount)
-                           ? dyn_cast(MatchedItCount)->getOperand(0)
-                           : dyn_cast(MatchedItCount)->getOperand(0);
-    }
-
-    if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
-      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
-      ValidOuterPHIUses.insert(MatchedMul);
-      FI.LinearIVUses.insert(U);
-    } else {
-      LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
-      return false;
-    }
-  }
+  if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
+    return false;
 
   // Check that there are no uses of the outer IV other than the ones found
   // as part of the pattern above.
-  for (User *U : FI.OuterInductionPHI->users()) {
-    if (U == FI.OuterIncrement)
-      continue;
-
-    auto IsValidOuterPHIUses = [&] (User *U) -> bool {
-      LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
-      if (!ValidOuterPHIUses.count(U)) {
-        LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
-        return false;
-      }
-      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
-      return true;
-    };
-
-    if (auto *V = dyn_cast(U)) {
-      for (auto *K : V->users()) {
-        if (!IsValidOuterPHIUses(K))
-          return false;
-      }
-      continue;
-    }
-
-    if (!IsValidOuterPHIUses(U))
-      return false;
-  }
+  if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
+    return false;
 
   LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
              dbgs() << "Found " << FI.LinearIVUses.size()

From d42678b453bc2587a42eef1ba4e5782b2c8c5ff1 Mon Sep 17 00:00:00 2001
From: Fraser Cormack 
Date: Fri, 21 Jan 2022 17:58:39 +0000
Subject: [PATCH 382/946] [RISCV] Add side-effect-free vsetvli intrinsics

This patch introduces new intrinsics that enable the use of vsetvli in
contexts where only the returned vector length is of interest. The
pre-existing intrinsics are marked with side-effects, which prevents
even trivial optimizations on/across them.

These intrinsics are intended to be used in situations where the vector
length is fed in turn to RVV intrinsics or to vector-predication
intrinsics during loop vectorization, for example. Those codegen paths
ensure that instructions are generated with their own implicit vsetvli,
so the vector length and vtype can be relied upon to be correct.

No corresponding C builtins are planned at this stage, though that is a
possibility for the future if the need arises.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117910
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  16 +++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 121 +++++++++++-------
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   8 +-
 .../RISCV/rvv/rv32-vsetvli-intrinsics.ll      |  64 +++++++++
 .../RISCV/rvv/rv64-vsetvli-intrinsics.ll      |  64 +++++++++
 6 files changed, 226 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index c332eb82a5906..99dd152fc0fc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,6 +159,22 @@ let TargetPrefix = "riscv" in {
                                        ImmArg>,
                                        ImmArg>]>;
 
+  // Versions without side effects: better optimizable and usable if only the
+  // returned vector length is important.
+  def int_riscv_vsetvli_opt   : Intrinsic<[llvm_anyint_ty],
+                               /* AVL */  [LLVMMatchType<0>,
+                               /* VSEW */  LLVMMatchType<0>,
+                               /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<1>>,
+                                           ImmArg<ArgIndex<2>>]>;
+  def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
+                                /* VSEW */ [LLVMMatchType<0>,
+                                /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<0>>,
+                                           ImmArg<ArgIndex<1>>]>;
+
   // For unit stride load
   // Input: (pointer, vl)
   class RISCVUSLoad
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index e46aa6114fca2..df4e955ef583b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -495,6 +495,75 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
   ReplaceNode(Node, Store);
 }
 
+void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
+  if (!Subtarget->hasVInstructions())
+    return;
+
+  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
+         "Unexpected opcode");
+
+  SDLoc DL(Node);
+  MVT XLenVT = Subtarget->getXLenVT();
+
+  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNoOffset = HasChain ? 1 : 0;
+  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+
+  assert((IntNo == Intrinsic::riscv_vsetvli ||
+          IntNo == Intrinsic::riscv_vsetvlimax ||
+          IntNo == Intrinsic::riscv_vsetvli_opt ||
+          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+         "Unexpected vsetvli intrinsic");
+
+  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
+               IntNo == Intrinsic::riscv_vsetvlimax_opt;
+  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+
+  assert(Node->getNumOperands() == Offset + 2 &&
+         "Unexpected number of operands");
+
+  unsigned SEW =
+      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
+  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
+      Node->getConstantOperandVal(Offset + 1) & 0x7);
+
+  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
+                                            /*MaskAgnostic*/ false);
+  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+  SmallVector<EVT, 2> VTs = {XLenVT};
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  SDValue VLOperand;
+  unsigned Opcode = RISCV::PseudoVSETVLI;
+  if (VLMax) {
+    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+    Opcode = RISCV::PseudoVSETVLIX0;
+  } else {
+    VLOperand = Node->getOperand(IntNoOffset + 1);
+
+    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+      uint64_t AVL = C->getZExtValue();
+      if (isUInt<5>(AVL)) {
+        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
+        if (HasChain)
+          Ops.push_back(Node->getOperand(0));
+        ReplaceNode(
+            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+        return;
+      }
+    }
+  }
+
+  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
+  if (HasChain)
+    Ops.push_back(Node->getOperand(0));
+
+  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+}
 
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
@@ -1017,6 +1086,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                                                {Cmp, Mask, VL, MaskSEW}));
       return;
     }
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
+      return selectVSETVLI(Node);
     }
     break;
   }
@@ -1026,54 +1098,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       // By default we do not custom select any intrinsic.
     default:
       break;
-
     case Intrinsic::riscv_vsetvli:
-    case Intrinsic::riscv_vsetvlimax: {
-      if (!Subtarget->hasVInstructions())
-        break;
-
-      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
-      unsigned Offset = VLMax ? 2 : 3;
-
-      assert(Node->getNumOperands() == Offset + 2 &&
-             "Unexpected number of operands");
-
-      unsigned SEW =
-          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
-      RISCVII::VLMUL VLMul = static_cast(
-          Node->getConstantOperandVal(Offset + 1) & 0x7);
-
-      unsigned VTypeI = RISCVVType::encodeVTYPE(
-          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
-      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
-
-      SDValue VLOperand;
-      unsigned Opcode = RISCV::PseudoVSETVLI;
-      if (VLMax) {
-        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
-        Opcode = RISCV::PseudoVSETVLIX0;
-      } else {
-        VLOperand = Node->getOperand(2);
-
-        if (auto *C = dyn_cast(VLOperand)) {
-          uint64_t AVL = C->getZExtValue();
-          if (isUInt<5>(AVL)) {
-            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
-            ReplaceNode(
-                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
-                                             MVT::Other, VLImm, VTypeIOp,
-                                             /* Chain */ Node->getOperand(0)));
-            return;
-          }
-        }
-      }
-
-      ReplaceNode(Node,
-                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
-                                         MVT::Other, VLOperand, VTypeIOp,
-                                         /* Chain */ Node->getOperand(0)));
-      return;
-    }
+    case Intrinsic::riscv_vsetvlimax:
+      return selectVSETVLI(Node);
     case Intrinsic::riscv_vlseg2:
     case Intrinsic::riscv_vlseg3:
     case Intrinsic::riscv_vlseg4:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index a2770089995d8..f4d6fdddca390 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -87,6 +87,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
   void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
 
+  void selectVSETVLI(SDNode *Node);
+
   // Return the RISC-V condition code that matches the given DAG integer
   // condition code. The CondCode must be one of those supported by the RISC-V
   // ISA (see translateSetCCForBranch).
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d224e3968545..f7e4e36a20d15 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8162,14 +8162,18 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     // We assume VLENB is no more than 65536 / 8 bytes.
     Known.Zero.setBitsFrom(14);
     break;
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned IntNo = Op.getConstantOperandVal(1);
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo =
+        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
     switch (IntNo) {
     default:
       // We can't do anything for most intrinsics.
       break;
     case Intrinsic::riscv_vsetvli:
     case Intrinsic::riscv_vsetvlimax:
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
       // Assume that VL output is positive and would fit in an int32_t.
       // TODO: VLEN might be capped at 16 bits in a future V spec update.
       if (BitWidth >= 32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 081743b31b701..1c3c219c13041 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
 declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
+declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
+declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
 
 define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e64mf8:
@@ -31,6 +33,68 @@ define void @test_vsetvlimax_e64m8() nounwind {
   ret void
 }
 
+define i32 @test_vsetvli_opt_e8m1(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret i32 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret void
+}
+
+define i32 @test_vsetvli_opt_e16mf4(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 0, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret i32 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret void
+}
+
+define i32 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 3, i32 2)
+  ret i32 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index 2b745cb5eddaa..26c3aeeba38fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
+declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
+declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
 
 define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e8m1:
@@ -49,6 +51,68 @@ define void @test_vsetvlimax_e64m4() nounwind {
   ret void
 }
 
+define i64 @test_vsetvli_opt_e8m1(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret i64 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret void
+}
+
+define i64 @test_vsetvli_opt_e16mf4(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 0, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret i64 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret void
+}
+
+define i64 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 3, i64 2)
+  ret i64 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation

From 25e8f5f827bfbc8aed24c9db116c4869f437fb9d Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Mon, 24 Jan 2022 15:02:44 +0100
Subject: [PATCH 383/946] Add missing STLExtras.h include from
 lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp

---
 lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp b/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
index 2e103b9e61f1f..b34e01bd01a05 100644
--- a/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
+++ b/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MockTildeExpressionResolver.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Path.h"
 
 using namespace lldb_private;

From f7079bf9ee68aa46c6eef5836279902a2fd1fe50 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 13:36:22 +0000
Subject: [PATCH 384/946] [X86] Fix v8i8 -> v8i16 typo in bool reductions

We were supposed to be testing <8 x i16> reductions
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 66 ++++++++-----------
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 65 ++++++++----------
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 66 ++++++++-----------
 3 files changed, 83 insertions(+), 114 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 068148d69498d..7db9ad872326d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -111,30 +111,18 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    packsswb %xmm0, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    cmpb $-1, %al
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    packsswb %xmm0, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    cmpb $-1, %al
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
@@ -144,9 +132,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    cmpb $-1, %al
 ; AVX512F-NEXT:    sete %al
@@ -155,8 +143,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    cmpb $-1, %al
 ; AVX512BW-NEXT:    sete %al
@@ -165,13 +153,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    cmpb $-1, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -949,11 +937,12 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    packsswb %xmm1, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
 ; SSE-NEXT:    cmpb $-1, %al
 ; SSE-NEXT:    sete %al
@@ -962,7 +951,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    cmpb $-1, %al
 ; AVX-NEXT:    sete %al
@@ -971,9 +961,9 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    cmpb $-1, %al
 ; AVX512F-NEXT:    sete %al
@@ -983,7 +973,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    cmpb $-1, %al
 ; AVX512BW-NEXT:    sete %al
@@ -992,12 +982,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    cmpb $-1, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 6269cd4df1119..b06822b93fd95 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -111,28 +111,17 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    testl $43690, %eax # imm = 0xAAAA
-; SSE2-NEXT:    setne %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    testl $43690, %eax # imm = 0xAAAA
-; SSE41-NEXT:    setne %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    testl $43690, %eax # imm = 0xAAAA
@@ -141,9 +130,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setne %al
@@ -152,8 +141,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setne %al
@@ -162,13 +151,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -947,31 +936,31 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setne %al
@@ -981,7 +970,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setne %al
@@ -990,12 +979,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 7ef49de025531..0d0accd3f1f18 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -111,30 +111,18 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    packsswb %xmm0, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    testb %al, %al
-; SSE2-NEXT:    setnp %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    packsswb %xmm0, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    testb %al, %al
-; SSE41-NEXT:    setnp %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
@@ -144,9 +132,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setnp %al
@@ -155,8 +143,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setnp %al
@@ -165,13 +153,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -1027,11 +1015,12 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    packsswb %xmm1, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
 ; SSE-NEXT:    testb %al, %al
 ; SSE-NEXT:    setnp %al
@@ -1040,7 +1029,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    setnp %al
@@ -1049,9 +1039,9 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setnp %al
@@ -1061,7 +1051,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setnp %al
@@ -1070,12 +1060,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }

From 4436d4cd7c86ed544b1184db2eec691b38c9c77b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 13:44:08 +0000
Subject: [PATCH 385/946] [X86] Rename cmp-with-zero bool reductions

Explicitly name them icmp0_* - I'm intending to add PR53379 test coverage shortly
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 172 +++++++++---------
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 164 ++++++++---------
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 168 ++++++++---------
 3 files changed, 252 insertions(+), 252 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 7db9ad872326d..c1ddc9454b939 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -826,11 +826,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -839,19 +839,19 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -861,7 +861,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -871,7 +871,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -883,8 +883,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE2-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE2-LABEL: icmp0_v4i32_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -893,19 +893,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i32_v4i1:
+; SSE41-LABEL: icmp0_v4i32_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -915,7 +915,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -925,7 +925,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -937,8 +937,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -948,7 +948,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -958,7 +958,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -970,7 +970,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -980,7 +980,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -992,8 +992,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE2-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE2-LABEL: icmp0_v16i8_v16i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -1002,25 +1002,25 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v16i8_v16i1:
+; SSE41-LABEL: icmp0_v16i8_v16i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptest %xmm0, %xmm0
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1029,7 +1029,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1040,8 +1040,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1053,7 +1053,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1064,7 +1064,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1077,14 +1077,14 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -1094,7 +1094,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -1104,7 +1104,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1117,8 +1117,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1130,7 +1130,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1143,14 +1143,14 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1160,7 +1160,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1170,7 +1170,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1183,8 +1183,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1195,7 +1195,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1208,7 +1208,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1218,7 +1218,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1229,7 +1229,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1238,7 +1238,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1250,8 +1250,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE2-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE2-LABEL: icmp0_v32i8_v32i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -1261,14 +1261,14 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v32i8_v32i1:
+; SSE41-LABEL: icmp0_v32i8_v32i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1281,14 +1281,14 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1309,7 +1309,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1318,7 +1318,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1330,8 +1330,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1346,7 +1346,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1362,7 +1362,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1380,7 +1380,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1392,7 +1392,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1401,7 +1401,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1410,7 +1410,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1423,8 +1423,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1439,7 +1439,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1457,7 +1457,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1471,7 +1471,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512-LABEL: icmp0_v16i32_v16i1:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
@@ -1483,8 +1483,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1499,7 +1499,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1517,7 +1517,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1529,7 +1529,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1551,7 +1551,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestd %k0, %k0
@@ -1559,7 +1559,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1571,8 +1571,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE2-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE2-LABEL: icmp0_v64i8_v64i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm3, %xmm3
@@ -1584,7 +1584,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v64i8_v64i1:
+; SSE41-LABEL: icmp0_v64i8_v64i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm3, %xmm1
 ; SSE41-NEXT:    por %xmm2, %xmm1
@@ -1593,7 +1593,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
@@ -1607,7 +1607,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
@@ -1615,7 +1615,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1642,7 +1642,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestq %k0, %k0
@@ -1650,7 +1650,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestq %k0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index b06822b93fd95..b73ff78330c7b 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -820,11 +820,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -835,7 +835,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm1, %xmm1
 ; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
@@ -844,7 +844,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -853,7 +853,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -863,7 +863,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -873,7 +873,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -885,8 +885,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE-LABEL: icmp0_v4i32_v4i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -895,7 +895,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -904,7 +904,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -914,7 +914,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -924,7 +924,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -936,8 +936,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -946,7 +946,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -955,7 +955,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -967,7 +967,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -977,7 +977,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -989,8 +989,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE-LABEL: icmp0_v16i8_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -999,7 +999,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1008,7 +1008,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1017,7 +1017,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512F-NEXT:    setne %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1026,7 +1026,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1037,8 +1037,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1053,7 +1053,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1064,7 +1064,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1077,7 +1077,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
@@ -1087,7 +1087,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1097,7 +1097,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1107,7 +1107,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1120,8 +1120,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1132,7 +1132,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1145,7 +1145,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
@@ -1155,7 +1155,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1165,7 +1165,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1175,7 +1175,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1188,8 +1188,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1200,7 +1200,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1213,7 +1213,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1223,7 +1223,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1234,7 +1234,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1243,7 +1243,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1255,8 +1255,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE-LABEL: icmp0_v32i8_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -1267,7 +1267,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1280,7 +1280,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1290,7 +1290,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1311,7 +1311,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1320,7 +1320,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1332,8 +1332,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1356,7 +1356,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1371,7 +1371,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1389,7 +1389,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1401,7 +1401,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1410,7 +1410,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1419,7 +1419,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1432,8 +1432,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1448,7 +1448,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1466,7 +1466,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1478,7 +1478,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512-LABEL: icmp0_v16i32_v16i1:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
@@ -1490,8 +1490,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1506,7 +1506,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1524,7 +1524,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1536,7 +1536,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1558,7 +1558,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestd %k0, %k0
@@ -1566,7 +1566,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1578,8 +1578,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE-LABEL: icmp0_v64i8_v64i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
@@ -1594,7 +1594,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm3
@@ -1612,7 +1612,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
@@ -1624,7 +1624,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1651,7 +1651,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestq %k0, %k0
@@ -1659,7 +1659,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestq %k0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 0d0accd3f1f18..9fb94c386bab8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -899,11 +899,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -914,7 +914,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm1, %xmm1
 ; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
@@ -923,7 +923,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -932,7 +932,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -942,7 +942,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -952,7 +952,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -964,8 +964,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE-LABEL: icmp0_v4i32_v4i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -974,7 +974,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -983,7 +983,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -993,7 +993,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1003,7 +1003,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1015,8 +1015,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -1026,7 +1026,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -1036,7 +1036,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -1048,7 +1048,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1058,7 +1058,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1070,8 +1070,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE-LABEL: icmp0_v16i8_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -1080,7 +1080,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1089,7 +1089,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1098,7 +1098,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512F-NEXT:    setnp %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1110,7 +1110,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1124,8 +1124,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1140,7 +1140,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1151,7 +1151,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1164,7 +1164,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
@@ -1174,7 +1174,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1184,7 +1184,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1194,7 +1194,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1207,8 +1207,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1220,7 +1220,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1233,7 +1233,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
@@ -1243,7 +1243,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1253,7 +1253,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1263,7 +1263,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1276,8 +1276,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1288,7 +1288,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1301,7 +1301,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1313,7 +1313,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1327,7 +1327,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1339,7 +1339,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1354,8 +1354,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE-LABEL: icmp0_v32i8_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -1366,7 +1366,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1379,7 +1379,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1392,7 +1392,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1413,7 +1413,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1426,7 +1426,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1442,8 +1442,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1467,7 +1467,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1483,7 +1483,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1501,7 +1501,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1514,7 +1514,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1523,7 +1523,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1532,7 +1532,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1545,8 +1545,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1561,7 +1561,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1579,7 +1579,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1594,7 +1594,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i32_v16i1:
+; AVX512F-LABEL: icmp0_v16i32_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1605,7 +1605,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i32_v16i1:
+; AVX512BW-LABEL: icmp0_v16i32_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1616,7 +1616,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i32_v16i1:
+; AVX512VL-LABEL: icmp0_v16i32_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1631,8 +1631,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1647,7 +1647,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1665,7 +1665,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1681,7 +1681,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1703,7 +1703,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1715,7 +1715,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1731,8 +1731,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE-LABEL: icmp0_v64i8_v64i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
@@ -1747,7 +1747,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm3
@@ -1765,7 +1765,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
@@ -1780,7 +1780,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1807,7 +1807,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovq %k0, %rax
@@ -1822,7 +1822,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovq %k0, %rax

From 0553f5e61ac7e919c3eb573f57ed0e1096ccbbeb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 14:04:42 +0000
Subject: [PATCH 386/946] [X86] Add cmp-equality bool reductions

PR53379 test coverage
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 844 ++++++++++++++++
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 836 ++++++++++++++++
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 927 ++++++++++++++++++
 3 files changed, 2607 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index c1ddc9454b939..c9aef7b8e7404 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1662,6 +1662,850 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+;
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    cmpb $3, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    cmpb $3, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $3, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    cmpb $15, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    cmpb $15, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $15, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    cmpw $-1, %ax
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    cmpb $15, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    cmpb $15, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    cmpb $15, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $15, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    cmpb $-1, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setb %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    packsswb %xmm1, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpb $-1, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    packsswb %xmm0, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    cmpb $-1, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    cmpb $-1, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    cmpw $-1, %ax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    setb %al
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pand %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpand %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpand %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestq %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index b73ff78330c7b..6409a8ff4761f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -1671,6 +1671,842 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    testl %eax, %eax
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    testl %eax, %eax
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    testl %eax, %eax
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    por %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestq %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 9fb94c386bab8..493a1168a84ae 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -1841,6 +1841,933 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $3, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $15, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    xorb %ah, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    xorb %ah, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $15, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    xorb %ah, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    movl %eax, %ecx
+; AVX512F-NEXT:    shrl $8, %ecx
+; AVX512F-NEXT:    xorb %al, %cl
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $16, %ecx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    xorb %ch, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $16, %ecx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    xorb %ch, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    packsswb %xmm1, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    packsswb %xmm0, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    xorb %ah, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i32_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    movl %eax, %ecx
+; AVX512F-NEXT:    shrl $8, %ecx
+; AVX512F-NEXT:    xorb %al, %cl
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i32_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i32_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $16, %ecx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    xorb %ch, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $16, %ecx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    xorb %ch, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpxor %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovq %k0, %rax
+; AVX512BW-NEXT:    movq %rax, %rcx
+; AVX512BW-NEXT:    shrq $32, %rcx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    movl %ecx, %eax
+; AVX512BW-NEXT:    shrl $16, %eax
+; AVX512BW-NEXT:    xorl %ecx, %eax
+; AVX512BW-NEXT:    xorb %ah, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovq %k0, %rax
+; AVX512VL-NEXT:    movq %rax, %rcx
+; AVX512VL-NEXT:    shrq $32, %rcx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    movl %ecx, %eax
+; AVX512VL-NEXT:    shrl $16, %eax
+; AVX512VL-NEXT:    xorl %ecx, %eax
+; AVX512VL-NEXT:    xorb %ah, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)

From 34aedbe90d7667a3fd1e0427808648364b594034 Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Mon, 24 Jan 2022 12:03:16 +0000
Subject: [PATCH 387/946] [AArch64] Regenerate CHECK lines for
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll

---
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll | 208 +++++++++++++---------
 1 file changed, 121 insertions(+), 87 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
index 6e495b0989c22..57d5775f7c39e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
@@ -1,13 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; MUL with SPLAT
 ;
 define  @mul_i16_imm( %a) {
-; CHECK-LABEL: mul_i16_imm
-; CHECK: mov w[[W:[0-9]+]], #255
-; CHECK-NEXT: mov z1.h, w[[W]]
-; CHECK-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-LABEL: mul_i16_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i16 255, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -15,10 +18,12 @@ define  @mul_i16_imm( %a) {
 }
 
 define  @mul_i16_imm_neg( %a) {
-; CHECK-LABEL: mul_i16_imm_neg
-; CHECK: mov w[[W:[0-9]+]], #-200
-; CHECK-NEXT: mov z1.h, w[[W]]
-; CHECK-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-LABEL: mul_i16_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-200
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i16 -200, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -26,10 +31,12 @@ define  @mul_i16_imm_neg( %a) {
 }
 
 define  @mul_i32_imm( %a) {
-; CHECK-LABEL: mul_i32_imm
-; CHECK: mov w[[W:[0-9]+]], #255
-; CHECK-NEXT: mov z1.s, w[[W]]
-; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-LABEL: mul_i32_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i32 255, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -37,10 +44,12 @@ define  @mul_i32_imm( %a) {
 }
 
 define  @mul_i32_imm_neg( %a) {
-; CHECK-LABEL: mul_i32_imm_neg
-; CHECK: mov w[[W:[0-9]+]], #-200
-; CHECK-NEXT: mov z1.s, w[[W]]
-; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-LABEL: mul_i32_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-200
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i32 -200, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -48,10 +57,12 @@ define  @mul_i32_imm_neg( %a) {
 }
 
 define  @mul_i64_imm( %a) {
-; CHECK-LABEL: mul_i64_imm
-; CHECK: mov w[[X:[0-9]+]], #255
-; CHECK-NEXT: z1.d, x[[X]]
-; CHECK-NEXT: mul z0.d, z0.d, z1.d
+; CHECK-LABEL: mul_i64_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i64 255, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -59,10 +70,12 @@ define  @mul_i64_imm( %a) {
 }
 
 define  @mul_i64_imm_neg( %a) {
-; CHECK-LABEL: mul_i64_imm_neg
-; CHECK: mov x[[X:[0-9]+]], #-200
-; CHECK-NEXT: z1.d, x[[X]]
-; CHECK-NEXT: mul z0.d, z0.d, z1.d
+; CHECK-LABEL: mul_i64_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-200
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %elt = insertelement  undef, i64 -200, i32 0
   %splat = shufflevector  %elt,  undef,  zeroinitializer
   %res = mul  %a, %splat
@@ -73,37 +86,41 @@ define  @mul_i64_imm_neg( %a) {
 ; MUL (vector, unpredicated)
 ;
 define  @mul_i8( %a,
+; CHECK-LABEL: mul_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                    %b) {
-; CHECK-LABEL: mul_i8
-; CHECK: mul z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = mul  %a, %b
   ret  %res
 }
 
 define  @mul_i16( %a,
+; CHECK-LABEL: mul_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                    %b) {
-; CHECK-LABEL: mul_i16
-; CHECK: mul z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = mul  %a, %b
   ret  %res
 }
 
 define  @mul_i32( %a,
+; CHECK-LABEL: mul_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                    %b) {
-; CHECK-LABEL: mul_i32
-; CHECK: mul z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = mul  %a, %b
   ret  %res
 }
 
 define  @mul_i64( %a,
+; CHECK-LABEL: mul_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                    %b) {
-; CHECK-LABEL: mul_i64
-; CHECK: mul z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = mul  %a, %b
   ret  %res
 }
@@ -112,10 +129,11 @@ define  @mul_i64( %a,
 ; SMULH (vector, unpredicated)
 ;
 define  @smulh_i8( %a,
+; CHECK-LABEL: smulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                      %b) {
-; CHECK-LABEL: smulh_i8
-; CHECK: smulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %res = call  @llvm.aarch64.sve.smulh.nxv16i8( %sel,  %a,
                                                                   %b)
@@ -123,10 +141,11 @@ define  @smulh_i8( %a,
 }
 
 define  @smulh_i16( %a,
+; CHECK-LABEL: smulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: smulh_i16
-; CHECK: smulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %res = call  @llvm.aarch64.sve.smulh.nxv8i16( %sel,  %a,
                                                                   %b)
@@ -134,10 +153,11 @@ define  @smulh_i16( %a,
 }
 
 define  @smulh_i32( %a,
+; CHECK-LABEL: smulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: smulh_i32
-; CHECK: smulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %res = call  @llvm.aarch64.sve.smulh.nxv4i32( %sel,  %a,
                                                                   %b)
@@ -145,10 +165,11 @@ define  @smulh_i32( %a,
 }
 
 define  @smulh_i64( %a,
+; CHECK-LABEL: smulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: smulh_i64
-; CHECK: smulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %res = call  @llvm.aarch64.sve.smulh.nxv2i64( %sel,  %a,
                                                                   %b)
@@ -159,10 +180,11 @@ define  @smulh_i64( %a,
 ; UMULH (vector, unpredicated)
 ;
 define  @umulh_i8( %a,
+; CHECK-LABEL: umulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                      %b) {
-; CHECK-LABEL: umulh_i8
-; CHECK: umulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %res = call  @llvm.aarch64.sve.umulh.nxv16i8( %sel,  %a,
                                                                   %b)
@@ -170,10 +192,11 @@ define  @umulh_i8( %a,
 }
 
 define  @umulh_i16( %a,
+; CHECK-LABEL: umulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: umulh_i16
-; CHECK: umulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %res = call  @llvm.aarch64.sve.umulh.nxv8i16( %sel,  %a,
                                                                   %b)
@@ -181,10 +204,11 @@ define  @umulh_i16( %a,
 }
 
 define  @umulh_i32( %a,
+; CHECK-LABEL: umulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: umulh_i32
-; CHECK: umulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %res = call  @llvm.aarch64.sve.umulh.nxv4i32( %sel,  %a,
                                                                   %b)
@@ -192,10 +216,11 @@ define  @umulh_i32( %a,
 }
 
 define  @umulh_i64( %a,
+; CHECK-LABEL: umulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                       %b) {
-; CHECK-LABEL: umulh_i64
-; CHECK: umulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %sel = call  @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %res = call  @llvm.aarch64.sve.umulh.nxv2i64( %sel,  %a,
                                                                   %b)
@@ -206,10 +231,11 @@ define  @umulh_i64( %a,
 ; PMUL (vector, unpredicated)
 ;
 define  @pmul_i8( %a,
+; CHECK-LABEL: pmul_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                     %b) {
-; CHECK-LABEL: pmul_i8
-; CHECK: pmul z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.pmul.nxv16i8( %a,
                                                                  %b)
   ret  %res
@@ -219,40 +245,44 @@ define  @pmul_i8( %a,
 ; SQDMULH (vector, unpredicated)
 ;
 define  @sqdmulh_i8( %a,
+; CHECK-LABEL: sqdmulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                        %b) {
-; CHECK-LABEL: sqdmulh_i8
-; CHECK: sqdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqdmulh.nxv16i8( %a,
                                                                     %b)
   ret  %res
 }
 
 define  @sqdmulh_i16( %a,
+; CHECK-LABEL: sqdmulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                         %b) {
-; CHECK-LABEL: sqdmulh_i16
-; CHECK: sqdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqdmulh.nxv8i16( %a,
                                                                     %b)
   ret  %res
 }
 
 define  @sqdmulh_i32( %a,
+; CHECK-LABEL: sqdmulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                         %b) {
-; CHECK-LABEL: sqdmulh_i32
-; CHECK: sqdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqdmulh.nxv4i32( %a,
                                                                     %b)
   ret  %res
 }
 
 define  @sqdmulh_i64( %a,
+; CHECK-LABEL: sqdmulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                         %b) {
-; CHECK-LABEL: sqdmulh_i64
-; CHECK: sqdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqdmulh.nxv2i64( %a,
                                                                     %b)
   ret  %res
@@ -262,40 +292,44 @@ define  @sqdmulh_i64( %a,
 ; SQRDMULH (vector, unpredicated)
 ;
 define  @sqrdmulh_i8( %a,
+; CHECK-LABEL: sqrdmulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                         %b) {
-; CHECK-LABEL: sqrdmulh_i8
-; CHECK: sqrdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqrdmulh.nxv16i8( %a,
                                                                      %b)
   ret  %res
 }
 
 define  @sqrdmulh_i16( %a,
+; CHECK-LABEL: sqrdmulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                          %b) {
-; CHECK-LABEL: sqrdmulh_i16
-; CHECK: sqrdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqrdmulh.nxv8i16( %a,
                                                                      %b)
   ret  %res
 }
 
 define  @sqrdmulh_i32( %a,
+; CHECK-LABEL: sqrdmulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                          %b) {
-; CHECK-LABEL: sqrdmulh_i32
-; CHECK: sqrdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqrdmulh.nxv4i32( %a,
                                                                      %b)
   ret  %res
 }
 
 define  @sqrdmulh_i64( %a,
+; CHECK-LABEL: sqrdmulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                          %b) {
-; CHECK-LABEL: sqrdmulh_i64
-; CHECK: sqrdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = call  @llvm.aarch64.sve.sqrdmulh.nxv2i64( %a,
                                                                      %b)
   ret  %res

From 5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281 Mon Sep 17 00:00:00 2001
From: ksyx <18738953+ksyx@users.noreply.github.com>
Date: Mon, 17 Jan 2022 18:43:49 -0500
Subject: [PATCH 388/946] [clang-format] Fix SeparateDefinitionBlocks issues

- Fixes https://github.com/llvm/llvm-project/issues/53227 that wrongly
  indents multiline comments
- Fixes wrong detection of opening braces that sit on their own line when
  they are used along with braces that only open scopes, causing crashes
  due to duplicated replacements on the same token:
    void foo()
    {
      {
        int x;
      }
    }
- Fixes wrong recognition of the first line of a definition when the line
  starts with a block comment: the line starting with the inline block
  comment was skipped, which caused crashes due to duplicated replacements
  on the same token:
    /*
      Some descriptions about function
    */
    /*inline*/ void bar() {
    }
- Fixes wrong recognition of enum when it is used as a type name rather
  than starting a definition block, causing crashes due to duplicated
  replacements on the same token, since the actions for both enum and
  definition blocks were taken:
    void foobar(const enum EnumType e) {
    }
- Change to use function keyword for JavaScript instead of comparing
  strings
- Resolves formatting conflict with options EmptyLineAfterAccessModifier
  and EmptyLineBeforeAccessModifier (prompts with --dry-run (-n) or
  --output-replacement-xml but no observable change)
- Recognize a long (len>=5) uppercased name taking a single line as a
  return type and fix the problem of adding a newline below it, by adding
  the new token type FunctionLikeOrFreestandingMacro and marking tokens in
  UnwrappedLineParser:
    void
    afunc(int x) {
      return;
    }
    TYPENAME
    func(int x, int y) {
      // ...
    }
- Remove redundant and repeated initialization
- Do not change newlines before EOF

Reviewed By: MyDeveloperDay, curdeius, HazardyKnusperkeks
Differential Revision: https://reviews.llvm.org/D117520
---
 clang/lib/Format/DefinitionBlockSeparator.cpp |  86 ++++++++++---
 clang/lib/Format/DefinitionBlockSeparator.h   |   2 +-
 clang/lib/Format/FormatToken.h                |   1 +
 clang/lib/Format/TokenAnnotator.cpp           |   2 +-
 clang/lib/Format/UnwrappedLineParser.cpp      |   3 +
 .../Format/DefinitionBlockSeparatorTest.cpp   | 117 +++++++++++++++---
 6 files changed, 169 insertions(+), 42 deletions(-)

diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index d09cd0bd33fbe..827564357f788 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -25,22 +25,27 @@ std::pair DefinitionBlockSeparator::analyze(
   assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave);
   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
   tooling::Replacements Result;
-  separateBlocks(AnnotatedLines, Result);
+  separateBlocks(AnnotatedLines, Result, Tokens);
   return {Result, 0};
 }
 
 void DefinitionBlockSeparator::separateBlocks(
-    SmallVectorImpl &Lines, tooling::Replacements &Result) {
+    SmallVectorImpl &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
   const bool IsNeverStyle =
       Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never;
-  auto LikelyDefinition = [this](const AnnotatedLine *Line) {
+  const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
+  auto LikelyDefinition = [this, ExtraKeywords](const AnnotatedLine *Line,
+                                                bool ExcludeEnum = false) {
     if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
         Line->startsWithNamespace())
       return true;
     FormatToken *CurrentToken = Line->First;
     while (CurrentToken) {
-      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_enum) ||
-          (Style.isJavaScript() && CurrentToken->TokenText == "function"))
+      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct) ||
+          (Style.isJavaScript() && CurrentToken->is(ExtraKeywords.kw_function)))
+        return true;
+      if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
         return true;
       CurrentToken = CurrentToken->Next;
     }
@@ -63,18 +68,25 @@ void DefinitionBlockSeparator::separateBlocks(
     AnnotatedLine *TargetLine;
     auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
     AnnotatedLine *OpeningLine = nullptr;
+    const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
+      return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
+    };
     const auto InsertReplacement = [&](const int NewlineToInsert) {
       assert(TargetLine);
       assert(TargetToken);
 
       // Do not handle EOF newlines.
-      if (TargetToken->is(tok::eof) && NewlineToInsert > 0)
+      if (TargetToken->is(tok::eof))
+        return;
+      if (IsAccessSpecifierToken(TargetToken) ||
+          (OpeningLineIndex > 0 &&
+           IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First)))
         return;
       if (!TargetLine->Affected)
         return;
       Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
-                                    TargetToken->SpacesRequiredBefore - 1,
-                                    TargetToken->StartsColumn);
+                                    TargetToken->OriginalColumn,
+                                    TargetToken->OriginalColumn);
     };
     const auto IsPPConditional = [&](const size_t LineIndex) {
       const auto &Line = Lines[LineIndex];
@@ -89,26 +101,57 @@ void DefinitionBlockSeparator::separateBlocks(
              Lines[OpeningLineIndex - 1]->Last->opensScope() ||
              IsPPConditional(OpeningLineIndex - 1);
     };
-    const auto HasEnumOnLine = [CurrentLine]() {
+    const auto HasEnumOnLine = [&]() {
       FormatToken *CurrentToken = CurrentLine->First;
+      bool FoundEnumKeyword = false;
       while (CurrentToken) {
         if (CurrentToken->is(tok::kw_enum))
+          FoundEnumKeyword = true;
+        else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
           return true;
         CurrentToken = CurrentToken->Next;
       }
-      return false;
+      return FoundEnumKeyword && I + 1 < Lines.size() &&
+             Lines[I + 1]->First->is(tok::l_brace);
     };
 
     bool IsDefBlock = false;
     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
+      assert(Direction >= -1);
+      assert(Direction <= 1);
       const size_t OperateIndex = OpeningLineIndex + Direction;
       assert(OperateIndex < Lines.size());
       const auto &OperateLine = Lines[OperateIndex];
-      return (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)) ||
-             OperateLine->First->is(tok::comment);
+      if (LikelyDefinition(OperateLine))
+        return false;
+
+      if (OperateLine->First->is(tok::comment))
+        return true;
+
+      // A single line identifier that is not in the last line.
+      if (OperateLine->First->is(tok::identifier) &&
+          OperateLine->First == OperateLine->Last &&
+          OperateIndex + 1 < Lines.size()) {
+        // UnwrappedLineParser's recognition of free-standing macro like
+        // Q_OBJECT may also recognize some uppercased type names that may be
+        // used as return type as that kind of macros, which is a bit hard to
+        // distinguish one from another purely from token patterns. Here, we
+        // try not to add new lines below those identifiers.
+        AnnotatedLine *NextLine = Lines[OperateIndex + 1];
+        if (NextLine->MightBeFunctionDecl &&
+            NextLine->mightBeFunctionDefinition() &&
+            NextLine->First->NewlinesBefore == 1 &&
+            OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro))
+          return true;
+      }
+
+      if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)))
+        return true;
+      return false;
     };
 
-    if (HasEnumOnLine()) {
+    if (HasEnumOnLine() &&
+        !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
       // We have no scope opening/closing information for enum.
       IsDefBlock = true;
       OpeningLineIndex = I;
@@ -132,8 +175,13 @@ void DefinitionBlockSeparator::separateBlocks(
     } else if (CurrentLine->First->closesScope()) {
       if (OpeningLineIndex > Lines.size())
         continue;
-      // Handling the case that opening bracket has its own line.
-      OpeningLineIndex -= Lines[OpeningLineIndex]->First->is(tok::l_brace);
+      // Handling the case that opening brace has its own line, with checking
+      // whether the last line already had an opening brace to guard against
+      // misrecognition.
+      if (OpeningLineIndex > 0 &&
+          Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
+          Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace))
+        --OpeningLineIndex;
       OpeningLine = Lines[OpeningLineIndex];
       // Closing a function definition.
       if (LikelyDefinition(OpeningLine)) {
@@ -168,15 +216,13 @@ void DefinitionBlockSeparator::separateBlocks(
           ++OpeningLineIndex;
         TargetLine = Lines[OpeningLineIndex];
         if (!LikelyDefinition(TargetLine)) {
+          OpeningLineIndex = I + 1;
           TargetLine = Lines[I + 1];
           TargetToken = TargetLine->First;
           InsertReplacement(NewlineCount);
         }
-      } else if (IsNeverStyle) {
-        TargetLine = Lines[I + 1];
-        TargetToken = TargetLine->First;
-        InsertReplacement(OpeningLineIndex != 0);
-      }
+      } else if (IsNeverStyle)
+        InsertReplacement(/*NewlineToInsert=*/1);
     }
   }
   for (const auto &R : Whitespaces.generateReplacements())
diff --git a/clang/lib/Format/DefinitionBlockSeparator.h b/clang/lib/Format/DefinitionBlockSeparator.h
index 13b90c5ab083d..31c0f34d6e198 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.h
+++ b/clang/lib/Format/DefinitionBlockSeparator.h
@@ -33,7 +33,7 @@ class DefinitionBlockSeparator : public TokenAnalyzer {
 
 private:
   void separateBlocks(SmallVectorImpl &Lines,
-                      tooling::Replacements &Result);
+                      tooling::Replacements &Result, FormatTokenLexer &Tokens);
 };
 } // namespace format
 } // namespace clang
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index b087f9f120411..7cc090cb77def 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -51,6 +51,7 @@ namespace format {
   TYPE(FunctionAnnotationRParen)                                               \
   TYPE(FunctionDeclarationName)                                                \
   TYPE(FunctionLBrace)                                                         \
+  TYPE(FunctionLikeOrFreestandingMacro)                                        \
   TYPE(FunctionTypeLParen)                                                     \
   TYPE(IfMacro)                                                                \
   TYPE(ImplicitStringLiteral)                                                  \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index cc8b48387fc9e..b9535f7965598 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1423,7 +1423,7 @@ class AnnotatingParser {
             TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
             TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
             TT_UntouchableMacroFunc, TT_ConstraintJunctions,
-            TT_StatementAttributeLikeMacro))
+            TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro))
       CurrentToken->setType(TT_Unknown);
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 67b7b3937b07e..96d227b7fe763 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1682,6 +1682,8 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
       // See if the following token should start a new unwrapped line.
       StringRef Text = FormatTok->TokenText;
+
+      FormatToken *PreviousToken = FormatTok;
       nextToken();
 
       // JS doesn't have macros, and within classes colons indicate fields, not
@@ -1710,6 +1712,7 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
+          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
           addUnwrappedLine();
           return;
         }
diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
index 69c87cb4b51fd..4cbae0f55b036 100644
--- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
+++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
@@ -131,6 +131,73 @@ TEST_F(DefinitionBlockSeparatorTest, Basic) {
                "\n"
                "enum Bar { FOOBAR, BARFOO };\n",
                Style);
+
+  FormatStyle BreakAfterReturnTypeStyle = Style;
+  BreakAfterReturnTypeStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All;
+  // Test uppercased long typename
+  verifyFormat("class Foo {\n"
+               "  void\n"
+               "  Bar(int t, int p) {\n"
+               "    int r = t + p;\n"
+               "    return r;\n"
+               "  }\n"
+               "\n"
+               "  HRESULT\n"
+               "  Foobar(int t, int p) {\n"
+               "    int r = t * p;\n"
+               "    return r;\n"
+               "  }\n"
+               "}\n",
+               BreakAfterReturnTypeStyle);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, FormatConflict) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  llvm::StringRef Code = "class Test {\n"
+                         "public:\n"
+                         "  static void foo() {\n"
+                         "    int t;\n"
+                         "    return 1;\n"
+                         "  }\n"
+                         "};";
+  std::vector Ranges = {1, tooling::Range(0, Code.size())};
+  EXPECT_EQ(reformat(Style, Code, Ranges, "").size(), 0u);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, CommentBlock) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  std::string Prefix = "enum Foo { FOO, BAR };\n"
+                       "\n"
+                       "/*\n"
+                       "test1\n"
+                       "test2\n"
+                       "*/\n"
+                       "int foo(int i, int j) {\n"
+                       "  int r = i + j;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string Suffix = "enum Bar { FOOBAR, BARFOO };\n"
+                       "\n"
+                       "/* Comment block in one line*/\n"
+                       "int bar3(int j, int k) {\n"
+                       "  // A comment\n"
+                       "  int r = j % k;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string CommentedCode = "/*\n"
+                              "int bar2(int j, int k) {\n"
+                              "  int r = j / k;\n"
+                              "  return r;\n"
+                              "}\n"
+                              "*/\n";
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode + "\n" +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + "\n" + Suffix);
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + Suffix);
 }
 
 TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
@@ -175,13 +242,15 @@ TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
   FormatStyle NeverStyle = getLLVMStyle();
   NeverStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Never;
 
-  auto TestKit = MakeUntouchTest("#ifdef FOO\n\n", "\n#elifndef BAR\n\n",
-                                 "\n#endif\n\n", false);
+  auto TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                                 "#ifdef FOO\n\n",
+                                 "\n#elifndef BAR\n\n", "\n#endif\n\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
-  TestKit =
-      MakeUntouchTest("#ifdef FOO\n", "#elifndef BAR\n", "#endif\n", false);
+  TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                            "#ifdef FOO\n",
+                            "#elifndef BAR\n", "#endif\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
@@ -213,7 +282,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "test1\n"
                       "test2\n"
                       "*/\n"
-                      "int foo(int i, int j) {\n"
+                      "/*const*/ int foo(int i, int j) {\n"
                       "  int r = i + j;\n"
                       "  return r;\n"
                       "}\n"
@@ -225,8 +294,10 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "// Comment line 2\n"
                       "// Comment line 3\n"
                       "int bar(int j, int k) {\n"
-                      "  int r = j * k;\n"
-                      "  return r;\n"
+                      "  {\n"
+                      "    int r = j * k;\n"
+                      "    return r;\n"
+                      "  }\n"
                       "}\n"
                       "\n"
                       "int bar2(int j, int k) {\n"
@@ -237,7 +308,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "/* Comment block in one line*/\n"
                       "enum Bar { FOOBAR, BARFOO };\n"
                       "\n"
-                      "int bar3(int j, int k) {\n"
+                      "int bar3(int j, int k, const enum Bar b) {\n"
                       "  // A comment\n"
                       "  int r = j % k;\n"
                       "  return r;\n"
@@ -264,7 +335,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -276,8 +347,10 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -288,7 +361,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "/* Comment block in one line*/\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
                         "\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"
@@ -316,7 +389,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "test1\n"
                "test2\n"
                "*/\n"
-               "int foo(int i, int j)\n"
+               "/*const*/ int foo(int i, int j)\n"
                "{\n"
                "  int r = i + j;\n"
                "  return r;\n"
@@ -330,8 +403,10 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "// Comment line 3\n"
                "int bar(int j, int k)\n"
                "{\n"
-               "  int r = j * k;\n"
-               "  return r;\n"
+               "  {\n"
+               "    int r = j * k;\n"
+               "    return r;\n"
+               "  }\n"
                "}\n"
                "\n"
                "int bar2(int j, int k)\n"
@@ -346,7 +421,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "  BARFOO\n"
                "};\n"
                "\n"
-               "int bar3(int j, int k)\n"
+               "int bar3(int j, int k, const enum Bar b)\n"
                "{\n"
                "  // A comment\n"
                "  int r = j % k;\n"
@@ -370,7 +445,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -382,8 +457,10 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -393,7 +470,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "\n"
                         "// Comment for inline enum\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"

From 7a5b0a2934f3b82ae93f03d1e7603371fe5c42d1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 14 Jan 2022 21:02:15 -0500
Subject: [PATCH 389/946] Reapply "IR: Make getRetAlign check callee function
 attributes"

Reapply 3d2d208f6a0a421b23937c39b9d371183a5913a3, reverted in
a97e20a3a8a58be751f023e610758310d5664562
---
 llvm/include/llvm/IR/InstrTypes.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 179aa579fa96b..b3d2a2c8ed9dd 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1723,7 +1723,13 @@ class CallBase : public Instruction {
   }
 
   /// Extract the alignment of the return value.
-  MaybeAlign getRetAlign() const { return Attrs.getRetAlignment(); }
+  MaybeAlign getRetAlign() const {
+    if (auto Align = Attrs.getRetAlignment())
+      return Align;
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().getRetAlignment();
+    return None;
+  }
 
   /// Extract the alignment for a call or parameter (0=unknown).
   MaybeAlign getParamAlign(unsigned ArgNo) const {

From 99e8e17313e76c50a0d6606394fed98832fd8fec Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 14 Jan 2022 18:18:26 -0500
Subject: [PATCH 390/946] Reapply "Revert "GlobalISel: Add G_ASSERT_ALIGN hint
 instruction"

This reverts commit a97e20a3a8a58be751f023e610758310d5664562.
---
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |  25 ++-
 llvm/include/llvm/Support/TargetOpcodes.def   |   3 +-
 llvm/include/llvm/Target/GenericOpcodes.td    |   7 +
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |  27 ++-
 .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp |  17 ++
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |  12 -
 llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp |   3 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   2 +
 .../AArch64/GISel/AArch64CallLowering.cpp     |   1 +
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |   1 +
 .../AArch64/GlobalISel/assert-align.ll        |  28 +++
 .../GlobalISel/regbank-assert-align.mir       |  30 +++
 .../CodeGen/AMDGPU/GlobalISel/assert-align.ll |  55 +++++
 .../GlobalISel/irtranslator-assert-align.ll   | 208 ++++++++++++++++++
 .../GlobalISel/regbankselect-assert-align.mir |  62 ++++++
 .../CodeGen/GlobalISel/KnownBitsTest.cpp      |  55 +++++
 16 files changed, 518 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index fde0cb3cf1af5..c4c2fc076dd8e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -836,17 +836,38 @@ class MachineIRBuilder {
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op);
 
+
+  /// Build and insert G_ASSERT_SEXT, G_ASSERT_ZEXT, or G_ASSERT_ALIGN
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildAssertOp(unsigned Opc, const DstOp &Res, const SrcOp &Op,
+				    unsigned Val) {
+    return buildInstr(Opc, Res, Op).addImm(Val);
+  }
+
   /// Build and insert \p Res = G_ASSERT_ZEXT Op, Size
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op,
-                                      unsigned Size);
+                                      unsigned Size) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_ZEXT, Res, Op, Size);
+  }
 
   /// Build and insert \p Res = G_ASSERT_SEXT Op, Size
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildAssertSExt(const DstOp &Res, const SrcOp &Op,
-                                      unsigned Size);
+                                      unsigned Size) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_SEXT, Res, Op, Size);
+  }
+
+  /// Build and insert \p Res = G_ASSERT_ALIGN Op, AlignVal
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildAssertAlign(const DstOp &Res, const SrcOp &Op,
+				       Align AlignVal) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_ALIGN, Res, Op, AlignVal.value());
+  }
 
   /// Build and insert `Res = G_LOAD Addr, MMO`.
   ///
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index b34b885ddc357..428cbb44705d8 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -228,10 +228,11 @@ HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 /// generate code. These instructions only act as optimization hints.
 HANDLE_TARGET_OPCODE(G_ASSERT_SEXT)
 HANDLE_TARGET_OPCODE(G_ASSERT_ZEXT)
+HANDLE_TARGET_OPCODE(G_ASSERT_ALIGN)
 HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPTIMIZATION_HINT_START,
                             G_ASSERT_SEXT)
 HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPTIMIZATION_HINT_END,
-                            G_ASSERT_ZEXT)
+                            G_ASSERT_ALIGN)
 
 /// Generic ADD instruction. This is an integer add.
 HANDLE_TARGET_OPCODE(G_ADD)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 72c974834a2fa..2af20ab6a53f5 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1434,3 +1434,10 @@ def G_ASSERT_SEXT : GenericInstruction {
   let InOperandList = (ins type0:$src, untyped_imm_0:$sz);
   let hasSideEffects = false;
 }
+
+// Asserts that a value has at least the given alignment.
+def G_ASSERT_ALIGN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, untyped_imm_0:$align);
+  let hasSideEffects = false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 486eff4dc7100..1ec7868f22345 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -86,6 +86,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   CallLoweringInfo Info;
   const DataLayout &DL = MIRBuilder.getDataLayout();
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   bool CanBeTailCalled = CB.isTailCall() &&
                          isInTailCallPosition(CB, MF.getTarget()) &&
                          (MF.getFunction()
@@ -109,6 +110,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
     CanBeTailCalled = false;
   }
 
+
   // First step is to marshall all the function's parameters into the correct
   // physregs and memory locations. Gather the sequence of argument types that
   // we'll pass to the assigner function.
@@ -136,10 +138,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   else
     Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
 
+  Register ReturnHintAlignReg;
+  Align ReturnHintAlign;
+
   Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
-  if (!Info.OrigRet.Ty->isVoidTy())
+
+  if (!Info.OrigRet.Ty->isVoidTy()) {
     setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
 
+    if (MaybeAlign Alignment = CB.getRetAlign()) {
+      if (*Alignment > Align(1)) {
+        ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
+        Info.OrigRet.Regs[0] = ReturnHintAlignReg;
+        ReturnHintAlign = *Alignment;
+      }
+    }
+  }
+
   Info.CB = &CB;
   Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
   Info.CallConv = CallConv;
@@ -147,7 +162,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   Info.IsMustTailCall = CB.isMustTailCall();
   Info.IsTailCall = CanBeTailCalled;
   Info.IsVarArg = IsVarArg;
-  return lowerCall(MIRBuilder, Info);
+  if (!lowerCall(MIRBuilder, Info))
+    return false;
+
+  if (ReturnHintAlignReg && !Info.IsTailCall) {
+    MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
+                                ReturnHintAlign);
+  }
+
+  return true;
 }
 
 template 
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 306af808659a9..64c2f0d5f8e49 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -37,6 +37,11 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
   switch (MI->getOpcode()) {
   case TargetOpcode::COPY:
     return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
+  case TargetOpcode::G_ASSERT_ALIGN: {
+    // TODO: Should use maximum with source
+    int64_t LogAlign = MI->getOperand(2).getImm();
+    return Align(1ull << LogAlign);
+  }
   case TargetOpcode::G_FRAME_INDEX: {
     int FrameIdx = MI->getOperand(1).getIndex();
     return MF.getFrameInfo().getObjectAlign(FrameIdx);
@@ -466,6 +471,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
       Known.Zero.setBitsFrom(SrcBitWidth);
     break;
   }
+  case TargetOpcode::G_ASSERT_ALIGN: {
+    int64_t LogOfAlign = MI.getOperand(2).getImm();
+    if (LogOfAlign == 0)
+      break;
+
+    // TODO: Should use maximum with source
+    // If a node is guaranteed to be aligned, set low zero bits accordingly as
+    // well as clearing one bits.
+    Known.Zero.setLowBits(LogOfAlign);
+    Known.One.clearLowBits(LogOfAlign);
+    break;
+  }
   case TargetOpcode::G_MERGE_VALUES: {
     unsigned NumOps = MI.getNumOperands();
     unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 391251886fbb2..c6720568b362b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -282,18 +282,6 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
   return buildInstr(TargetOpcode::COPY, Res, Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res,
-                                                      const SrcOp &Op,
-                                                      unsigned Size) {
-  return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res,
-                                                      const SrcOp &Op,
-                                                      unsigned Size) {
-  return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size);
-}
-
 MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
                                                     const ConstantInt &Val) {
   LLT Ty = Res.getLLTTy(*getMRI());
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 8d2677ea67e0e..01af6bb51bb79 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -626,7 +626,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
   if (isPreISelGenericOptimizationHint(Opc)) {
     assert((Opc == TargetOpcode::G_ASSERT_ZEXT ||
-            Opc == TargetOpcode::G_ASSERT_SEXT) &&
+            Opc == TargetOpcode::G_ASSERT_SEXT ||
+            Opc == TargetOpcode::G_ASSERT_ALIGN) &&
            "Unexpected hint opcode!");
     // The only correct mapping for these is to always use the source register
     // bank.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7ca6f9aa4cf0a..447bb326e6511 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3381,6 +3381,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::AssertAlign: {
     unsigned LogOfAlign = Log2(cast(Op)->getAlign());
     assert(LogOfAlign != 0);
+
+    // TODO: Should use maximum with source
     // If a node is guaranteed to be aligned, set low zero bits accordingly as
     // well as clearing one bits.
     Known.Zero.setLowBits(LogOfAlign);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 677e7a6684d56..097b93e4fccae 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1112,6 +1112,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
+  Info.IsTailCall = CanTailCallOpt;
   if (CanTailCallOpt)
     return lowerTailCall(MIRBuilder, Info, OutArgs);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index c6aa697f94d45..76eebb327010e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1338,6 +1338,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
+  Info.IsTailCall = CanTailCallOpt;
   if (CanTailCallOpt)
     return lowerTailCall(MIRBuilder, Info, OutArgs);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
new file mode 100644
index 0000000000000..f9ce504c3d68b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+declare i8* @foo()
+
+define void @call_assert_align() {
+; CHECK-LABEL: call_assert_align:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl foo
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ptr = call align 8 i8* @foo()
+  store i8 0, i8* %ptr
+  ret void
+}
+
+define i8* @tailcall_assert_align() {
+; CHECK-LABEL: tailcall_assert_align:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    b foo
+entry:
+  %call = tail call align 4 i8* @foo()
+  ret i8* %call
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
new file mode 100644
index 0000000000000..618a4532b2362
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for G_ASSERT_ALIGN.
+#
+
+---
+name:            gpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; G_ASSERT_ALIGN should end up on a GPR.
+
+    ; CHECK-LABEL: name: gpr
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr(p0) = COPY $x0
+    ; CHECK-NEXT: %copy_assert_align:gpr(p0) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: $x1 = COPY %copy_assert_align(p0)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x1
+    %copy:_(p0) = COPY $x0
+    %copy_assert_align:_(p0) = G_ASSERT_ALIGN %copy(p0), 4
+    $x1 = COPY %copy_assert_align
+    RET_ReallyLR implicit $x1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
new file mode 100644
index 0000000000000..225dec6d39d72
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
+
+declare hidden i32 addrspace(1)* @ext(i8 addrspace(1)*)
+
+define i32 addrspace(1)* @call_assert_align() {
+; CHECK-LABEL: call_assert_align:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_or_saveexec_b64 s[16:17], -1
+; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b64 exec, s[16:17]
+; CHECK-NEXT:    v_writelane_b32 v40, s33, 2
+; CHECK-NEXT:    s_mov_b32 s33, s32
+; CHECK-NEXT:    s_addk_i32 s32, 0x400
+; CHECK-NEXT:    v_writelane_b32 v40, s30, 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_writelane_b32 v40, s31, 1
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, ext@rel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, ext@rel32@hi+12
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_readlane_b32 s4, v40, 0
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_readlane_b32 s5, v40, 1
+; CHECK-NEXT:    s_addk_i32 s32, 0xfc00
+; CHECK-NEXT:    v_readlane_b32 s33, v40, 2
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  %call = call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null)
+  store volatile i32 0, i32 addrspace(1)* %call
+  ret i32 addrspace(1)* %call
+}
+
+define i32 addrspace(1)* @tail_call_assert_align() {
+; CHECK-LABEL: tail_call_assert_align:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, ext@rel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, ext@rel32@hi+12
+; CHECK-NEXT:    s_setpc_b64 s[16:17]
+entry:
+  %call = tail call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null)
+  ret i32 addrspace(1)* %call
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
new file mode 100644
index 0000000000000..7124c8c700481
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+; TODO: Could potentially insert G_ASSERT_ALIGN for the aligned argument here
+define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) {
+  ; CHECK-LABEL: name: arg_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
+  store i8 0, i8 addrspace(1)* %arg0, align 8
+  ret void
+}
+
+declare i8 addrspace(1)* @returns_ptr()
+declare align 8 i8 addrspace(1)* @returns_ptr_align8()
+
+define void @call_result_align_1() {
+  ; CHECK-LABEL: name: call_result_align_1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call align 1 i8 addrspace(1)* @returns_ptr()
+  store i8 0, i8 addrspace(1)* %ptr, align 1
+  ret void
+}
+
+define void @call_result_align_8() {
+  ; CHECK-LABEL: name: call_result_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call align 8 i8 addrspace(1)* @returns_ptr()
+  store i8 0, i8 addrspace(1)* %ptr, align 8
+  ret void
+}
+
+define void @declaration_result_align_8() {
+  ; CHECK-LABEL: name: declaration_result_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call i8 addrspace(1)* @returns_ptr_align8()
+  store i8 0, i8 addrspace(1)* %ptr, align 8
+  ret void
+}
+
+define i8 addrspace(1)* @tail_call_assert_align() {
+  ; CHECK-LABEL: name: tail_call_assert_align
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+entry:
+  %call = tail call i8 addrspace(1)* @returns_ptr_align8()
+  ret i8 addrspace(1)* %call
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir
new file mode 100644
index 0000000000000..783a1e9a6797e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
+
+---
+name:            assert_align_vgpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: assert_align_vgpr
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %assert_align:vgpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $vgpr0_vgpr1
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
+
+---
+name:            assert_align_sgpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: assert_align_sgpr
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:sgpr(p1) = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: %assert_align:sgpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $sgpr8_sgpr9
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
+
+---
+name:            assert_align_agpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $agpr0_agpr1
+
+    ; CHECK-LABEL: name: assert_align_agpr
+    ; CHECK: liveins: $agpr0_agpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:agpr(p1) = COPY $agpr0_agpr1
+    ; CHECK-NEXT: %assert_align:agpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $agpr0_agpr1
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
index f5594d016305e..7a2fc0ff1d51d 100644
--- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
@@ -1917,3 +1917,58 @@ TEST_F(AMDGPUGISelMITest, TestNumSignBitsSBFX) {
   EXPECT_EQ(1u, Info.computeNumSignBits(CopyUnkValBfxReg));
   EXPECT_EQ(1u, Info.computeNumSignBits(CopyUnkOffBfxReg));
 }
+
+TEST_F(AMDGPUGISelMITest, TestKnownBitsAssertAlign) {
+  StringRef MIRString = R"MIR(
+   %val:_(s64) = COPY $vgpr0_vgpr1
+   %ptrval:_(p1) = COPY $vgpr0_vgpr1
+
+   %assert_align0:_(s64) = G_ASSERT_ALIGN %val, 0
+   %copy_assert_align0:_(s64) = COPY %assert_align0
+
+   %assert_align1:_(s64) = G_ASSERT_ALIGN %val, 1
+   %copy_assert_align1:_(s64) = COPY %assert_align1
+
+   %assert_align2:_(s64) = G_ASSERT_ALIGN %val, 2
+   %copy_assert_align2:_(s64) = COPY %assert_align2
+
+   %assert_align3:_(s64) = G_ASSERT_ALIGN %val, 3
+   %copy_assert_align3:_(s64) = COPY %assert_align3
+
+   %assert_align8:_(s64) = G_ASSERT_ALIGN %val, 8
+   %copy_assert_align8:_(s64) = COPY %assert_align8
+
+   %assert_maxalign:_(s64) = G_ASSERT_ALIGN %val, 30
+   %copy_assert_maxalign:_(s64) = COPY %assert_maxalign
+
+   %assert_ptr_align5:_(p1) = G_ASSERT_ALIGN %ptrval, 5
+   %copy_assert_ptr_align5:_(p1) = COPY %assert_ptr_align5
+)MIR";
+  setUp(MIRString);
+  if (!TM)
+    return;
+  GISelKnownBits Info(*MF);
+
+  KnownBits Res;
+  auto GetKB = [&](unsigned Idx) {
+    Register CopyReg = Copies[Idx];
+    auto *Copy = MRI->getVRegDef(CopyReg);
+    return Info.getKnownBits(Copy->getOperand(1).getReg());
+  };
+
+  auto CheckBits = [&](unsigned NumBits, unsigned Idx) {
+    Res = GetKB(Idx);
+    EXPECT_EQ(64u, Res.getBitWidth());
+    EXPECT_EQ(NumBits, Res.Zero.countTrailingOnes());
+    EXPECT_EQ(64u, Res.One.countTrailingZeros());
+    EXPECT_EQ(Align(1ull << NumBits), Info.computeKnownAlignment(Copies[Idx]));
+  };
+
+  CheckBits(0, Copies.size() - 7);
+  CheckBits(1, Copies.size() - 6);
+  CheckBits(2, Copies.size() - 5);
+  CheckBits(3, Copies.size() - 4);
+  CheckBits(8, Copies.size() - 3);
+  CheckBits(30, Copies.size() - 2);
+  CheckBits(5, Copies.size() - 1);
+}

From 354b2c36ee46a60f316e816d31737559a0694515 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson 
Date: Mon, 10 Jan 2022 11:32:08 +0100
Subject: [PATCH 391/946] Pre-commit test cases for (sra (load)) -> (sextload)
 folds. NFC

Add test case to show missing folds for (sra (load)) -> (sextload).

Differential Revision: https://reviews.llvm.org/D116929
---
 llvm/test/CodeGen/X86/combine-sra-load.ll | 107 ++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-sra-load.ll

diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
new file mode 100644
index 0000000000000..119acaa6a02b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
+
+; FIXME: fold (sra (load i32), 16) -> (sextload i16)
+define i32 @sra_half(i32* %p) {
+; CHECK-LABEL: sra_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  ret i32 %shift
+}
+
+; Vector version not folded.
+define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
+; CHECK-LABEL: sra_half_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-NEXT:    psrad $16, %xmm0
+; CHECK-NEXT:    retq
+  %load = load <4 x i32>, <4 x i32>* %p
+  %shift = ashr <4 x i32> %load, 
+  ret <4 x i32> %shift
+}
+
+; FIXME: fold (sra (load i64), 48) -> (sextload i16)
+define i64 @sra_large_shift(i64* %r) {
+; CHECK-LABEL: sra_large_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    retq
+  %t0 = load i64, i64* %r
+  %conv = ashr i64 %t0, 48
+  ret i64 %conv
+}
+
+; Negative test, no fold expected.
+define i32 @sra_small_shift(i32* %p) {
+; CHECK-LABEL: sra_small_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 8
+  ret i32 %shift
+}
+
+; This should be folded to a zextload.
+define i32 @sra_of_zextload(i16* %p) {
+; CHECK-LABEL: sra_of_zextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzbl 1(%rdi), %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %zext = zext i16 %load to i32
+  %shift = ashr i32 %zext, 8
+  ret i32 %shift
+}
+
+; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+define i32 @sra_of_sextload(i16* %p) {
+; CHECK-LABEL: sra_of_sextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 8
+  ret i32 %shift
+}
+
+; Negative test. If the shift amount is larger than the memory type then
+; we're not accessing any of the loaded bytes (only the extended bits). So the
+; shift is expected to remain.
+define i32 @sra_of_sextload_no_fold(i16* %p) {
+; CHECK-LABEL: sra_of_sextload_no_fold:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 16
+  ret i32 %shift
+}
+
+; FIXME: Fold even if SRA has multiple uses.
+define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
+; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %ecx
+; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl $6, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  %use1 = xor i32 %shift, 6
+  %use2 = or i32 %shift, %use1
+  ret i32 %use2
+}

From 18aabae8e2b6ccea4575ac9e4fb4d38ec7e4e971 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 20 Jan 2022 14:57:23 -0500
Subject: [PATCH 392/946] AMDGPU: Fix assertion on fixed stack objects with
 VGPR->AGPR spills

These have negative / out of bounds frame index values and would
assert when trying to set the BitVector. Fixed stack objects can't be
colored away so ignore them.
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |  3 +-
 .../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 39 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index a347e91f3fad2..6078f4a0577ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1195,7 +1195,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
           }
         } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                    TII->isLoadFromStackSlot(MI, FrameIndex))
-          NonVGPRSpillFIs.set(FrameIndex);
+          if (!MFI.isFixedObjectIndex(FrameIndex))
+            NonVGPRSpillFIs.set(FrameIndex);
       }
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index 4c5e7ec3e727d..ab3fc800f4a7e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -281,6 +281,45 @@ st:
   ret void
 }
 
+; Make sure there's no crash when we have loads from fixed stack
+; objects and are processing VGPR spills
+
+; GCN-LABEL: {{^}}stack_args_vgpr_spill:
+; GFX908: v_accvgpr_write_b32
+; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
+; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
+define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, <32 x float> addrspace(1)* %p) #1 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
+  %p2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p1, i32 %tid
+  %p3 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p2, i32 %tid
+  %p4 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p3, i32 %tid
+  %p5 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p4, i32 %tid
+  %p6 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p5, i32 %tid
+  %p7 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p6, i32 %tid
+  %v1 = load volatile <32 x float>, <32 x float> addrspace(1)* %p1
+  %v2 = load volatile <32 x float>, <32 x float> addrspace(1)* %p2
+  %v3 = load volatile <32 x float>, <32 x float> addrspace(1)* %p3
+  %v4 = load volatile <32 x float>, <32 x float> addrspace(1)* %p4
+  %v5 = load volatile <32 x float>, <32 x float> addrspace(1)* %p5
+  %v6 = load volatile <32 x float>, <32 x float> addrspace(1)* %p6
+  %v7 = load volatile <32 x float>, <32 x float> addrspace(1)* %p7
+  br label %st
+
+st:
+  store volatile <32 x float> %arg0, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %arg1, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v1, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v2, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v3, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v4, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v5, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v6, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v7, <32 x float> addrspace(1)* undef
+  ret void
+}
+
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 
 attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }

From 49e37000691a17a073003219a584414e5f481e11 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Mon, 24 Jan 2022 23:16:29 +0900
Subject: [PATCH 393/946] [mlir][tensor] Move BufferizableOpInterface impl to
 tensor dialect

This is in preparation for unifying the existing bufferization with One-Shot bufferization.

A subsequent commit will replace `tensor-bufferize`'s implementation with the BufferizableOpInterface-based implementation and move over missing test cases.

Differential Revision: https://reviews.llvm.org/D117984
---
 .../Bufferization/Transforms/Bufferize.h      |  7 ++--
 .../TensorInterfaceImpl.h                     | 27 --------------
 .../Transforms/BufferizableOpInterfaceImpl.h  | 20 +++++++++++
 .../Bufferization/Transforms/Bufferize.cpp    |  4 +++
 .../ComprehensiveBufferize/CMakeLists.txt     | 11 ------
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |  2 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |  4 +--
 .../BufferizableOpInterfaceImpl.cpp}          | 36 ++++++++-----------
 .../Dialect/Tensor/Transforms/CMakeLists.txt  |  2 ++
 mlir/test/lib/Dialect/Linalg/CMakeLists.txt   |  2 +-
 .../Linalg/TestComprehensiveBufferize.cpp     |  4 +--
 .../llvm-project-overlay/mlir/BUILD.bazel     | 27 ++++----------
 .../mlir/test/BUILD.bazel                     |  2 +-
 13 files changed, 58 insertions(+), 90 deletions(-)
 delete mode 100644 mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
 create mode 100644 mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
 rename mlir/lib/Dialect/{Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp => Tensor/Transforms/BufferizableOpInterfaceImpl.cpp} (94%)

diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
index c2254dfa3a168..b587955d65b13 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
@@ -28,6 +28,9 @@
 namespace mlir {
 namespace bufferization {
 
+class BufferizationState;
+struct BufferizationOptions;
+
 /// A helper type converter class that automatically populates the relevant
 /// materializations and type conversions for bufferization.
 class BufferizeTypeConverter : public TypeConverter {
@@ -52,8 +55,6 @@ void populateBufferizeMaterializationLegality(ConversionTarget &target);
 void populateEliminateBufferizeMaterializationsPatterns(
     BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns);
 
-class BufferizationState;
-
 /// Bufferize `op` and its nested ops that implement `BufferizableOpInterface`.
 /// Whether buffer copies are needed or not is queried from `state`.
 ///
@@ -61,7 +62,7 @@ class BufferizationState;
 /// unknown op (that does not implement `BufferizableOpInterface`) is found. No
 /// to_tensor/to_memref ops are inserted in that case.
 ///
-/// Note: Tje layout map chosen to bufferize is the most dynamic canonical
+/// Note: The layout map chosen to bufferize is the most dynamic canonical
 /// strided layout of the proper rank. This ensures compatibility with expected
 /// layouts after transformations. Combinations of memref.cast +
 /// canonicalization are responsible for clean ups.
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
deleted file mode 100644
index cc3ba5aac84b5..0000000000000
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- LinalgInterfaceImpl.h - Linalg Impl. of BufferizableOpInterface ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
-#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace tensor_ext {
-
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-
-} // namespace tensor_ext
-} // namespace comprehensive_bufferize
-} // namespace linalg
-} // namespace mlir
-
-#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
new file mode 100644
index 0000000000000..298c2259c1fab
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
@@ -0,0 +1,20 @@
+//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
+#define MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 0bacc21d46889..07bd8fdbb0bb4 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -132,6 +132,10 @@ mlir::bufferization::createFinalizingBufferizePass() {
   return std::make_unique<FinalizingBufferizePass>();
 }
 
+//===----------------------------------------------------------------------===//
+// BufferizableOpInterface-based Bufferization
+//===----------------------------------------------------------------------===//
+
 static bool isaTensor(Type t) { return t.isa<TensorType>(); }
 
 /// Return true if the given op has a tensor result or a tensor operand.
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
index f3601f3c3935e..21e22de2eee24 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -5,7 +5,6 @@ set(LLVM_OPTIONAL_SOURCES
   ModuleBufferization.cpp
   SCFInterfaceImpl.cpp
   StdInterfaceImpl.cpp
-  TensorInterfaceImpl.cpp
   VectorInterfaceImpl.cpp
 )
 
@@ -57,16 +56,6 @@ add_mlir_dialect_library(MLIRStdBufferizableOpInterfaceImpl
   MLIRStandard
 )
 
-add_mlir_dialect_library(MLIRTensorBufferizableOpInterfaceImpl
-  TensorInterfaceImpl.cpp
-
-  LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
-  MLIRIR
-  MLIRMemRef
-  MLIRTensor
-)
-
 add_mlir_dialect_library(MLIRVectorBufferizableOpInterfaceImpl
   VectorInterfaceImpl.cpp
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index bea51784b14c8..6059d51260975 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -55,7 +55,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   MLIRStandardOpsTransforms
   MLIRStandardToLLVM
   MLIRTensor
-  MLIRTensorBufferizableOpInterfaceImpl
+  MLIRTensorTransforms
   MLIRTransforms
   MLIRTransformUtils
   MLIRVector
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 9409492e12dba..90335f952d559 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -18,9 +18,9 @@
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/StdInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -59,7 +59,7 @@ struct LinalgComprehensiveModuleBufferize
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    tensor_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    tensor::registerBufferizableOpInterfaceExternalModels(registry);
     vector_ext::registerBufferizableOpInterfaceExternalModels(registry);
   }
 };
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
similarity index 94%
rename from mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
rename to mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index f6748985dde14..03fa45f04c68f 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,4 +1,4 @@
-//===- TensorInterfaceImpl.cpp - Tensor Impl. of BufferizableOpInterface --===//
+//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -15,14 +15,11 @@
 
 using namespace mlir;
 using namespace mlir::bufferization;
+using namespace mlir::tensor;
 
 namespace mlir {
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace tensor_ext {
-
-using tensor::ExtractSliceOp;
-using tensor::InsertSliceOp;
+namespace tensor {
+namespace {
 
 struct CastOpInterface
     : public BufferizableOpInterface::ExternalModel();
-  registry.addOpInterface();
-  registry.addOpInterface();
-  registry.addOpInterface();
-  registry.addOpInterface();
-  registry.addOpInterface();
+void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addOpInterface();
+  registry.addOpInterface();
+  registry.addOpInterface();
+  registry.addOpInterface();
+  registry.addOpInterface();
+  registry.addOpInterface();
 }
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 7b2ccd46162c0..98787344c582b 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIRTensorTransforms
+  BufferizableOpInterfaceImpl.cpp
   Bufferize.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -9,6 +10,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
+  MLIRBufferizableOpInterface
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index 10657c8c514f8..c784461c2dc3d 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -30,7 +30,7 @@ add_mlir_library(MLIRLinalgTestPasses
   MLIRStdBufferizableOpInterfaceImpl
   MLIRStandard
   MLIRTensor
-  MLIRTensorBufferizableOpInterfaceImpl
+  MLIRTensorTransforms
   MLIRTransformUtils
   MLIRVector
   MLIRVectorBufferizableOpInterfaceImpl
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index 1ba0b891692b1..3e65330addb4d 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -21,11 +21,11 @@
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/StdInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
@@ -65,7 +65,7 @@ struct TestComprehensiveFunctionBufferize
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    tensor_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    tensor::registerBufferizableOpInterfaceExternalModels(registry);
     vector_ext::registerBufferizableOpInterfaceExternalModels(registry);
   }
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index e6b4654097d2c..74d10d9190b5d 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4415,11 +4415,15 @@ cc_library(
             "lib/Dialect/Tensor/Transforms/*.h",
         ],
     ),
-    hdrs = ["include/mlir/Dialect/Tensor/Transforms/Passes.h"],
+    hdrs = [
+        "include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h",
+        "include/mlir/Dialect/Tensor/Transforms/Passes.h",
+    ],
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
         ":Async",
+        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6684,25 +6688,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "TensorBufferizableOpInterfaceImpl",
-    srcs = [
-        "lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp",
-    ],
-    hdrs = [
-        "include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h",
-    ],
-    includes = ["include"],
-    deps = [
-        ":BufferizableOpInterface",
-        ":IR",
-        ":MemRefDialect",
-        ":Support",
-        ":TensorDialect",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "VectorBufferizableOpInterfaceImpl",
     srcs = [
@@ -6946,8 +6931,8 @@ cc_library(
         ":StandardOpsTransforms",
         ":StdBufferizableOpInterfaceImpl",
         ":Support",
-        ":TensorBufferizableOpInterfaceImpl",
         ":TensorDialect",
+        ":TensorTransforms",
         ":TensorUtils",
         ":TransformUtils",
         ":VectorBufferizableOpInterfaceImpl",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index c494b994a4f33..8c24a43000c78 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -403,8 +403,8 @@ cc_library(
         "//mlir:SCFTransforms",
         "//mlir:StandardOps",
         "//mlir:StdBufferizableOpInterfaceImpl",
-        "//mlir:TensorBufferizableOpInterfaceImpl",
         "//mlir:TensorDialect",
+        "//mlir:TensorTransforms",
         "//mlir:TransformUtils",
         "//mlir:VectorBufferizableOpInterfaceImpl",
         "//mlir:VectorOps",

From 473aa8e10c49aeed7083a53f275176c5831711b3 Mon Sep 17 00:00:00 2001
From: David Spickett 
Date: Mon, 24 Jan 2022 14:55:06 +0000
Subject: [PATCH 394/946] [llvm][docs] Fix code-block in the testing guide

Without a language name it's an error (with some versions of Sphinx
it seems) or the block is simply missing in the output.
---
 llvm/docs/TestingGuide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 6449661e5d38a..d0f890dd0a097 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -294,7 +294,7 @@ which first check the ``NOTE:`` line exists and matches the script name.
 These are the most common scripts and their purposes/applications in generating
 assertions:
 
-.. code-block::
+.. code-block:: none
 
   update_analyze_test_checks.py
   opt --analyze --costmodel

From 2d9ed1aba236b4e50ce4bbaf955347590d536a55 Mon Sep 17 00:00:00 2001
From: Denys Shabalin 
Date: Mon, 24 Jan 2022 12:44:47 +0100
Subject: [PATCH 395/946] [mlir] Fix broken __repr__ implementation in Linalg
 OpDSL

Reviewed By: gysit

Differential Revision: https://reviews.llvm.org/D118027
---
 mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
index ddbebb29fd6f0..f6f3e01443b84 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
@@ -468,7 +468,7 @@ def visit_tensor_exprs(self, callback):
     self.arg.visit_tensor_exprs(callback)
 
   def __repr__(self):
-    return f"{repr(self.type_fn)}({type_var}, {self.arg})"
+    return f"{repr(self.type_fn)}({self.type_var}, {self.arg})"
 
 
 class const(TensorExpression):

From d193f7be7898f165e98ee5fc38b07588f43fa856 Mon Sep 17 00:00:00 2001
From: Sean Fertile 
Date: Fri, 21 Jan 2022 11:05:31 -0500
Subject: [PATCH 396/946] [libc++][AIX] Do not assert chmod return value is
 non-zero.

A number of the filesystem tests create a directory that contains a bad
symlink. On AIX recursively setting permissions on said directory will
return a non-zero value because of the bad symlink, however the
following rm -r still completes successfully. Avoid the assertion on
AIX, and rely on the return value of the remove command to detect
problems.

Differential Revision: https://reviews.llvm.org/D112086
---
 .../directory_entry.cons/path.pass.cpp                      | 2 --
 .../directory_entry.mods/refresh.pass.cpp                   | 2 --
 .../directory_entry.mods/replace_filename.pass.cpp          | 2 --
 .../directory_entry.obs/file_size.pass.cpp                  | 2 --
 .../directory_entry.obs/file_type_obs.pass.cpp              | 2 --
 .../directory_entry.obs/hard_link_count.pass.cpp            | 2 --
 .../directory_entry.obs/last_write_time.pass.cpp            | 2 --
 .../directory_entry.obs/status.pass.cpp                     | 2 --
 .../directory_entry.obs/symlink_status.pass.cpp             | 2 --
 .../directory_iterator.members/copy.pass.cpp                | 2 --
 .../directory_iterator.members/copy_assign.pass.cpp         | 2 --
 .../directory_iterator.members/ctor.pass.cpp                | 2 --
 .../directory_iterator.members/increment.pass.cpp           | 2 --
 .../directory_iterator.members/move.pass.cpp                | 2 --
 .../directory_iterator.members/move_assign.pass.cpp         | 2 --
 .../directory_iterator.nonmembers/begin_end.pass.cpp        | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp     | 2 --
 .../rec.dir.itr.members/copy_assign.pass.cpp                | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp     | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp    | 2 --
 .../rec.dir.itr.members/disable_recursion_pending.pass.cpp  | 2 --
 .../rec.dir.itr.members/increment.pass.cpp                  | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp     | 2 --
 .../rec.dir.itr.members/move_assign.pass.cpp                | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp      | 2 --
 .../rec.dir.itr.members/recursion_pending.pass.cpp          | 2 --
 .../rec.dir.itr.nonmembers/begin_end.pass.cpp               | 2 --
 .../fs.op.funcs/fs.op.canonical/canonical.pass.cpp          | 2 --
 .../filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp        | 2 --
 .../fs.op.create_directories/create_directories.pass.cpp    | 2 --
 .../create_directory_with_attributes.pass.cpp               | 2 --
 .../fs.op.funcs/fs.op.current_path/current_path.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp        | 2 --
 .../filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.file_size/file_size.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp   | 2 --
 .../fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp  | 2 --
 .../fs.op.is_char_file/is_character_file.pass.cpp           | 2 --
 .../fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp            | 2 --
 .../filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp  | 2 --
 .../fs.op.funcs/fs.op.is_other/is_other.pass.cpp            | 2 --
 .../fs.op.is_regular_file/is_regular_file.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp        | 2 --
 .../fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.relative/relative.pass.cpp            | 2 --
 .../filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp      | 2 --
 .../filesystems/fs.op.funcs/fs.op.space/space.pass.cpp      | 2 --
 .../filesystems/fs.op.funcs/fs.op.status/status.pass.cpp    | 2 --
 .../fs.op.symlink_status/symlink_status.pass.cpp            | 2 --
 .../fs.op.weakly_canonical/weakly_canonical.pass.cpp        | 2 --
 libcxx/test/support/filesystem_test_helper.h                | 6 ++++++
 54 files changed, 6 insertions(+), 106 deletions(-)

diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
index 29e9e92fb7de2..19148e7c779de 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
index ce87f4325fa75..da436fc6b959c 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
index 0cf99f2658b5b..564e0e21ad6cd 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
index e584fdd79ff17..271a6e826f2b7 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
index f1603abca4e10..541a6d9c9ffb0 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
index 3daf5fcb6a1f8..44eac78fe8f46 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
index 3e5f43ff4a347..928248b3c2b87 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
index 546170fc5141a..ce4f286e2b2c3 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
index e27f7e47fed40..364b832bafa75 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
index bfe107ee3feb2..47041cb455a9f 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
index fe1cc43378e38..e6f6d1657c32f 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
index d5eeb21e96b32..4ab9a2dea3360 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
index 3727c1527140f..09b513974aaa9 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
index 29ed2090aaf62..6ae6eed1ac022 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
index 6a9aaae28f0b7..d29f7b330862a 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
index 407af95705c46..f6f0bbe7687a8 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
index 1b731a4b39fed..aacf160a22293 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
index ff3320e73f576..dc689ad3d9f50 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
index f4fde828b3279..5a18c46546897 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
index bdf23b17beaf9..4983665978d10 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
index 0a6b73af42fa0..e9dc9648ec687 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
index 4d363b91aa3bf..4b8390e2a125d 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
index 44b939f227572..7dcc47d610020 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
index f4d5410e1346b..03c85f27f608c 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
index 6df6746e323a4..40b09f1e79a99 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
index 5fcf0f1078c7e..82c82c71b181c 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
index fc3c846db9006..104e419fa9e91 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
index 98a8a91a44f53..707b646be1798 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // path canonical(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
index 603d2e78fb9cc..3f9574de0bb01 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // void copy(const path& from, const path& to);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
index 8c57cbc771108..d6b18e2e043be 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // This test requires the dylib support introduced in D92769.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
 
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
index 424e7e55df1f9..4d5cdf31e5b59 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // This test requires the dylib support introduced in D92769.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
 
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
index 64f8effeb9fa1..5e90c4452a9db 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // path current_path();
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
index e2fdeff1159aa..5fe888609a92e 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool equivalent(path const& lhs, path const& rhs);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
index 8fc355cc3d547..13e8b95d1e540 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool exists(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
index cce1499176012..413ba881b59f1 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
index 6e1ea695b40d9..38b26710f1a95 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // uintmax_t hard_link_count(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
index 85e297d2f58a3..d28898472a94b 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_block_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
index 6efb66e312387..738e06cc1ad55 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_character_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
index 51b063b40a97a..8b1e41ef7f1b8 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_directory(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
index 930541bd5e357..8478037a03c68 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_empty(path const& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
index 39b059b5552ac..0169fed28f54f 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_fifo(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
index 9fc1dfd083509..f84eb7dd32d81 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_other(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
index a1fb918a36aad..441f15a9c5d11 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_regular_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
index cf6da35300de3..21aa537094344 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_socket(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
index 43552f935c138..d8ec533058c7a 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // bool is_symlink(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
index c3f8effb4bb32..ae94253903089 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // path read_symlink(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
index 53700e94e00ce..0c056057927d3 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // path proximate(const path& p, error_code &ec)
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
index e5d46f3c992b7..c651bf1785823 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // void rename(const path& old_p, const path& new_p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
index 8405b74801518..504561749759c 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // void resize_file(const path& p, uintmax_t new_size);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
index 44f60240a497e..c0317966d4fe8 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // space_info space(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
index f819a0f4aee52..3fa9f58b77b95 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // file_status status(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
index 3fa0f99538cb5..a1d8ba6e09fc7 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // file_status symlink_status(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
index 4ad6ba7e6e277..b0909da011710 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // 
 
 // path weakly_canonical(const path& p);
diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h
index dc85a86fa3ed3..746cbe603f521 100644
--- a/libcxx/test/support/filesystem_test_helper.h
+++ b/libcxx/test/support/filesystem_test_helper.h
@@ -148,7 +148,13 @@ struct scoped_test_env
         std::string cmd = "chmod -R 777 " + test_root.string();
 #endif // defined(__MVS__)
         int ret = std::system(cmd.c_str());
+#if !defined(_AIX)
+        // On AIX the chmod command will return non-zero when trying to set
+        // the permissions on a directory that contains a bad symlink. This triggers
+        // the assert, despite being able to delete everything with the following
+        // `rm -r` command.
         assert(ret == 0);
+#endif
 
         cmd = "rm -rf " + test_root.string();
         ret = std::system(cmd.c_str());

From fc08d1c2940609d26a534d7a12e6c6a528891830 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 00:09:36 +0900
Subject: [PATCH 397/946] [mlir][tensor][bufferize] Support tensor.rank in
 BufferizableOpInterfaceImpl

This is the only op that is not supported via BufferizableOpInterfaceImpl bufferization. Once this op is supported we can switch `tensor-bufferize` over to the new unified bufferization.

Differential Revision: https://reviews.llvm.org/D117985
---
 .../ModuleBufferization.cpp                   | 20 ++++++++-----
 .../BufferizableOpInterfaceImpl.cpp           | 30 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 11 +++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index f4a2a5d692152..0fe79862a69d0 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -457,16 +457,22 @@ static LogicalResult bufferizeFuncOpBoundary(FuncOp funcOp,
     Value memref = frontBlock.addArgument(memrefType, bbArg.getLoc());
     OpBuilder b(funcOp->getContext());
     b.setInsertionPointToStart(&frontBlock);
-    // Replace all uses of bbArg through a ToMemRefOp by a memref::CastOp.
+    // Replace all uses of bbArg through a ToMemRefOp.
     for (auto &use : llvm::make_early_inc_range(bbArg.getUses())) {
       if (auto toMemrefOp =
               dyn_cast(use.getOwner())) {
-        assert(memref::CastOp::areCastCompatible(
-                   memref.getType(), toMemrefOp.memref().getType()) &&
-               "bufferizeFuncOpBoundary: cast incompatible");
-        auto castOp = b.create(
-            funcOp.getLoc(), toMemrefOp.memref().getType(), memref);
-        toMemrefOp.memref().replaceAllUsesWith(castOp);
+        if (memref.getType() != toMemrefOp.memref().getType()) {
+          // Type has changed, insert a cast.
+          assert(memref::CastOp::areCastCompatible(
+                     memref.getType(), toMemrefOp.memref().getType()) &&
+                 "bufferizeFuncOpBoundary: cast incompatible");
+          auto castOp = b.create(
+              funcOp.getLoc(), toMemrefOp.memref().getType(), memref);
+          toMemrefOp.memref().replaceAllUsesWith(castOp);
+        } else {
+          // Type did not change, replace directly.
+          toMemrefOp.memref().replaceAllUsesWith(memref);
+        }
       }
     }
     // Replace all remaining uses by a to_tensor.
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 03fa45f04c68f..ea9d885736f90 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -463,6 +463,35 @@ struct InsertSliceOpInterface
   }
 };
 
+/// Bufferization of tensor.rank. Replace with memref.rank.
+struct RankOpInterface
+    : public BufferizableOpInterface::ExternalModel {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const BufferizationState &state) const {
+    return true;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return false;
+  }
+
+  OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return OpResult();
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto rankOp = cast(op);
+    Value v = *state.getBuffer(rewriter, rankOp->getOpOperand(0) /*source*/);
+    replaceOpWithNewBufferizedOp(rewriter, op, rankOp.getType(),
+                                                 v);
+    return success();
+  }
+};
+
 } // namespace
 } // namespace tensor
 } // namespace mlir
@@ -475,4 +504,5 @@ void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
   registry.addOpInterface();
   registry.addOpInterface();
   registry.addOpInterface();
+  registry.addOpInterface();
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index a739fc4645ed0..1f301a14c11e1 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1348,3 +1348,14 @@ func @write_after_select_read_one(
   // CHECK: return %[[f]], %[[select]]
   return %f, %w : f32, tensor
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_rank(
+//  CHECK-SAME:     %[[arg0:.*]]: memref<*xf32>
+func @tensor_rank(%arg0: tensor<*xf32>) -> index {
+  // CHECK: %[[r:.*]] = memref.rank %[[arg0]]
+  %0 = tensor.rank %arg0 : tensor<*xf32>
+  // CHECK: return %[[r]] : index
+  return %0 : index
+}

From 4d53f88d1a18e288362e1077ae09c98c843593ba Mon Sep 17 00:00:00 2001
From: Valentin Clement 
Date: Mon, 24 Jan 2022 16:32:04 +0100
Subject: [PATCH 398/946] [flang] Add MemoryAllocation pass to the pipeline

Add the MemoryAllocation pass into the pipeline. Add
the possibility to pass the options directly within the tool (tco).

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: jeanPerier

Differential Revision: https://reviews.llvm.org/D117886
---
 .../flang/Optimizer/Transforms/Passes.h       |  2 +
 flang/include/flang/Tools/CLOptions.inc       |  8 ++++
 .../Optimizer/Transforms/MemoryAllocation.cpp | 39 +++++++++++++++++--
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 2e273163ebfc4..4c13572386447 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -35,6 +35,8 @@ std::unique_ptr createExternalNameConversionPass();
 std::unique_ptr createMemDataFlowOptPass();
 std::unique_ptr createPromoteToAffinePass();
 std::unique_ptr createMemoryAllocationPass();
+std::unique_ptr
+createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 1c85075d5cc17..1be3d59dde490 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -87,6 +87,13 @@ inline void addAVC(mlir::PassManager &pm) {
       pm, disableFirAvc, fir::createArrayValueCopyPass);
 }
 
+inline void addMemoryAllocationOpt(mlir::PassManager &pm) {
+  addNestedPassConditionally(pm, disableFirMao, [&]() {
+    return fir::createMemoryAllocationPass(
+        dynamicArrayStackToHeapAllocation, arrayStackAllocationThreshold);
+  });
+}
+
 #if !defined(FLANG_EXCLUDE_CODEGEN)
 inline void addCodeGenRewritePass(mlir::PassManager &pm) {
   addPassConditionally(
@@ -121,6 +128,7 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
   fir::addAVC(pm);
   pm.addNestedPass(fir::createCharacterConversionPass());
   pm.addPass(mlir::createCanonicalizerPass(config));
+  fir::addMemoryAllocationOpt(pm);
 
   // The default inliner pass adds the canonicalizer pass with the default
   // configuration. Create the inliner pass with tco config.
diff --git a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
index 4c0144f757186..83c77d6895841 100644
--- a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
+++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
@@ -21,7 +21,7 @@
 #define DEBUG_TYPE "flang-memory-allocation-opt"
 
 // Number of elements in an array does not determine where it is allocated.
-static constexpr std::size_t UnlimitedArraySize = ~static_cast(0);
+static constexpr std::size_t unlimitedArraySize = ~static_cast(0);
 
 namespace {
 struct MemoryAllocationOptions {
@@ -32,7 +32,7 @@ struct MemoryAllocationOptions {
   // Number of elements in array threshold for moving to heap. In environments
   // with limited stack size, moving large arrays to the heap can avoid running
   // out of stack space.
-  std::size_t maxStackArraySize = UnlimitedArraySize;
+  std::size_t maxStackArraySize = unlimitedArraySize;
 };
 
 class ReturnAnalysis {
@@ -150,13 +150,36 @@ class AllocaOpConversion : public mlir::OpRewritePattern {
 class MemoryAllocationOpt
     : public fir::MemoryAllocationOptBase {
 public:
+  MemoryAllocationOpt() {
+    // Set options with default values. (See Passes.td.) Note that the
+    // command-line options, e.g. dynamicArrayOnHeap,  are not set yet.
+    options = {dynamicArrayOnHeap, maxStackArraySize};
+  }
+
+  MemoryAllocationOpt(bool dynOnHeap, std::size_t maxStackSize) {
+    // Set options from the explicitly provided constructor arguments.
+    options = {dynOnHeap, maxStackSize};
+  }
+
+  /// Override `options` if command-line options have been set.
+  inline void useCommandLineOptions() {
+    if (dynamicArrayOnHeap)
+      options.dynamicArrayOnHeap = dynamicArrayOnHeap;
+    if (maxStackArraySize != unlimitedArraySize)
+      options.maxStackArraySize = maxStackArraySize;
+  }
+
   void runOnOperation() override {
     auto *context = &getContext();
     auto func = getOperation();
     mlir::OwningRewritePatternList patterns(context);
     mlir::ConversionTarget target(*context);
-    MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(),
-                                       maxStackArraySize.getValue()};
+
+    useCommandLineOptions();
+    LLVM_DEBUG(llvm::dbgs()
+               << "dynamic arrays on heap: " << options.dynamicArrayOnHeap
+               << "\nmaximum number of elements of array on stack: "
+               << options.maxStackArraySize << '\n');
 
     // If func is a declaration, skip it.
     if (func.empty())
@@ -178,9 +201,17 @@ class MemoryAllocationOpt
       signalPassFailure();
     }
   }
+
+private:
+  MemoryAllocationOptions options;
 };
 } // namespace
 
 std::unique_ptr fir::createMemoryAllocationPass() {
   return std::make_unique();
 }
+
+std::unique_ptr
+fir::createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize) {
+  return std::make_unique(dynOnHeap, maxStackSize);
+}

From 3ad35ba4dea5240dd58476f0c85f0fe096d6c7ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= 
Date: Wed, 10 Nov 2021 15:03:06 +0100
Subject: [PATCH 399/946] [Templight] Don't display empty strings for names of
 unnamed template parameters

Patch originally by oktal3000: https://github.com/mikael-s-persson/templight/pull/40

When a template parameter is unnamed, -templight-dump might print an empty
string as its name. This is fine, they are unnamed after all, but it might be
more user-friendly to at least describe which entity is unnamed.

Differential Revision: https://reviews.llvm.org/D115521
---
 clang/lib/Frontend/FrontendActions.cpp        |  95 ++++-
 .../Templight/templight-empty-entries-fix.cpp | 333 ++++++++++++++++++
 2 files changed, 415 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/Templight/templight-empty-entries-fix.cpp

diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 5b77c3e01aace..ad2e6039477f8 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -8,9 +8,10 @@
 
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/AST/ASTConsumer.h"
+#include "clang/AST/Decl.h"
 #include "clang/Basic/FileManager.h"
-#include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/LangStandard.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Frontend/ASTConsumers.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -23,6 +24,7 @@
 #include "clang/Sema/TemplateInstCallback.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
@@ -480,25 +482,92 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback {
     Out << "---" << YAML << "\n";
   }
 
+  static void printEntryName(const Sema &TheSema, const Decl *Entity,
+                             llvm::raw_string_ostream &OS) {
+    auto *NamedTemplate = cast(Entity);
+
+    PrintingPolicy Policy = TheSema.Context.getPrintingPolicy();
+    // FIXME: Also ask for FullyQualifiedNames?
+    Policy.SuppressDefaultTemplateArgs = false;
+    NamedTemplate->getNameForDiagnostic(OS, Policy, true);
+
+    if (!OS.str().empty())
+      return;
+
+    Decl *Ctx = Decl::castFromDeclContext(NamedTemplate->getDeclContext());
+    NamedDecl *NamedCtx = dyn_cast_or_null(Ctx);
+
+    if (const auto *Decl = dyn_cast(NamedTemplate)) {
+      if (const auto *R = dyn_cast(Decl)) {
+        if (R->isLambda()) {
+          OS << "lambda at ";
+          Decl->getLocation().print(OS, TheSema.getSourceManager());
+          return;
+        }
+      }
+      OS << "unnamed " << Decl->getKindName();
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast(NamedTemplate)) {
+      OS << "unnamed function parameter " << Decl->getFunctionScopeIndex()
+         << " ";
+      if (Decl->getFunctionScopeDepth() > 0)
+        OS << "(at depth " << Decl->getFunctionScopeDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast(NamedTemplate)) {
+      if (const Type *Ty = Decl->getTypeForDecl()) {
+        if (const auto *TTPT = dyn_cast_or_null(Ty)) {
+          OS << "unnamed template type parameter " << TTPT->getIndex() << " ";
+          if (TTPT->getDepth() > 0)
+            OS << "(at depth " << TTPT->getDepth() << ") ";
+          OS << "of ";
+          NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+          return;
+        }
+      }
+    }
+
+    if (const auto *Decl = dyn_cast(NamedTemplate)) {
+      OS << "unnamed template non-type parameter " << Decl->getIndex() << " ";
+      if (Decl->getDepth() > 0)
+        OS << "(at depth " << Decl->getDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast(NamedTemplate)) {
+      OS << "unnamed template template parameter " << Decl->getIndex() << " ";
+      if (Decl->getDepth() > 0)
+        OS << "(at depth " << Decl->getDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    llvm_unreachable("Failed to retrieve a name for this entry!");
+    OS << "unnamed identifier";
+  }
+
   template 
   static TemplightEntry getTemplightEntry(const Sema &TheSema,
                                           const CodeSynthesisContext &Inst) {
     TemplightEntry Entry;
     Entry.Kind = toString(Inst.Kind);
     Entry.Event = BeginInstantiation ? "Begin" : "End";
-    if (auto *NamedTemplate = dyn_cast_or_null(Inst.Entity)) {
-      llvm::raw_string_ostream OS(Entry.Name);
-      PrintingPolicy Policy = TheSema.Context.getPrintingPolicy();
-      // FIXME: Also ask for FullyQualifiedNames?
-      Policy.SuppressDefaultTemplateArgs = false;
-      NamedTemplate->getNameForDiagnostic(OS, Policy, true);
-      const PresumedLoc DefLoc =
+    llvm::raw_string_ostream OS(Entry.Name);
+    printEntryName(TheSema, Inst.Entity, OS);
+    const PresumedLoc DefLoc =
         TheSema.getSourceManager().getPresumedLoc(Inst.Entity->getLocation());
-      if(!DefLoc.isInvalid())
-        Entry.DefinitionLocation = std::string(DefLoc.getFilename()) + ":" +
-                                   std::to_string(DefLoc.getLine()) + ":" +
-                                   std::to_string(DefLoc.getColumn());
-    }
+    if (!DefLoc.isInvalid())
+      Entry.DefinitionLocation = std::string(DefLoc.getFilename()) + ":" +
+                                 std::to_string(DefLoc.getLine()) + ":" +
+                                 std::to_string(DefLoc.getColumn());
     const PresumedLoc PoiLoc =
         TheSema.getSourceManager().getPresumedLoc(Inst.PointOfInstantiation);
     if (!PoiLoc.isInvalid()) {
diff --git a/clang/test/Templight/templight-empty-entries-fix.cpp b/clang/test/Templight/templight-empty-entries-fix.cpp
new file mode 100644
index 0000000000000..9154e56fa24ab
--- /dev/null
+++ b/clang/test/Templight/templight-empty-entries-fix.cpp
@@ -0,0 +1,333 @@
+// RUN: %clang_cc1 -templight-dump -Wno-unused-value %s 2>&1 | FileCheck %s
+
+void a() {
+  [] {};
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'lambda at .*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'lambda at .*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+
+template  void a() { a(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<0>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<0>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+
+template  struct b { typedef int c; };
+template ::c> void a() { a(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template type parameter 1 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:32'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template type parameter 1 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:32'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+
+template  void d(int = 0) { d(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'unnamed function parameter 0 of d'$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:35'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'unnamed function parameter 0 of d'$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:35'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+
+void e() {
+  struct {
+  } f;
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+
+
+template  class>
+void d();
+
+template  struct C;
+
+void foo() {
+  d();
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template template parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:265:35'$}}
+// CHECK: {{^poi:[ ]+''$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template template parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:265:35'$}}
+// CHECK: {{^poi:[ ]+''$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}

From c30d2893a43d20b587d0abd288472adfa2c8672b Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 00:34:11 +0900
Subject: [PATCH 400/946] [mlir][bufferize] Change insertion point for
 ToTensorOps

Both insertion points are valid. This is to make BufferizableOpInterface-based bufferization compatible with existing partial bufferization test cases. (So fewer changes to unit tests are necessary.)

Differential Revision: https://reviews.llvm.org/D117986
---
 .../Dialect/Bufferization/IR/BufferizableOpInterface.cpp    | 2 +-
 .../Dialect/Linalg/comprehensive-function-bufferize.mlir    | 2 +-
 .../Linalg/comprehensive-module-bufferize-alloca.mlir       | 6 +++---
 .../test/Dialect/Linalg/comprehensive-module-bufferize.mlir | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index e565f41a39d5a..9cb99db16d6a7 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -302,7 +302,7 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
       // The existing uses of the OpResult still expect a tensor. Insert a
       // ToTensorOp. Throughout bufferization, this ToTensorOp will gradually
       // loose all of its users and eventually DCE away.
-      setInsertionPointAfter(rewriter, replacement);
+      rewriter.setInsertionPointAfter(op);
       replacement = rewriter.create(
           replacement.getLoc(), replacement);
     }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
index 609a0df7a7cb6..1a3b266ee4b80 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
@@ -30,9 +30,9 @@ func @return_tensor(%A : tensor, %v : vector<4xf32>) -> (tensor) {
   // CHECK: %[[dim:.*]] = tensor.dim %[[A]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
   // CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
-  // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[casted]]
   // CHECK: memref.copy %[[A_memref]], %[[alloc]]
   // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
+  // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[casted]]
   %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor
 
   // CHECK: return %[[res_tensor]]
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
index 991429cb18cb0..8b4db05f8251e 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
@@ -34,16 +34,16 @@ func @main() {
   // CHECK-NEXT:   %[[A:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[B:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[C:.*]] = memref.alloca() {alignment = 128 : i64} : memref
+  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref to memref
   %A = linalg.init_tensor [64] : tensor<64xf32>
   %B = linalg.init_tensor [64] : tensor<64xf32>
   %C = linalg.init_tensor [] : tensor
 
   // CHECK-NEXT:   linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C0]], %[[C]]) : f32, memref
-  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref to memref
   %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
   %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
   %CC = linalg.fill(%v0, %C) : f32, tensor -> tensor
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 1f301a14c11e1..28ee8bea2e9ec 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -597,16 +597,16 @@ func @main() {
   // CHECK-NEXT:   %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref
+  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref to memref
   %A = linalg.init_tensor [64] : tensor<64xf32>
   %B = linalg.init_tensor [64] : tensor<64xf32>
   %C = linalg.init_tensor [] : tensor
 
   // CHECK-NEXT:   linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C0]], %[[C]]) : f32, memref
-  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref to memref
   %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
   %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
   %CC = linalg.fill(%v0, %C) : f32, tensor -> tensor

From 217570b03bbe810e6d4183aee72637ae5c326fbc Mon Sep 17 00:00:00 2001
From: Lorenzo Chelini 
Date: Sun, 23 Jan 2022 16:13:46 +0100
Subject: [PATCH 401/946] [MLIR][OpenMP] Suppress -Wreturn-type warnings (NFC)

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp     | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d1f261e52bf2e..07fdc2de08a91 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -198,6 +198,7 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
   case omp::ClauseProcBindKind::spread:
     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
   }
+  llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
 /// Converts the OpenMP parallel operation to LLVM IR.
@@ -891,7 +892,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
-// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
+/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 llvm::AtomicOrdering
 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
   if (!ao)
@@ -909,9 +910,10 @@ convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
   case omp::ClauseMemoryOrderKind::relaxed:
     return llvm::AtomicOrdering::Monotonic;
   }
+  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
 }
 
-// Convert omp.atomic.read operation to LLVM IR.
+/// Convert omp.atomic.read operation to LLVM IR.
 static LogicalResult
 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {

From c1335166b2659b02784b9dfb562c6b8b1c746407 Mon Sep 17 00:00:00 2001
From: Hans Wennborg 
Date: Mon, 24 Jan 2022 16:50:49 +0100
Subject: [PATCH 402/946] Don't run test/ClangScanDeps/modules-symlink.c on
 Windows

'ln -s' isn't Windows friendly.
---
 clang/test/ClangScanDeps/modules-symlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c
index 1a2fe2d9f5123..46831b0a3fc00 100644
--- a/clang/test/ClangScanDeps/modules-symlink.c
+++ b/clang/test/ClangScanDeps/modules-symlink.c
@@ -1,5 +1,6 @@
 // RUN: rm -rf %t
 // RUN: split-file %s %t
+// UNSUPPORTED: system-windows
 
 //--- cdb_pch.json
 [

From cd2a9ff39788578f419d41f32d046150462696e2 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Mon, 24 Jan 2022 07:54:59 -0800
Subject: [PATCH 403/946] [RISCV] Select int_riscv_vsll with shift of 1 to
 vadd.vv.

Add might be faster than shift. We can't do this earlier without
using a Freeze instruction.

This is the intrinsic version of D106689.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D118013
---
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    | 24 ++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll      | 32 +++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll      | 32 +++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 798f848a50b7a..3d4864003b518 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4543,6 +4543,30 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsrl", "PseudoVSRL", AllIntegerVectors,
 defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
                             uimm5>;
 
+foreach vti = AllIntegerVectors in {
+  // Emit shift by 1 as an add since it might be faster.
+  def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1),
+                                        (XLenVT 1), VLOpFrag)),
+            (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+                                                              vti.RegClass:$rs1,
+                                                              GPR:$vl,
+                                                              vti.Log2SEW)>;
+  def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
+                                             (vti.Vector vti.RegClass:$rs1),
+                                             (XLenVT 1),
+                                             (vti.Mask V0),
+                                             VLOpFrag,
+                                             (XLenVT timm:$policy))),
+            (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX#"_MASK")
+                                                        vti.RegClass:$merge,
+                                                        vti.RegClass:$rs1,
+                                                        vti.RegClass:$rs1,
+                                                        (vti.Mask V0),
+                                                        GPR:$vl,
+                                                        vti.Log2SEW,
+                                                        (XLenVT timm:$policy))>;
+}
+
 //===----------------------------------------------------------------------===//
 // 12.7. Vector Narrowing Integer Right Shift Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
index 3f555dba39c5f..cd141520ce540 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
@@ -2000,6 +2000,21 @@ entry:
   ret  %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsll_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
+    <vscale x 1 x i8> %0,
+    i32 1,
+    i32 %1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define  @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8( %0,  %1,  %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
@@ -2017,6 +2032,23 @@ entry:
   ret  %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i32 1,
+    <vscale x 1 x i1> %2,
+    i32 %3, i32 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define  @intrinsic_vsll_vi_nxv2i8_nxv2i8_i8( %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_vi_nxv2i8_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
index 8aa798d3b9e67..305f8712b0f43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
@@ -2000,6 +2000,21 @@ entry:
   ret  %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsll_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
+    <vscale x 1 x i8> %0,
+    i64 1,
+    i64 %1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define  @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8( %0,  %1,  %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
@@ -2017,6 +2032,23 @@ entry:
   ret  %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i64 1,
+    <vscale x 1 x i1> %2,
+    i64 %3, i64 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define  @intrinsic_vsll_vi_nxv2i8_nxv2i8_i8( %0, i64 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_vi_nxv2i8_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry

From b8c7cdcc81a04613d01b1f468d510959f1e66416 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Mon, 24 Jan 2022 08:17:45 -0800
Subject: [PATCH 404/946] [SelectionDAG][RISCV] Teach getNode to fold
 bswap(bswap(x))->x.

This can show up when bitreverse is expanded to bswap and a
swap of bits within a byte. If the input is already a bswap, we
should cancel them out before we further transform them in a way
that makes it harder to see the redundancy.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118007
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   3 +
 .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 256 ++++--------------
 2 files changed, 53 insertions(+), 206 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 447bb326e6511..199dee9b0105f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5119,6 +5119,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "BSWAP types must be a multiple of 16 bits!");
     if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
+    // bswap(bswap(X)) -> X.
+    if (OpOpcode == ISD::BSWAP)
+      return Operand.getOperand(0);
     break;
   case ISD::BITREVERSE:
     assert(VT.isInteger() && VT == Operand.getValueType() &&
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 435ea9c0d80df..3913427d5ccd0 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -694,13 +694,6 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    slli a2, a0, 16
-; RV32I-NEXT:    srli a2, a2, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 1
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -726,13 +719,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a2, a0, 48
-; RV64I-NEXT:    srli a2, a2, 56
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -758,16 +744,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a0, a0, 16
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a1, a0, 12
-; RV32ZBB-NEXT:    lui a2, 15
-; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
 ; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    srli a0, a0, 20
-; RV32ZBB-NEXT:    andi a0, a0, -241
-; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 2
 ; RV32ZBB-NEXT:    lui a2, 3
 ; RV32ZBB-NEXT:    addi a2, a2, 819
@@ -786,16 +769,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 48
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 44
-; RV64ZBB-NEXT:    lui a2, 15
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 52
-; RV64ZBB-NEXT:    andi a0, a0, -241
-; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 3
 ; RV64ZBB-NEXT:    addiw a2, a2, 819
@@ -819,27 +799,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 61681
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -865,27 +824,6 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 61681
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -936,18 +874,12 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 32
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    srli a1, a0, 4
 ; RV64ZBB-NEXT:    lui a2, 61681
 ; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 28
-; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    slliw a0, a0, 4
 ; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 209715
@@ -972,155 +904,67 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a2, a1, 8
-; RV32I-NEXT:    lui a3, 16
-; RV32I-NEXT:    addi a3, a3, -256
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    lui a5, 4080
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a0, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    srli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a0, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a2
 ; RV32I-NEXT:    srli a2, a0, 4
-; RV32I-NEXT:    lui a4, 61681
-; RV32I-NEXT:    addi a4, a4, -241
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:    srli a2, a0, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a2, a2, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:    srli a2, a0, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    srli a2, a1, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a3, a1, 24
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    slli a3, a1, 8
-; RV32I-NEXT:    and a3, a3, a5
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
 ; RV32I-NEXT:    srli a2, a1, 4
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
 ; RV32I-NEXT:    slli a1, a1, 4
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 2
-; RV32I-NEXT:    and a2, a2, a6
-; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
 ; RV32I-NEXT:    slli a1, a1, 2
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 1
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
 ; RV32I-NEXT:    slli a1, a1, 1
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI9_0)
-; RV64I-NEXT:    ld a3, %lo(.LCPI9_0)(a3)
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a2, %hi(.LCPI9_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI9_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI9_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI9_2)(a2)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bswap_bitreverse_i64:

From b00ee46b5e4bf5f0b5700373ca6302c3c50b10b9 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 00:51:41 +0900
Subject: [PATCH 405/946] [mlir][bufferize][NFC] Implement
 BufferizableOpInterface on bufferization ops directly

No longer go through an external model. Also put BufferizableOpInterface into the same build target as the BufferizationDialect. This allows for some code reuse between BufferizationOps canonicalizers and BufferizableOpInterface implementations.

Differential Revision: https://reviews.llvm.org/D117987
---
 .../Dialect/Bufferization/IR/Bufferization.h  |   1 +
 .../IR/BufferizationInterfaceImpl.h           |  25 ----
 .../Bufferization/IR/BufferizationOps.td      |  95 ++++++++++--
 .../IR/BufferizationInterfaceImpl.cpp         | 127 ----------------
 .../Bufferization/IR/BufferizationOps.cpp     | 139 +++++++++++-------
 .../Dialect/Bufferization/IR/CMakeLists.txt   |  16 +-
 .../Bufferization/Transforms/CMakeLists.txt   |   1 -
 .../ComprehensiveBufferize/CMakeLists.txt     |  14 +-
 mlir/lib/Dialect/Linalg/IR/CMakeLists.txt     |   2 +-
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   2 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |   2 -
 .../SparseTensor/Transforms/CMakeLists.txt    |   2 +-
 .../Dialect/Tensor/Transforms/CMakeLists.txt  |   2 +-
 mlir/test/lib/Dialect/Linalg/CMakeLists.txt   |   2 +-
 .../Linalg/TestComprehensiveBufferize.cpp     |   2 -
 .../llvm-project-overlay/mlir/BUILD.bazel     |  51 ++-----
 .../mlir/test/BUILD.bazel                     |   1 -
 17 files changed, 198 insertions(+), 286 deletions(-)
 delete mode 100644 mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
 delete mode 100644 mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
index 21aeb91ff2290..2cbfc901f239b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATION_H_
 
 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
deleted file mode 100644
index 7b903b59f1769..0000000000000
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- BufferizationInterfaceImpl.h - Bufferization Impl. of Op Interface -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
-#define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace bufferization {
-namespace bufferization_ext {
-
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-
-} // namespace bufferization_ext
-} // namespace bufferization
-} // namespace mlir
-
-#endif // MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
index 9b977a7d250f5..1a76f8b3eea00 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -10,6 +10,7 @@
 #define BUFFERIZATION_OPS
 
 include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
 include "mlir/Dialect/Bufferization/IR/BufferizationBase.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/CopyOpInterface.td"
@@ -64,11 +65,14 @@ def Bufferization_CloneOp : Bufferization_Op<"clone", [
 // ToTensorOp
 //===----------------------------------------------------------------------===//
 
-def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
-    [SameOperandsAndResultShape, SameOperandsAndResultElementType,
-     TypesMatchWith<"result type matches tensor equivalent of 'memref'",
-                    "memref", "result",
-                    "memref::getTensorTypeFromMemRefType($_self)">]> {
+def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [
+    BufferizableOpInterface,
+    SameOperandsAndResultShape,
+    SameOperandsAndResultElementType,
+    TypesMatchWith<"result type matches tensor equivalent of 'memref'",
+                   "memref", "result",
+                   "memref::getTensorTypeFromMemRefType($_self)">
+  ]> {
   let summary = "memref to tensor operation";
   let description = [{
     Create a tensor from a memref, making an independent copy of the element
@@ -110,6 +114,35 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
         return resultType.cast();
       return {};
     }
+
+    //===------------------------------------------------------------------===//
+    // BufferizableOpInterface implementation
+    //===------------------------------------------------------------------===//
+
+    // ToTensorOp conceptually loads a tensor from a memory location. The
+    // One-Shot analysis has no information about the memref that is loaded from
+    // by ToTensorOp. We have to assume that the loaded tensor may after
+    // bufferization potentially alias with any other bufferized tensor. Since
+    // ToTensorOp and ToMemrefOp have no aliasing OpOperand/OpResult pairs, this
+    // cannot be encoded directly in the analysis. However, declaring ToTensorOp
+    // results as not writable enforces a buffer copy and has the same effect.
+
+    LogicalResult bufferize(RewriterBase &rewriter,
+                            const BufferizationState &state) const {
+      // to_tensor cannot be bufferized. However, other ops that are using
+      // to_tensor's result will eventually be bufferized. At that point, they
+      // will start using to_tensor's memref operand. Once all users of
+      // to_tensor are bufferized, the op will not have any users anymore and
+      // DCE away. In case of partial bufferization, to_memref(to_tensor(x))
+      // constructs may be left over. These are folded by the canonicalizer or
+      // FinalizingBufferize.
+      return failure();
+    }
+
+    bool isWritable(Value value, const BufferizationState &state) const {
+      // It is unknown whether the memref operand is writable or not.
+      return false;
+    }
   }];
 
   let assemblyFormat = "$memref attr-dict `:` type($memref)";
@@ -123,11 +156,15 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
 // ToMemrefOp
 //===----------------------------------------------------------------------===//
 
-def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref",
-    [SameOperandsAndResultShape, SameOperandsAndResultElementType, NoSideEffect,
-     TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
-                    "memref", "tensor",
-                    "memref::getTensorTypeFromMemRefType($_self)">]> {
+def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [
+    BufferizableOpInterface,
+    SameOperandsAndResultShape,
+    SameOperandsAndResultElementType,
+    NoSideEffect,
+    TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
+                   "memref", "tensor",
+                   "memref::getTensorTypeFromMemRefType($_self)">
+  ]> {
   let summary = "tensor to memref cast operation";
   let description = [{
     Casts a tensor to a memref.
@@ -150,6 +187,44 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref",
   // This op is fully verified by traits.
   let verifier = ?;
 
+  let extraClassDeclaration = [{
+    //===------------------------------------------------------------------===//
+    // BufferizableOpInterface implementation
+    //===------------------------------------------------------------------===//
+
+    // Note: ToMemrefOp / ToTensorOp are temporary ops that are inserted at the
+    // bufferization boundary. When One-Shot bufferization is complete, there
+    // should be no such ops left over. If `allowUnknownOps` is set (or after
+    // running a partial bufferization pass), such ops may remain in the IR,
+    // but such IR may no longer be analyzable by One-Shot analysis.
+
+    bool bufferizesToMemoryRead(OpOperand &opOperand,
+                                const BufferizationState &state) const {
+      // It is unknown whether the resulting memref will be read or not.
+      return true;
+    }
+
+    bool bufferizesToMemoryWrite(OpOperand &opOperand,
+                                 const BufferizationState &state) const {
+      // It is unknown whether the resulting MemRef will be written or not.
+      return true;
+    }
+
+    bool mustBufferizeInPlace(OpOperand &opOperand,
+                              const BufferizationState &state) const {
+      // ToMemrefOps always bufferize inplace.
+      return true;
+    }
+
+    OpResult getAliasingOpResult(OpOperand &opOperand,
+                                 const BufferizationState &state) const {
+      return OpResult();
+    }
+
+    LogicalResult bufferize(RewriterBase &rewriter,
+                            const BufferizationState &state);
+  }];
+
   let assemblyFormat = "$tensor attr-dict `:` type($memref)";
 
   let hasFolder = 1;
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp
deleted file mode 100644
index 835a153eb8548..0000000000000
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-//===- BufferizationInterfaceImpl.cpp - Bufferization Impl. of Interface --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
-#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/IR/Operation.h"
-
-using namespace mlir;
-using namespace mlir::bufferization;
-
-namespace mlir {
-namespace bufferization {
-namespace bufferization_ext {
-
-// TODO: These ops should implement BufferizableOpInterface.
-
-/// Bufferization of bufferization.to_memref. to_memref(to_tensor(x)) is folded
-/// to x. Other to_memref ops are ignored during bufferization.
-///
-/// ToMemrefOp casts a tensor into a memref. The resulting memref is the memory
-/// location of the incoming tensor once it will be bufferized. In the anlysis,
-/// the incoming tensor is assumed to bufferize to a memory read and to an
-/// inplace memory write, since it is unknown what will happen to the resulting
-/// memref.
-///
-/// Note: ToMemrefOp / ToTensorOp are temporary ops that are inserted at the
-/// bufferization boundary. When bufferization is complete, there should be no
-/// such ops left over. If `allowUnknownOps`, such ops may be part of the
-/// resulting IR, but such IR may no longer be bufferizable by Comprehensive
-/// Bufferize.
-struct ToMemrefOpInterface
-    : public BufferizableOpInterface::ExternalModel {
-  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
-                              const BufferizationState &state) const {
-    // It is unknown whether the resulting memref will be read or not.
-    return true;
-  }
-
-  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
-                               const BufferizationState &state) const {
-    // It is unknown whether the resulting MemRef will be written or not.
-    return true;
-  }
-
-  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
-                            const BufferizationState &state) const {
-    // ToMemrefOps always bufferize inplace.
-    return true;
-  }
-
-  OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
-                               const BufferizationState &state) const {
-    return OpResult();
-  }
-
-  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
-                          const BufferizationState &state) const {
-    auto toMemrefOp = cast(op);
-
-    // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
-    if (auto toTensorOp =
-            toMemrefOp.tensor().getDefiningOp()) {
-      Value buffer = toTensorOp.memref();
-
-      // Insert cast in case to_memref(to_tensor(x))'s type is different from
-      // x's type.
-      if (toTensorOp.memref().getType() != toMemrefOp.getType()) {
-        assert(memref::CastOp::areCastCompatible(buffer.getType(),
-                                                 toMemrefOp.getType()) &&
-               "ToMemrefOp::bufferize : cast incompatible");
-        buffer = rewriter.create(toMemrefOp.getLoc(), buffer,
-                                                 toMemrefOp.getType());
-      }
-      replaceOpWithBufferizedValues(rewriter, toMemrefOp, buffer);
-      return success();
-    }
-
-    return failure();
-  }
-};
-
-/// Bufferization of bufferization.to_tensor. Such ops cannot be bufferized.
-/// However, other ops that are using to_tensor's result will eventually be
-/// bufferized. At that point, they will start using to_tensor's memref operand.
-/// Once all users of to_tensor are bufferized, the op will not have any users
-/// anymore and DCE away.
-///
-/// ToTensorOp conceptually loads a tensor from a memory location. The analysis
-/// has no information about the memref that is loaded from by ToTensorOp. We
-/// have to assume that the loaded tensor may after bufferization potentially
-/// alias with any other bufferized tensor. Since ToTensorOp and ToMemrefOp have
-/// no aliasing OpOperand/OpResult pairs, this cannot be encoded directly in the
-/// analysis. However, declaring ToTensorOp results as not writable enforces a
-/// buffer copy and has the same effect.
-struct ToTensorOpInterface
-    : public BufferizableOpInterface::ExternalModel {
-  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
-                          const BufferizationState &state) const {
-    return failure();
-  }
-
-  bool isWritable(Operation *op, Value value,
-                  const BufferizationState &state) const {
-    // It is unknown whether the memref operand is writable or not.
-    return false;
-  }
-};
-
-} // namespace bufferization_ext
-} // namespace bufferization
-} // namespace mlir
-
-void bufferization_ext::registerBufferizableOpInterfaceExternalModels(
-    DialectRegistry &registry) {
-  registry.addOpInterface();
-  registry.addOpInterface();
-}
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index f81d726108d15..93770c9da5aa6 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -182,6 +182,79 @@ struct ToMemrefOfCast : public OpRewritePattern {
   }
 };
 
+/// Try to fold to_memref(to_tensor(x)). If x's type and the result type of the
+/// to_memref op are different, a memref.cast is needed.
+static LogicalResult foldToMemrefToTensorPair(RewriterBase &rewriter,
+                                              ToMemrefOp toMemref,
+                                              bool allowSameType = true) {
+  auto memrefToTensor = toMemref.tensor().getDefiningOp();
+  if (!memrefToTensor)
+    return failure();
+
+  // A to_tensor + to_memref pair with the same types can be folded without
+  // inserting a cast.
+  if (memrefToTensor.memref().getType() == toMemref.getType()) {
+    if (!allowSameType)
+      // Function can be configured to only handle cases where a cast is needed.
+      return failure();
+    rewriter.replaceOp(toMemref, memrefToTensor.memref());
+    return success();
+  }
+
+  // If types are definitely not cast-compatible, bail.
+  if (!memref::CastOp::areCastCompatible(memrefToTensor.memref().getType(),
+                                         toMemref.getType()))
+    return failure();
+
+  // We already know that the types are potentially cast-compatible. However
+  // in case the affine maps are different, we may need to use a copy if we go
+  // from dynamic to static offset or stride (the canonicalization cannot know
+  // at this point that it is really cast compatible).
+  auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
+    int64_t sourceOffset, targetOffset;
+    SmallVector sourceStrides, targetStrides;
+    if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
+        failed(getStridesAndOffset(target, targetStrides, targetOffset)))
+      return false;
+    auto dynamicToStatic = [](int64_t a, int64_t b) {
+      return a == MemRefType::getDynamicStrideOrOffset() &&
+             b != MemRefType::getDynamicStrideOrOffset();
+    };
+    if (dynamicToStatic(sourceOffset, targetOffset))
+      return false;
+    for (auto it : zip(sourceStrides, targetStrides))
+      if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
+        return false;
+    return true;
+  };
+
+  auto memrefToTensorType =
+      memrefToTensor.memref().getType().dyn_cast();
+  auto toMemrefType = toMemref.getType().dyn_cast();
+  if (memrefToTensorType && toMemrefType &&
+      !isGuaranteedCastCompatible(memrefToTensorType, toMemrefType)) {
+    MemRefType resultType = toMemrefType;
+    auto loc = toMemref.getLoc();
+    SmallVector dynamicOperands;
+    for (int i = 0; i < resultType.getRank(); ++i) {
+      if (resultType.getShape()[i] != ShapedType::kDynamicSize)
+        continue;
+      auto index = rewriter.createOrFold(loc, i);
+      Value size = rewriter.create(loc, memrefToTensor, index);
+      dynamicOperands.push_back(size);
+    }
+    // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
+    // BufferizableOpInterface impl of ToMemrefOp.
+    auto copy =
+        rewriter.create(loc, resultType, dynamicOperands);
+    rewriter.create(loc, memrefToTensor.memref(), copy);
+    rewriter.replaceOp(toMemref, {copy});
+  } else
+    rewriter.replaceOpWithNewOp(toMemref, toMemref.getType(),
+                                                memrefToTensor.memref());
+  return success();
+}
+
 /// Canonicalize bufferization.to_tensor + bufferization.to_memref to
 /// memref.cast when type mismatches prevent `ToMemrefOp::fold` to kick in.
 struct TensorLoadToMemref : public OpRewritePattern {
@@ -189,62 +262,10 @@ struct TensorLoadToMemref : public OpRewritePattern {
 
   LogicalResult matchAndRewrite(ToMemrefOp toMemref,
                                 PatternRewriter &rewriter) const final {
-    auto memrefToTensor = toMemref.tensor().getDefiningOp();
-    // Bail unless we have a memref_to_tensor + tensor_to_memref with different
-    // types. `ToMemrefOp::fold` handles the same type case.
-    if (!memrefToTensor ||
-        memrefToTensor.memref().getType() == toMemref.getType())
-      return failure();
-    // If types are definitely not cast-compatible, bail.
-    if (!memref::CastOp::areCastCompatible(memrefToTensor.memref().getType(),
-                                           toMemref.getType()))
-      return failure();
-
-    // We already know that the types are potentially cast-compatible. However
-    // in case the affine maps are different, we may need to use a copy if we go
-    // from dynamic to static offset or stride (the canonicalization cannot know
-    // at this point that it is really cast compatible).
-    auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
-      int64_t sourceOffset, targetOffset;
-      SmallVector sourceStrides, targetStrides;
-      if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
-          failed(getStridesAndOffset(target, targetStrides, targetOffset)))
-        return false;
-      auto dynamicToStatic = [](int64_t a, int64_t b) {
-        return a == MemRefType::getDynamicStrideOrOffset() &&
-               b != MemRefType::getDynamicStrideOrOffset();
-      };
-      if (dynamicToStatic(sourceOffset, targetOffset))
-        return false;
-      for (auto it : zip(sourceStrides, targetStrides))
-        if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
-          return false;
-      return true;
-    };
-
-    auto memrefToTensorType =
-        memrefToTensor.memref().getType().dyn_cast();
-    auto toMemrefType = toMemref.getType().dyn_cast();
-    if (memrefToTensorType && toMemrefType &&
-        !isGuaranteedCastCompatible(memrefToTensorType, toMemrefType)) {
-      MemRefType resultType = toMemrefType;
-      auto loc = toMemref.getLoc();
-      SmallVector dynamicOperands;
-      for (int i = 0; i < resultType.getRank(); ++i) {
-        if (resultType.getShape()[i] != ShapedType::kDynamicSize)
-          continue;
-        auto index = rewriter.createOrFold(loc, i);
-        Value size = rewriter.create(loc, memrefToTensor, index);
-        dynamicOperands.push_back(size);
-      }
-      auto copy =
-          rewriter.create(loc, resultType, dynamicOperands);
-      rewriter.create(loc, memrefToTensor.memref(), copy);
-      rewriter.replaceOp(toMemref, {copy});
-    } else
-      rewriter.replaceOpWithNewOp(toMemref, toMemref.getType(),
-                                                  memrefToTensor.memref());
-    return success();
+    // Only handle cases where a cast is needed. The other case is handled by
+    // the folder.
+    return foldToMemrefToTensorPair(rewriter, toMemref,
+                                    /*allowSameType=*/false);
   }
 };
 
@@ -288,6 +309,12 @@ void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
       context);
 }
 
+LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
+                                    const BufferizationState &state) {
+  // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
+  return foldToMemrefToTensorPair(rewriter, *this);
+}
+
 Optional CloneOp::buildDealloc(OpBuilder &builder, Value alloc) {
   return builder.create(alloc.getLoc(), alloc)
       .getOperation();
diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
index cdb6656f0f0ae..8ec23af66eac3 100644
--- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_mlir_dialect_library(MLIRBufferization
-  PARTIAL_SOURCES_INTENDED
   AllocationOpInterface.cpp
+  BufferizableOpInterface.cpp
   BufferizationOps.cpp
   BufferizationDialect.cpp
 
@@ -17,17 +17,3 @@ add_mlir_dialect_library(MLIRBufferization
   MLIRTensor
   MLIRMemRef
   )
-
-add_mlir_dialect_library(MLIRBufferizableOpInterface
-  PARTIAL_SOURCES_INTENDED
-  BufferizableOpInterface.cpp
-  BufferizationInterfaceImpl.cpp
-
-  DEPENDS
-  MLIRBufferizableOpInterfaceIncGen
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRBufferization
-  MLIRMemRef
-)
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
index b3f4fb38d003a..b212ef952a05e 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -10,7 +10,6 @@ add_mlir_dialect_library(MLIRBufferizationTransforms
   MLIRBufferizationPassIncGen
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
   MLIRBufferization
   MLIRControlFlowInterfaces
   MLIRInferTypeOpInterface
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
index 21e22de2eee24..8b64809e4b97c 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -13,7 +13,7 @@ add_mlir_dialect_library(MLIRAffineBufferizableOpInterfaceImpl
 
   LINK_LIBS PUBLIC
   MLIRAffine
-  MLIRBufferizableOpInterface
+  MLIRBufferization
 )
 
 add_mlir_dialect_library(MLIRArithBufferizableOpInterfaceImpl
@@ -21,7 +21,7 @@ add_mlir_dialect_library(MLIRArithBufferizableOpInterfaceImpl
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRMemRef
   MLIRStandardOpsTransforms
@@ -31,7 +31,7 @@ add_mlir_dialect_library(MLIRLinalgBufferizableOpInterfaceImpl
   LinalgInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRLinalg
@@ -42,7 +42,7 @@ add_mlir_dialect_library(MLIRSCFBufferizableOpInterfaceImpl
   SCFInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRSCF
@@ -52,7 +52,7 @@ add_mlir_dialect_library(MLIRStdBufferizableOpInterfaceImpl
   StdInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRStandard
 )
 
@@ -60,7 +60,7 @@ add_mlir_dialect_library(MLIRVectorBufferizableOpInterfaceImpl
   VectorInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRVector
 )
@@ -69,7 +69,7 @@ add_mlir_dialect_library(MLIRModuleBufferization
   ModuleBufferization.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
index b88e57dcd13b9..0b9142f8d9e0f 100644
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -14,7 +14,7 @@ add_mlir_dialect_library(MLIRLinalg
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRDialectUtils
   MLIRInferTypeOpInterface
   MLIRIR
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index 6059d51260975..366600f6d33de 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -36,7 +36,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   MLIRAnalysis
   MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRComplex
   MLIRInferTypeOpInterface
   MLIRIR
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 90335f952d559..12d43300aacf7 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -10,7 +10,6 @@
 
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
@@ -54,7 +53,6 @@ struct LinalgComprehensiveModuleBufferize
                 arith::ArithmeticDialect, StandardOpsDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
     arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    bufferization_ext::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
index 5c4ce30042d69..0c04e729e4cbe 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
@@ -12,7 +12,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRLLVMIR
   MLIRLinalg
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 98787344c582b..d36e556fd7723 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -10,7 +10,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index c784461c2dc3d..b45786123f723 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -16,7 +16,7 @@ add_mlir_library(MLIRLinalgTestPasses
   MLIRAffineBufferizableOpInterfaceImpl
   MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRGPUTransforms
   MLIRLinalg
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index 3e65330addb4d..a9b5ab206d42f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -14,7 +14,6 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
@@ -61,7 +60,6 @@ struct TestComprehensiveFunctionBufferize
                     arith::ArithmeticDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
     arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    bufferization_ext::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 74d10d9190b5d..a3876c0b71f4f 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1958,7 +1958,6 @@ cc_library(
     deps = [
         ":Affine",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":IR",
         ":LLVMDialect",
@@ -4423,7 +4422,6 @@ cc_library(
     deps = [
         ":ArithmeticDialect",
         ":Async",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6573,27 +6571,6 @@ gentbl_cc_library(
     ],
 )
 
-cc_library(
-    name = "BufferizableOpInterface",
-    srcs = [
-        "lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp",
-        "lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp",
-    ],
-    hdrs = [
-        "include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h",
-        "include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h",
-    ],
-    includes = ["include"],
-    deps = [
-        ":BufferizableOpInterfaceIncGen",
-        ":BufferizationDialect",
-        ":IR",
-        ":MemRefDialect",
-        ":Support",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "AffineBufferizableOpInterfaceImpl",
     srcs = [
@@ -6605,7 +6582,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":Affine",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         "//llvm:Support",
     ],
 )
@@ -6621,7 +6598,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":MemRefDialect",
         ":Support",
@@ -6640,7 +6617,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6660,7 +6636,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6680,7 +6655,7 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":StandardOps",
         ":Support",
@@ -6698,7 +6673,7 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":Support",
         ":VectorOps",
@@ -6827,7 +6802,7 @@ cc_library(
     deps = [
         ":Affine",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":CopyOpInterface",
         ":DialectUtils",
         ":IR",
@@ -6909,7 +6884,6 @@ cc_library(
         ":Analysis",
         ":ArithBufferizableOpInterfaceImpl",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":ComplexDialect",
@@ -6953,7 +6927,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":DialectUtils",
@@ -7968,19 +7941,27 @@ gentbl_cc_library(
     ],
     tblgen = ":mlir-tblgen",
     td_file = "include/mlir/Dialect/Bufferization/IR/BufferizationOps.td",
-    deps = [":BufferizationOpsTdFiles"],
+    deps = [
+        ":BufferizableOpInterfaceTdFiles",
+        ":BufferizationOpsTdFiles",
+    ],
 )
 
 cc_library(
     name = "BufferizationDialect",
     srcs = [
+        "lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp",
         "lib/Dialect/Bufferization/IR/BufferizationDialect.cpp",
         "lib/Dialect/Bufferization/IR/BufferizationOps.cpp",
     ],
-    hdrs = ["include/mlir/Dialect/Bufferization/IR/Bufferization.h"],
+    hdrs = [
+        "include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h",
+        "include/mlir/Dialect/Bufferization/IR/Bufferization.h",
+    ],
     includes = ["include"],
     deps = [
         ":AllocationOpInterface",
+        ":BufferizableOpInterfaceIncGen",
         ":BufferizationBaseIncGen",
         ":BufferizationOpsIncGen",
         ":ControlFlowInterfaces",
@@ -7989,6 +7970,7 @@ cc_library(
         ":InferTypeOpInterface",
         ":MemRefDialect",
         ":StandardOps",
+        ":Support",
         ":TensorDialect",
         ":ViewLikeInterface",
         "//llvm:Support",
@@ -8025,7 +8007,6 @@ cc_library(
     deps = [
         ":AllocationOpInterface",
         ":Analysis",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationPassIncGen",
         ":ControlFlowInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 8c24a43000c78..7bc92df875c46 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -388,7 +388,6 @@ cc_library(
         "//mlir:AffineBufferizableOpInterfaceImpl",
         "//mlir:ArithBufferizableOpInterfaceImpl",
         "//mlir:ArithmeticDialect",
-        "//mlir:BufferizableOpInterface",
         "//mlir:BufferizationDialect",
         "//mlir:BufferizationTransforms",
         "//mlir:GPUDialect",

From a43ed49f5b163b2926641729a30a5c17c2116a08 Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Mon, 24 Jan 2022 08:23:58 -0800
Subject: [PATCH 406/946] [DAGCombiner][RISCV] Canonicalize
 bswap(bitreverse(x))->bitreverse(bswap(x)).

If the bitreverse gets expanded, it will introduce a new bswap. By
putting a bswap before the bitreverse, we can ensure it gets cancelled
out when this happens.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118012
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  11 +
 .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 384 +++++-------------
 llvm/test/CodeGen/RISCV/rv32zbp.ll            | 150 ++-----
 llvm/test/CodeGen/RISCV/rv64zbp.ll            | 130 ++----
 4 files changed, 196 insertions(+), 479 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bf4409e77a916..77a6e7bba3660 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9375,6 +9375,17 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   // fold (bswap (bswap x)) -> x
   if (N0.getOpcode() == ISD::BSWAP)
     return N0->getOperand(0);
+
+  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
+  // isn't supported, it will be expanded to bswap followed by a manual reversal
+  // of bits in each byte. By placing bswaps before bitreverse, we can remove
+  // the two bswaps if the bitreverse gets expanded.
+  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
+    SDLoc DL(N);
+    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 3913427d5ccd0..f2f0494a6fb88 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -1039,10 +1039,6 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
 define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 24
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 1
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -1064,17 +1060,10 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 8
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srli a0, a0, 56
-; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -1096,21 +1085,17 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 8
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a1, a0, 12
-; RV32ZBB-NEXT:    lui a2, 15
-; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
 ; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    srli a0, a0, 20
-; RV32ZBB-NEXT:    andi a0, a0, -241
-; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 2
 ; RV32ZBB-NEXT:    lui a2, 3
 ; RV32ZBB-NEXT:    addi a2, a2, 819
@@ -1125,20 +1110,17 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a0, a0, 16
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 44
-; RV64ZBB-NEXT:    lui a2, 15
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 52
-; RV64ZBB-NEXT:    andi a0, a0, -241
-; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 3
 ; RV64ZBB-NEXT:    addiw a2, a2, 819
@@ -1153,8 +1135,6 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    and a0, a0, a2
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 48
 ; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
   %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
@@ -1164,99 +1144,56 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 4
 ; RV32ZBB-NEXT:    lui a2, 61681
 ; RV32ZBB-NEXT:    addi a2, a2, -241
@@ -1278,21 +1215,16 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    srli a1, a0, 4
 ; RV64ZBB-NEXT:    lui a2, 61681
 ; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 28
-; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    slliw a0, a0, 4
 ; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 209715
@@ -1306,10 +1238,8 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV64ZBB-NEXT:    addiw a2, a2, 1365
 ; RV64ZBB-NEXT:    and a1, a1, a2
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    slliw a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
   %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
@@ -1319,206 +1249,113 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI12_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI12_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a1, a1
-; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    srli a2, a0, 4
 ; RV32ZBB-NEXT:    lui a3, 61681
 ; RV32ZBB-NEXT:    addi a3, a3, -241
 ; RV32ZBB-NEXT:    and a2, a2, a3
-; RV32ZBB-NEXT:    and a1, a1, a3
-; RV32ZBB-NEXT:    slli a1, a1, 4
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 2
-; RV32ZBB-NEXT:    lui a4, 209715
-; RV32ZBB-NEXT:    addi a4, a4, 819
-; RV32ZBB-NEXT:    and a2, a2, a4
-; RV32ZBB-NEXT:    and a1, a1, a4
-; RV32ZBB-NEXT:    slli a1, a1, 2
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 1
-; RV32ZBB-NEXT:    lui a5, 349525
-; RV32ZBB-NEXT:    addi a5, a5, 1365
-; RV32ZBB-NEXT:    and a2, a2, a5
-; RV32ZBB-NEXT:    and a1, a1, a5
-; RV32ZBB-NEXT:    slli a1, a1, 1
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a2, a0, 4
-; RV32ZBB-NEXT:    and a2, a2, a3
 ; RV32ZBB-NEXT:    and a0, a0, a3
 ; RV32ZBB-NEXT:    slli a0, a0, 4
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
 ; RV32ZBB-NEXT:    and a2, a2, a4
 ; RV32ZBB-NEXT:    and a0, a0, a4
 ; RV32ZBB-NEXT:    slli a0, a0, 2
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
 ; RV32ZBB-NEXT:    and a2, a2, a5
 ; RV32ZBB-NEXT:    and a0, a0, a5
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a2, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_0)
 ; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    srli a2, a0, 4
 ; RV64ZBB-NEXT:    and a2, a2, a1
 ; RV64ZBB-NEXT:    and a0, a0, a1
@@ -1538,7 +1375,6 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV64ZBB-NEXT:    and a0, a0, a1
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a2, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
   %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 1df371c407d77..c51151f47962f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -2404,48 +2404,27 @@ define i32 @bswap_rotl_i32(i32 %a) {
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i32:
@@ -2460,81 +2439,42 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 260892285643a..b68a98b13052b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2351,48 +2351,27 @@ define i32 @bswap_rotl_i32(i32 %a) {
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i32:
@@ -2407,76 +2386,27 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI68_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI68_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i64:

From e494278ceeb70d2973392a349b3ab105da488b13 Mon Sep 17 00:00:00 2001
From: gysit 
Date: Mon, 24 Jan 2022 16:25:42 +0000
Subject: [PATCH 407/946] [mlir][linalg] Add transpose support to hoist
 padding.

Add a transpose option to hoist padding to transpose the padded tensor before storing it into the packed tensor. The early transpose improves the memory access patterns of the actual compute kernel. The patch introduces a transpose right after the hoisted pad tensor and a second transpose inside the compute loop. The second transpose can either be fused into the compute operation or will canonicalize away when lowering to vector instructions.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D117893
---
 .../Dialect/Linalg/Transforms/HoistPadding.h  |  18 +-
 .../Dialect/Linalg/Transforms/Transforms.h    |  36 ++--
 .../include/mlir/Dialect/Linalg/Utils/Utils.h |  10 +-
 .../Linalg/Transforms/HoistPadding.cpp        | 165 +++++++++++-------
 .../Dialect/Linalg/Transforms/Transforms.cpp  |  19 +-
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp       |  78 ++++++---
 mlir/test/Dialect/Linalg/hoist-padding.mlir   |  85 +++++++--
 .../Linalg/TestLinalgCodegenStrategy.cpp      |  23 +++
 8 files changed, 316 insertions(+), 118 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
index 8d3315d5ea971..795dd00cbdfd6 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@@ -19,12 +19,17 @@ class PadOp;
 } // namespace tensor
 
 namespace linalg {
+class GenericOp;
 
 /// Mechanically hoist padding operations on tensors by `numLoops` into a new,
 /// generally larger tensor. This achieves packing of multiple padding ops into
-/// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
+/// a larger tensor. On success, `opToHoist` is replaced by the cloned version
 /// in the packing loop so the caller can continue reasoning about the padding
-/// operation.
+/// operation. If `transposeVector` is non-empty, hoist padding introduces a
+/// GenericOp to transpose the padded tensor before inserting it into the packed
+/// tensor. A `transposeVector` can change the storage order of the padded
+/// tensor but does not change the order of the pack or compute loops.
+///
 ///
 /// Example in pseudo-mlir:
 /// =======================
@@ -33,7 +38,7 @@ namespace linalg {
 /// ```
 ///    scf.for (%i, %j, %k)
 ///      %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
-///      %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
+///      %0 = tensor.pad %st0 low[0, 0] high[...] {
 ///      ^bb0( ... ):
 ///        linalg.yield %pad
 ///      } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -47,7 +52,7 @@ namespace linalg {
 ///      %packed_init = linalg.init_tensor range(%j) : tensor
 ///      %packed = scf.for (%k) iter_args(%p : %packed_init) {
 ///        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor
-///        %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
+///        %0 = tensor.pad %st0 low[0, 0] high[...] {
 ///        ^bb0( ... ):
 ///          linalg.yield %pad
 ///        } : tensor to tensor<4x8xf32>
@@ -62,8 +67,9 @@ namespace linalg {
 ///      }
 ///    }
 /// ```
-FailureOr hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
-                                       tensor::PadOp &hoistedOp);
+FailureOr hoistPaddingOnTensors(
+    tensor::PadOp opToHoist, int numLoops, ArrayRef transposeVector,
+    tensor::PadOp &hoistedOp, SmallVectorImpl &transposeOps);
 
 } // namespace linalg
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f5e99d5afe83e..61156c733594d 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -484,14 +484,19 @@ using TileSizeComputationFunction =
 using PaddingValueComputationFunction =
     std::function(OpBuilder &, OpOperand &)>;
 
-/// Callback returning true if the pad tensor operation defining the given
-/// OpOperand shall be marked as nofold to enable packing.
+/// Callback returning true if the PadOp defining the given OpOperand shall be
+/// marked as nofold to enable packing.
 using PaddingNoFoldComputationFunction = std::function;
 
-/// Callback returning the number of loops to hoist the pad tensor operation
-/// defining the given OpOperand.
+/// Callback returning the number of loops to hoist the PadOp defining the given
+/// OpOperand.
 using PaddingHoistComputationFunction = std::function;
 
+/// Callback returning the transpose vector used to permute the result tensor
+/// dimensions of the PadOp defining the given OpOperand.
+using PaddingTransposeComputationFunction =
+    std::function(OpOperand &)>;
+
 struct LinalgPaddingOptions {
   /// Callback returning the padding value to use for a given OpOperand or
   /// failure for no padding. Padding operations are introduced if
@@ -506,10 +511,10 @@ struct LinalgPaddingOptions {
     return *this;
   }
 
-  /// Callback returning true if the pad tensor operation defining the given
-  /// OpOperand shall be marked as nofold to enable packing. A padding operation
-  /// is only marked nofold if `paddingNoFoldComputationFunction` is set and
-  /// returns true. Otherwise, the nofold attribute is set to false.
+  /// Callback returning true if the PadOp defining the given OpOperand shall be
+  /// marked as nofold to enable packing. A padding operation is only marked
+  /// nofold if `paddingNoFoldComputationFunction` is set and returns true.
+  /// Otherwise, the nofold attribute is set to false.
   PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
 
   LinalgPaddingOptions &
@@ -518,8 +523,8 @@ struct LinalgPaddingOptions {
     return *this;
   }
 
-  /// Callback returning the number of loops to hoist the pad tensor operation
-  /// defining the given OpOperand.
+  /// Callback returning the number of loops to hoist the PadOp defining the
+  /// given OpOperand.
   PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
 
   LinalgPaddingOptions &
@@ -527,6 +532,17 @@ struct LinalgPaddingOptions {
     paddingHoistComputationFunction = std::move(fun);
     return *this;
   }
+
+  /// Callback returning the transpose vector used to permute the result tensor
+  /// dimensions of the PadOp defining the given OpOperand.
+  PaddingTransposeComputationFunction paddingTransposeComputationFunction =
+      nullptr;
+
+  LinalgPaddingOptions &setPaddingTransposeComputationFunction(
+      PaddingTransposeComputationFunction fun) {
+    paddingTransposeComputationFunction = std::move(fun);
+    return *this;
+  }
 };
 
 struct LinalgTilingAndFusionOptions {
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index b466d7726f502..a2f08e79ac47c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -117,18 +117,24 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
 /// Example:
 /// ```
 /// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
-/// %1 = linalg.pad_tensor %0 low[0, 0] high[...] { linalg.yield %cst }
+/// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
 /// %2 = linalg.matmul ins(...) outs(%1)
 /// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
 /// ```
 /// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
 /// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
 /// ```
-/// %4 = linalg.pad_tensor %3 low[0, 0] high[...] { linalg.yield %other_cst }
+/// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
 /// ```
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold);
 
+/// Returns a GenericOp that transposes `inputTensor` into `outputTensor` using
+/// `transposeVector` to permute the `inputTensor` dimensions.
+GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
+                          Value outputTensor,
+                          ArrayRef transposeVector);
+
 //===----------------------------------------------------------------------===//
 // Fusion / Tiling utilities
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 21c92ee304dfa..83c9aa4a54a01 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -53,32 +53,32 @@ using namespace mlir::linalg;
 ///   8. There is no enclosing scf::ForOp that indexes the padded data.
 /// Other cases succeed and will trigger hoisting of the pad op.
 struct HoistingAnalysis {
-  HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops);
+  HoistingAnalysis(tensor::PadOp padOp, int numLoops);
 
   bool isValid() { return valid; }
 
   /// Footprint of the packedTensor, computed from the packingLoops.
   SmallVector getPackedTensorSizes(ImplicitLocOpBuilder &b);
 
-  /// The outermost loop, determined by `nLevels` above which `padTensorOp` will
+  /// The outermost loop, determined by `nLevels` above which `padOp` will
   /// be hoisted.
   scf::ForOp outermostEnclosingForOp;
 
-  /// Backward slice rooted at `padTensorOp` and nested under
+  /// Backward slice rooted at `padOp` and nested under
   /// `outermostEnclosingForOp`.
   SetVector backwardSlice;
 
-  /// The scf::ForOp immediately enclosing `padTensorOp` such that:
+  /// The scf::ForOp immediately enclosing `padOp` such that:
   ///  1. they are nested under `outermostEnclosingForOp` (inclusive)
   ///  2. whose induction variable is used, directly or indirectly, in the
-  ///     computation of `padTensorOp`.
+  ///     computation of `padOp`.
   /// The span of these loops determines the footprint of the packed tensor.
   SmallVector packingLoops;
 
 private:
-  /// Drop any non-index dependencies of `padTensorOp` and `sliceOp` from
+  /// Drop any non-index dependencies of `padOp` and `sliceOp` from
   /// `backwardSlice`. The method follows the use-def chains of the index
-  /// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
+  /// operands consumed by `padOp` and `sliceOp` and drops the operations
   /// not part of this index computation. Afterwards, the filtered
   /// `backwardSlice` contains only the loops whose induction variable is used,
   /// directly or indirectly, to index the padded tensor. The method returns
@@ -94,24 +94,24 @@ struct HoistingAnalysis {
   ///       %ubi = affine.min #map(%i)
   ///       %ubj = affine.min #map(%j)
   ///       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
-  ///       %padded_slice = linalg.pad_tensor %slice
+  ///       %padded_slice = tensor.pad %slice
   /// ```
   /// dropNonIndexDependencies(%padded_slice, %slice)
   /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
-  LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp,
+  LogicalResult dropNonIndexDependencies(tensor::PadOp padOp,
                                          tensor::ExtractSliceOp sliceOp);
 
   /// Encodes whether the analysis is valid and hoisting can proceed.
   bool valid;
 };
 
-/// Return true if all uses of `padTensorOp` are an input tensor of some
+/// Return true if all uses of `padOp` are an input tensor of some
 /// LinalgOp.
-static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
-  for (OpOperand &use : padTensorOp.result().getUses()) {
+static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padOp) {
+  for (OpOperand &use : padOp.result().getUses()) {
     auto linalgUser = dyn_cast(use.getOwner());
     if (!linalgUser || !linalgUser.isInputTensor(&use)) {
-      LLVM_DEBUG(DBGS() << "Found a use of " << *(padTensorOp)
+      LLVM_DEBUG(DBGS() << "Found a use of " << *(padOp)
                         << "\nthat is not an input tensor of a LinalgOp, "
                         << "cannot hoist\n"
                         << *(use.getOwner()) << "\n");
@@ -126,12 +126,12 @@ static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
 /// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
 /// Control-flow and other containing ops with regions are not modeled atm.
 static void
-getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
+getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
                          SmallVector &reverseEnclosingLoops) {
-  AsmState state(padTensorOp->getParentOfType());
+  AsmState state(padOp->getParentOfType());
   (void)state;
   scf::ForOp outermostEnclosingForOp = nullptr;
-  Operation *nextEnclosingOp = padTensorOp->getParentOp();
+  Operation *nextEnclosingOp = padOp->getParentOp();
   while (nLevels-- > 0 &&
          (outermostEnclosingForOp = dyn_cast(nextEnclosingOp))) {
     LLVM_DEBUG(
@@ -143,17 +143,38 @@ getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
   }
 }
 
-HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
+/// Returns the transposed `rankedTensorType` if `transposeVector` is non-empty.
+/// Fails if `transposeVector` is not a permutation matching the tensor rank.
+static FailureOr
+computeTransposedType(RankedTensorType rankedTensorType,
+                      ArrayRef transposeVector) {
+  if (transposeVector.empty())
+    return rankedTensorType;
+  if (!isPermutation(transposeVector) ||
+      transposeVector.size() != static_cast(rankedTensorType.getRank()))
+    return failure();
+
+  SmallVector transposedShape(rankedTensorType.getShape().begin(),
+                                       rankedTensorType.getShape().end());
+  applyPermutationToVector(transposedShape, transposeVector);
+
+  using RTTBuilder = RankedTensorType::Builder;
+  RankedTensorType transposedTensorType =
+      RTTBuilder(rankedTensorType).setShape(transposedShape);
+  return transposedTensorType;
+}
+
+HoistingAnalysis::HoistingAnalysis(tensor::PadOp padOp, int numLoops) {
   valid = false;
 
-  // Bail on any use that isn't an input of a Linalg op.
+  // Bail on any use that isn't an input of a LinalgOp.
   // Hoisting of inplace updates happens after vectorization.
-  if (!isOnlyUsedAsInputOfLinalgOp(padTensorOp))
+  if (!isOnlyUsedAsInputOfLinalgOp(padOp))
     return;
 
   // Get at most `numLoops` of immediately enclosing loops.
   SmallVector reverseEnclosingLoops;
-  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
+  getAtMostNEnclosingLoops(padOp, numLoops, reverseEnclosingLoops);
   if (reverseEnclosingLoops.empty()) {
     LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
     return;
@@ -161,7 +182,7 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
 
   outermostEnclosingForOp = reverseEnclosingLoops.back();
 
-  // Get the `sliceOp` that defines the source tensor of `padTensorOp` and
+  // Get the `sliceOp` that defines the source tensor of `padOp` and
   // check its source is defined outside of the outermost loop. This check
   // ensures the padded data is available for packing before entering the
   // outermost enclosing loop.
@@ -174,9 +195,9 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
   //   scf.for %j
   //     scf.for %k
   //       %slice = tensor.extract_slice %source [%i, %j]
-  //       %padded_slice = linalg.pad_tensor %slice
+  //       %padded_slice = tensor.pad %slice
   // ```
-  auto sliceOp = padTensorOp.source().getDefiningOp();
+  auto sliceOp = padOp.source().getDefiningOp();
   if (!sliceOp) {
     LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n");
     return;
@@ -186,32 +207,31 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
     return;
   }
 
-  // Check the region of `padTensorOp` depends on a constant only. Adding
+  // Check the region of `padOp` depends on a constant only. Adding
   // hoisting support for arbitrary padding regions would require cloning all
   // dependencies captured by the padding region.
-  Value paddingValue = padTensorOp.getConstantPaddingValue();
+  Value paddingValue = padOp.getConstantPaddingValue();
   if (!paddingValue ||
       !isa_and_nonnull(paddingValue.getDefiningOp())) {
     LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> skip\n");
     return;
   }
 
-  // Get all the ops in the backwards slice starting from `padTensorOp` and that
+  // Get all the ops in the backwards slice starting from `padOp` and that
   // are dominated by the outermost enclosing loop.
   DominanceInfo domInfo(outermostEnclosingForOp);
-  getBackwardSlice(padTensorOp.getOperation(), &backwardSlice,
-                   [&](Operation *op) {
-                     return domInfo.dominates(outermostEnclosingForOp, op);
-                   });
+  getBackwardSlice(padOp.getOperation(), &backwardSlice, [&](Operation *op) {
+    return domInfo.dominates(outermostEnclosingForOp, op);
+  });
   if (backwardSlice.empty())
     return;
-  // Add `padTensorOp` itself to the backward slice.
-  backwardSlice.insert(padTensorOp.getOperation());
+  // Add `padOp` itself to the backward slice.
+  backwardSlice.insert(padOp.getOperation());
 
   // Remove all ops in the backward slice that are not used to index the padded
-  // tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
+  // tensor. In particular, keep `padOp`, `sliceOp`, and the loop and
   // affine operations used for the index computation.
-  if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
+  if (failed(dropNonIndexDependencies(padOp, sliceOp)))
     return;
 
   // Add only the loops part of the filtered `backwardSlice` to the packing
@@ -232,7 +252,7 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
 }
 
 LogicalResult
-HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
+HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padOp,
                                            tensor::ExtractSliceOp sliceOp) {
   // Set of all values used for index computation.
   SetVector indexEdges;
@@ -252,7 +272,7 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
     });
   };
 
-  // Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index
+  // Starting from `padOp` and `sliceOp` walk the use-def edges of index
   // type in `backwardSlice`. Add the index operands of an operation to
   // `indexEdges` and remove all operations from `backwardSlice` that are not
   // part of the index computation.
@@ -267,16 +287,16 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
   //       %ubi = affine.min #map(%i)
   //       %ubj = affine.min #map(%j)
   //       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
-  //       %padded_slice = linalg.pad_tensor %slice
+  //       %padded_slice = tensor.pad %slice
   // ```
   // After iterating `backwardSlice` we obtain:
   // indexEdges = [%i, %j, %ubi, %ubj]
   // backwardSlice = backwardSlice / [linalg.fill(%cst, %arg1), scf.for %k]
   SetVector operationsToRemove;
   for (Operation *op : llvm::reverse(backwardSlice)) {
-    // Add the index operands of `padTensorOp` and `sliceOp` to start the
+    // Add the index operands of `padOp` and `sliceOp` to start the
     // exploration of the index computation.
-    if (op == padTensorOp || op == sliceOp) {
+    if (op == padOp || op == sliceOp) {
       addIndexOperandsToIndexEdges(op);
       continue;
     }
@@ -310,7 +330,7 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
       continue;
     }
     // Remove all other operations not used by the index computation. An
-    // exception are constant operations that may be used by `padTensorOp`.
+    // exception are constant operations that may be used by `padOp`.
     if (!isa(op))
       operationsToRemove.insert(op);
   }
@@ -373,9 +393,9 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                        ValueRange{ivVal, lbVal, stepVal});
 }
 
-FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
-                                                     int numLoops,
-                                                     tensor::PadOp &hoistedOp) {
+FailureOr mlir::linalg::hoistPaddingOnTensors(
+    tensor::PadOp opToHoist, int numLoops, ArrayRef transposeVector,
+    tensor::PadOp &hoistedOp, SmallVectorImpl &transposeOps) {
   LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                     << " loops\n");
   HoistingAnalysis analysis(opToHoist, numLoops);
@@ -396,14 +416,20 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   RankedTensorType paddedTensorType = opToHoist.getResultType();
   int paddedRank = paddedTensorType.getRank();
 
-  // Create the packed tensor into which we amortize
+  // Compute the type of the transposed padded tensor.
+  FailureOr transposedTensorType =
+      computeTransposedType(paddedTensorType, transposeVector);
+  if (failed(transposedTensorType))
+    return failure();
+
+  // Create the packed tensor into which we amortize
   // padding.
   SmallVector packedShape(nPackedLoops, ShapedType::kDynamicSize);
   // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
   // tensor.
-  llvm::append_range(packedShape, paddedTensorType.getShape());
-  auto packedTensorType =
-      RankedTensorType::get(packedShape, paddedTensorType.getElementType());
+  llvm::append_range(packedShape, transposedTensorType->getShape());
+  auto packedTensorType = RankedTensorType::get(
+      packedShape, transposedTensorType->getElementType());
   Value packedTensor = b.create(
       loc, dynamicTensorSizes, packedTensorType.getShape(),
       packedTensorType.getElementType());
@@ -413,9 +439,10 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   // The implementation proceeds in a stack-like fashion:
   //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
   //      deeper in the stack.
-  //   2. Create a InsertSliceOp at the top of the stack.
-  //   3. Iteratively pop and yield the result of the InsertSliceOp across
-  //     the cloned loops.
+  //   2. Create a GenericOp if `transposeVector` is non-empty.
+  //   3. Create an InsertSliceOp at the top of the stack.
+  //   4. Iteratively pop and yield the result of the InsertSliceOp across
+  //      the cloned loops.
   SmallVector clonedLoopIvs, leadingPackedTensorIndexings;
   clonedLoopIvs.reserve(nPackedLoops);
   leadingPackedTensorIndexings.reserve(nPackedLoops);
@@ -455,16 +482,14 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
     packedTensor = clonedForOp.getRegionIterArgs().front();
   }
 
-  // Stack step 2. create InsertSliceOp at the top of the stack.
   // offsets = [clonedLoopIvs, 0 .. 0].
   SmallVector offsets(leadingPackedTensorIndexings.begin(),
                                     leadingPackedTensorIndexings.end());
   offsets.append(paddedRank, b.getIndexAttr(0));
-  // sizes = [1 .. 1, paddedShape].
+  // sizes = [1 .. 1, transposedShape].
   SmallVector sizes(nPackedLoops, b.getIndexAttr(1));
-  for (int64_t sz : paddedTensorType.getShape()) {
+  for (int64_t sz : transposedTensorType->getShape()) {
     // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
-    // tensor.
     assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
     sizes.push_back(b.getIndexAttr(sz));
   }
@@ -472,11 +497,21 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   SmallVector strides(nPackedLoops + paddedRank,
                                     b.getIndexAttr(1));
 
-  Value inserted =
-      b.create(loc, bvm.lookup(opToHoist.result()),
-                                      packedTensor, offsets, sizes, strides);
+  // Stack step 2. create GenericOp if `transposeVector` is non-empty.
+  Value paddedTensor = bvm.lookup(opToHoist.result());
+  if (!transposeVector.empty()) {
+    Value outputTensor = b.create(
+        loc, *transposedTensorType, packedTensor, offsets, sizes, strides);
+    transposeOps.push_back(
+        makeTransposeOp(b, loc, paddedTensor, outputTensor, transposeVector));
+    paddedTensor = transposeOps.back()->getResult(0);
+  }
 
-  // Stack step 3. iteratively pop the stack and propagate the yield.
+  // Stack step 3. create InsertSliceOp at the top of the stack.
+  Value inserted = b.create(
+      loc, paddedTensor, packedTensor, offsets, sizes, strides);
+
+  // Stack step 4. iteratively pop the stack and propagate the yield.
   Value valueToYield = inserted;
   for (Value iv : llvm::reverse(clonedLoopIvs)) {
     auto forOp = scf::getForInductionVarOwner(iv);
@@ -498,12 +533,22 @@ FailureOr mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   // offsets = [originalLoopIvs, 0 .. 0].
   offsets.assign(loopIterationCounts.begin(), loopIterationCounts.end());
   offsets.append(paddedRank, b.getIndexAttr(0));
-  // sizes = [1 .. 1, paddedShape] (definedabove).
+  // sizes = [1 .. 1, transposedShape] (defined above).
   // strides = [1 .. 1] (defined above)
   packedTensor =
       scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
   Value newResult = b.create(
-      loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);
+      loc, *transposedTensorType, packedTensor, offsets, sizes, strides);
+
+  // Transpose the packed tensor back to the original storage order.
+  if (!transposeVector.empty()) {
+    Value initTensor =
+        b.create(loc, ValueRange{}, paddedTensorType.getShape(),
+                               paddedTensorType.getElementType());
+    transposeOps.push_back(
+        makeTransposeOp(b, loc, newResult, initTensor, transposeVector));
+    newResult = transposeOps.back()->getResult(0);
+  }
 
   // Make the newly cloned `opToHoist` available to the caller.
   hoistedOp =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 9eb7b7cfe751c..486a069a60ed0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -528,20 +528,29 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
   // Hoist the padding.
   for (const auto &en : enumerate(depths)) {
     OpOperand &opOperand = paddedOp->getOpOperand(en.index());
-    auto padTensorOp = opOperand.get().getDefiningOp();
-    if (!padTensorOp || en.value() == 0)
+    auto padOp = opOperand.get().getDefiningOp();
+    if (!padOp || en.value() == 0)
       continue;
     tensor::PadOp hoistedOp;
-    FailureOr newResult =
-        hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp);
+    SmallVector transposeOps;
+    SmallVector transposeVector =
+        options.paddingTransposeComputationFunction(opOperand);
+
+    FailureOr newResult = hoistPaddingOnTensors(
+        padOp, en.value(), transposeVector, hoistedOp, transposeOps);
     if (failed(newResult))
       continue;
-    rewriter.replaceOp(padTensorOp, newResult.getValue());
+    rewriter.replaceOp(padOp, newResult.getValue());
+
+    // Do not apply hoist padding to the newly introduced transpose operations.
+    for (GenericOp transposeOp : transposeOps)
+      filter.replaceLinalgTransformationFilter(rewriter, transposeOp);
   }
 
   // Replace the original operation to pad.
   rewriter.replaceOp(linalgOp, newResults.getValue());
   filter.replaceLinalgTransformationFilter(rewriter, paddedOp);
+
   return paddedOp;
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index bf37719325ccb..1b0a6d7f2ba87 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -340,11 +340,11 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
     OpResult opResult = current.cast();
     current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
   }
-  auto padTensorOp = current ? current.getDefiningOp() : nullptr;
+  auto padOp = current ? current.getDefiningOp() : nullptr;
 
   // Exit if the search fails to match a tensor::PadOp at the end of the matched
   // LinalgOp sequence.
-  if (!padTensorOp)
+  if (!padOp)
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padded result type does not match.
@@ -352,41 +352,77 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the LinalgOps are not high padded.
-  if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) {
+  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
         return getConstantIntValue(ofr) != static_cast(0);
       }))
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
-  // Exit if `padTensorOpSliceOp`, which defines the slice used by
-  // `padTensorOp`, is rank-reducing.
-  auto padTensorOpSliceOp =
-      padTensorOp.source().getDefiningOp();
-  if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() !=
-                                 padTensorOpSliceOp.getMixedSizes().size())
+  // Exit if `padOpSliceOp`, which defines the slice used by
+  // `padOp`, is rank-reducing.
+  auto padOpSliceOp = padOp.source().getDefiningOp();
+  if (!padOpSliceOp ||
+      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
-  // of the slice padded by `padTensorOp`.
-  if (llvm::any_of(llvm::zip(sliceOp.getMixedSizes(),
-                             padTensorOpSliceOp.getMixedSizes()),
-                   [](std::tuple it) {
-                     return !isEqualConstantIntOrValue(std::get<0>(it),
-                                                       std::get<1>(it));
-                   }))
+  // of the slice padded by `padOp`.
+  if (llvm::any_of(
+          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
+          [](std::tuple it) {
+            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
+          }))
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padding values do not match.
-  Attribute padTensorOpPadAttr, padAttr;
-  Value padTensorOpPad = padTensorOp.getConstantPaddingValue();
-  if (!padTensorOpPad ||
-      !matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) ||
-      !matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr)
+  Attribute padOpPadAttr, padAttr;
+  Value padOpPad = padOp.getConstantPaddingValue();
+  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
+      !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Return the padded result if the padding values and sizes match.
   return sliceOp.source();
 }
 
+GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
+                          Value outputTensor,
+                          ArrayRef transposeVector) {
+  auto resultTensorType = outputTensor.getType().cast();
+  Type elementType = resultTensorType.getElementType();
+
+  assert(isPermutation(transposeVector) &&
+         "expect transpose vector to be a permutation");
+  assert(transposeVector.size() ==
+             static_cast(resultTensorType.getRank()) &&
+         "expect transpose vector size to match result tensor rank");
+
+  // Compute the transpose and the identity indexing maps.
+  SmallVector indexingMaps = {
+      inversePermutation(AffineMap::getPermutationMap(
+          SmallVector(transposeVector.begin(), transposeVector.end()),
+          b.getContext())),
+      AffineMap::getMultiDimIdentityMap(transposeVector.size(),
+                                        b.getContext())};
+  SmallVector iteratorTypes(transposeVector.size(),
+                                             getParallelIteratorTypeName());
+
+  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
+  auto transposeOp = b.create(
+      loc, resultTensorType, inputTensor, outputTensor,
+      b.getAffineMapArrayAttr(indexingMaps), b.getStrArrayAttr(iteratorTypes),
+      /*doc=*/nullptr,
+      /*library_call=*/nullptr);
+  Region &body = transposeOp.getRegion();
+  body.push_back(new Block());
+  body.front().addArguments({elementType, elementType}, {loc, loc});
+
+  // Create the body of the transpose operation.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPointToEnd(&body.front());
+  b.create(loc, transposeOp.getRegion().front().getArgument(0));
+  return transposeOp;
+}
+
 /// Specialization to build an scf "for" nest.
 template <>
 void GenerateLoopNest::doit(
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 416dfe37e93d0..534e37a1f9ee0 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matvec pad hoist-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATVEC
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matvec pad hoist-paddings=1,1,0 transpose-paddings=1:0,0,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=TRANSP
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad hoist-paddings=1,2,1 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
 
 //  MATVEC-DAG: #[[DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
@@ -30,7 +31,7 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
     //  MATVEC-DAG:   %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -81,11 +82,11 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
     %3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor
     %4 = affine.apply #map1(%1)
     %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x5xf32>
     %6 = tensor.pad %3 low[%c0] high[%4]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor to tensor<5xf32>
 
@@ -141,11 +142,11 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
     %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor to tensor
     %5 = affine.apply #map1(%2)
     %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x4xf32>
     %7 = tensor.pad %4 nofold low[%c0] high[%5]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor to tensor<4xf32>
 
@@ -177,7 +178,7 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>,
     //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       %5 = arith.index_cast %arg3 : index to i32
       %6 = arith.sitofp %5 : i32 to f32
       tensor.yield %6 : f32
@@ -214,7 +215,7 @@ func @non_constant_op_padding(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.extract %arg1[%arg3] : tensor<12xf32>
     %4 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %3 : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -251,7 +252,7 @@ func @non_index_operand(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = arith.index_cast %arg3 : i32 to index
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -288,7 +289,7 @@ func @memory_effect(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = memref.load %arg3[%c0] : memref
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -328,7 +329,7 @@ func @index_result_loop(%arg0: tensor<24x12xf32>,
       scf.yield %6 : index
     }
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -373,7 +374,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
 
     // Check the fused and padded fill op does not prevent hoisting.
     %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor to tensor<5x24xf32>
     %5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
@@ -394,18 +395,18 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
       %10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
       %11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor to tensor
       %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor to tensor<5x3xf32>
       %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor<3x24xf32> to tensor<3x24xf32>
 
       // Check the output padding is not hoisted.
       //      MATMUL:   %[[T8:.*]] = tensor.pad
       %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor to tensor<5x24xf32>
 
@@ -421,3 +422,59 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
   }
   return %0 : tensor<12x24xf32>
 }
+
+// -----
+
+#map0 = affine_map<(d0)[s0] -> (4, -d0 + s0)>
+#map1 = affine_map<(d0) -> (-d0 + 4)>
+
+//      TRANSP:  transpose
+// TRANSP-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x?xf32>
+func @transpose(%arg0: tensor<24x?xf32>,
+                %arg1: tensor<?xf32>,
+                %arg2: tensor<24xf32>) -> tensor<24xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %0 = tensor.dim %arg0, %c1 : tensor<24x?xf32>
+
+  // Transpose the padded matrix.
+  //      TRANSP:  %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = {{.*}}iter_args(%[[T1:.*]] =
+  //        TRANSP:   %[[T2:.*]] = tensor.pad
+  //        TRANSP:   %[[T3:.*]] = tensor.extract_slice %[[T1]]
+  //        TRANSP:   %[[T4:.*]] = linalg.generic
+  //   TRANSP-SAME:     ins(%[[T2]] : tensor<24x4xf32>
+  //   TRANSP-SAME:     outs(%[[T3]] : tensor<4x24xf32>
+  //        TRANSP:   %[[T5:.*]] = tensor.insert_slice %[[T4]] into %[[T1]]
+  //        TRANSP:   scf.yield %[[T5]]
+
+  //      TRANSP:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
+  %1 = scf.for %arg3 = %c0 to %0 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24xf32>) {
+    %2 = affine.min #map0(%arg3)[%0]
+    %3 = tensor.extract_slice %arg0[0, %arg3] [24, %2] [1, 1] : tensor<24x?xf32> to tensor<24x?xf32>
+
+    // Index the packed vector and transpose back.
+    //      TRANSP:   %[[T6:.*]] = tensor.extract_slice %[[T0]]
+    //      TRANSP:   %[[T7:.*]] = linalg.init_tensor
+    //      TRANSP:   %[[T8:.*]] = linalg.generic
+    // TRANSP-SAME:     ins(%[[T6]] : tensor<4x24xf32>
+    // TRANSP-SAME:     outs(%[[T7]] : tensor<24x4xf32>
+    %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
+    %5 = affine.apply #map1(%2)
+    %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5]  {
+    ^bb0(%arg5: index, %arg6: index):  // no predecessors
+      tensor.yield %cst : f32
+    } : tensor<24x?xf32> to tensor<24x4xf32>
+    %7 = tensor.pad %4 nofold low[%c0] high[%5]  {
+    ^bb0(%arg5: index):  // no predecessors
+      tensor.yield %cst : f32
+    } : tensor<?xf32> to tensor<4xf32>
+
+    // Check matvec uses the packed input vector.
+    //      TRANSP:    = linalg.matvec ins(%[[T8]]
+    %8 = linalg.matvec ins(%6, %7 : tensor<24x4xf32>, tensor<4xf32>) outs(%arg4 : tensor<24xf32>) -> tensor<24xf32>
+    scf.yield %8 : tensor<24xf32>
+  }
+  return %1 : tensor<24xf32>
+}
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
index b79059a3ebb64..5bfd6412854b6 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -109,6 +109,17 @@ struct TestLinalgCodegenStrategy
       *this, "hoist-paddings",
       llvm::cl::desc("Operand hoisting depths when test-pad-pattern."),
       llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
+  ListOption transposePaddings{
+      *this, "transpose-paddings",
+      llvm::cl::desc(
+          "Transpose paddings when test-pad-pattern. Specify an "
+          "operand dimension interchange using the following format:\n"
+          "-transpose-paddings=1:0:2,0:1,0:1\n"
+          "It defines the interchange [1, 0, 2] for operand one and "
+          "the interchange [0, 1] (no transpose) for the remaining operands. "
+          "All interchange vectors have to be permutations matching the "
+          "operand rank."),
+      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   Option generalize{*this, "generalize",
                           llvm::cl::desc("Generalize named operations."),
                           llvm::cl::init(false)};
@@ -257,9 +268,21 @@ void TestLinalgCodegenStrategy::runOnOperation() {
                ? hoistPaddings[opOperand.getOperandNumber()]
                : 0;
   };
+  auto transposeFunc = [&](OpOperand &opOperand) {
+    SmallVector transposeVector = {};
+    if (opOperand.getOperandNumber() >= transposePaddings.size())
+      return transposeVector;
+    SmallVector elems;
+    StringRef(transposePaddings[opOperand.getOperandNumber()])
+        .split(elems, ':');
+    for (StringRef elem : elems)
+      transposeVector.push_back(std::stoi(elem.str()));
+    return transposeVector;
+  };
   paddingOptions.setPaddingValueComputationFunction(getNeutralOfLinalgOp);
   paddingOptions.setPaddingNoFoldComputationFunction(packFunc);
   paddingOptions.setPaddingHoistComputationFunction(hoistingFunc);
+  paddingOptions.setPaddingTransposeComputationFunction(transposeFunc);
 
   // Compute input padding values only an return failure for output operands.
   if (padInputsOnly) {

From cfe17986c952e552a731237da99f4879def3a02b Mon Sep 17 00:00:00 2001
From: Casey Carter 
Date: Wed, 29 Dec 2021 14:30:52 -0800
Subject: [PATCH 408/946] [libcxx][test] {move,reverse}_iterator cannot be
 instantiated for a type with no `operator*`

Since their nested reference types are defined in terms of `iter_reference_t`, which examines `decltype(*declval())`.

Differential Revision: https://reviews.llvm.org/D117371
---
 .../move.iter.ops/move.iter.op=/move_iterator.pass.cpp        | 4 +++-
 .../reverse.iterators/reverse.iter.cmp/three-way.pass.cpp     | 4 ++++
 .../reverse.iterators/reverse.iter.cons/assign.pass.cpp       | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
index 42a28cf14cfd0..d4857d7b0082b 100644
--- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
@@ -42,7 +42,7 @@ struct ToIter {
     typedef char *pointer;
     typedef char &reference;
     typedef char value_type;
-    typedef value_type difference_type;
+    typedef signed char difference_type;
 
     explicit TEST_CONSTEXPR_CXX17 ToIter() : m_value(0) {}
     TEST_CONSTEXPR_CXX17 ToIter(const ToIter &src) : m_value(src.m_value) {}
@@ -57,6 +57,8 @@ struct ToIter {
         return *this;
     }
     char *m_value;
+
+    reference operator*() const;
 };
 
 TEST_CONSTEXPR_CXX17 bool test_conv_assign()
diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
index e8379e24cffae..a48a7ea924c52 100644
--- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
@@ -42,6 +42,8 @@ struct Iter {
 
     constexpr Iter(double value): m_value(value) {}
     double m_value;
+
+    reference operator*() const;
 private:
     friend constexpr bool operator==(const Iter& l, const Iter& r) = default;
     friend constexpr std::partial_ordering operator<=>(const Iter& l, const Iter& r) = default;
@@ -57,6 +59,8 @@ struct ConstIter {
     constexpr ConstIter(double value): m_value(value) {}
     constexpr ConstIter(Iter it): m_value(it.m_value) {}
     double m_value;
+
+    reference operator*() const;
 private:
     friend constexpr bool operator==(const ConstIter& l, const ConstIter& r) = default;
     friend constexpr std::partial_ordering operator<=>(const ConstIter& l, const ConstIter& r) = default;
diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
index c3dbfa9a41b66..0e5123a49e2b5 100644
--- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
@@ -51,6 +51,8 @@ struct ToIter {
         return *this;
     }
     char *m_value;
+
+    reference operator*() const;
 };
 
 TEST_CONSTEXPR_CXX17 bool tests() {

From 699e22a083f2bae120e68ce7254fcddb4aaf97b3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen 
Date: Mon, 24 Jan 2022 09:39:31 +0000
Subject: [PATCH 409/946] [ISEL] Move trivial step_vector folds to
 FoldConstantArithmetic.

Given that step_vector is practically a constant, doing this early
helps with DAGCombine folds that happen before type legalization.

There is currently no way to test this happens earlier, although existing
tests for step_vector folds continue to protect the folds happening at all.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D117863
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 199dee9b0105f..e305041f7490d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5418,6 +5418,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     }
   }
 
+  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+  //      (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
+  if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
+      Ops[0].getOpcode() == ISD::STEP_VECTOR) {
+    APInt RHSVal;
+    if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
+      APInt NewStep = Opcode == ISD::MUL
+                          ? Ops[0].getConstantOperandAPInt(0) * RHSVal
+                          : Ops[0].getConstantOperandAPInt(0) << RHSVal;
+      return getStepVector(DL, VT, NewStep);
+    }
+  }
+
   auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
     return !Op.getValueType().isVector() ||
            Op.getValueType().getVectorElementCount() == NumElts;

From 6997f4d07fa4b462dd3a02838a2cfed45db9c8a0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 16:44:27 +0000
Subject: [PATCH 410/946] [X86] combineSetCCMOVMSK - fold allof(cmpeq(x,y)) ->
 ptest(sub(x,y)) (PR53379)

As suggested on PR53379, for all-of icmp-eq patterns, we can use ptest(sub(x,y)) on SSE41+ targets

This is a generalization of the existing allof(cmpeq(x,0)) -> ptest(x) pattern

We can probably extend this further, in particular to handle 256-bit cases on pre-AVX2 targets, but this part of the generalization is pretty trivial

Fixes Issue #53379
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 18 +++-
 llvm/test/CodeGen/X86/movmsk-cmp.ll           | 10 +-
 .../test/CodeGen/X86/vector-compare-all_of.ll | 15 ++-
 .../test/CodeGen/X86/vector-compare-any_of.ll | 10 +-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 92 ++++++++++---------
 5 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 10fdcc3d6ca81..2f8e97e63fd49 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44457,12 +44457,15 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
 
   // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
   // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
+  // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
+  // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
   if (IsAllOf && Subtarget.hasSSE41()) {
     SDValue BC = peekThroughBitcasts(Vec);
-    if (BC.getOpcode() == X86ISD::PCMPEQ &&
-        ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
+    if (BC.getOpcode() == X86ISD::PCMPEQ) {
       MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
-      SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
+      SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+                              BC.getOperand(0), BC.getOperand(1));
+      V = DAG.getBitcast(TestVT, V);
       return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
     }
   }
@@ -44500,7 +44503,14 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
         VecOp1.getConstantOperandAPInt(1) == 8 &&
         (IsAnyOf || (SignExt0 && SignExt1))) {
       SDLoc DL(EFLAGS);
-      SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
+      SDValue Result = peekThroughBitcasts(VecOp0.getOperand(0));
+      if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+        SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
+                                Result.getOperand(0), Result.getOperand(1));
+        V = DAG.getBitcast(MVT::v4i64, V);
+        return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+      }
+      Result = DAG.getBitcast(MVT::v32i8, Result);
       Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
       unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
       if (!SignExt0 || !SignExt1) {
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 4488aeb273f70..68adeda025d7c 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4162,17 +4162,15 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ;
 ; SSE41-LABEL: movmsk_or_v2i64:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
-; SSE41-NEXT:    movmskpd %xmm0, %eax
-; SSE41-NEXT:    cmpl $3, %eax
+; SSE41-NEXT:    psubq %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: movmsk_or_v2i64:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT:    cmpl $3, %eax
+; AVX1OR2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vptest %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index a4a8367f93294..b399712553606 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1249,9 +1249,8 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; AVX2-LABEL: bool_reduction_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1301,9 +1300,8 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ;
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1354,9 +1352,8 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ;
 ; AVX2-LABEL: bool_reduction_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index ee7e5f983c582..9af3d5e4969e8 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -959,17 +959,15 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: bool_reduction_v4i32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    cmpl $15, %eax
+; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: bool_reduction_v4i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    cmpl $15, %eax
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index c9aef7b8e7404..be9ebf466eb0d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1210,10 +1210,7 @@ define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
 ;
 ; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1679,17 +1676,15 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 ;
 ; SSE41-LABEL: icmp_v2i64_v2i1:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
-; SSE41-NEXT:    movmskpd %xmm0, %eax
-; SSE41-NEXT:    cmpb $3, %al
+; SSE41-NEXT:    psubq %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v2i64_v2i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    cmpb $3, %al
+; AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1728,19 +1723,25 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 }
 
 define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    cmpb $15, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v4i32_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i32_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v4i32_v4i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    cmpb $15, %al
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1832,27 +1833,32 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
 }
 
 define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v16i8_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v16i8_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v16i8_v16i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512F-NEXT:    cmpw $-1, %ax
+; AVX512F-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vptest %xmm0, %xmm0
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    retq
 ;
@@ -1917,9 +1923,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ;
 ; AVX2-LABEL: icmp_v4i64_v4i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    cmpb $15, %al
+; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1986,9 +1991,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 ;
 ; AVX2-LABEL: icmp_v8i32_v8i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -2054,9 +2058,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
 ;
 ; AVX2-LABEL: icmp_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -2119,9 +2122,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
 ;
 ; AVX2-LABEL: icmp_v32i8_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From c03fdd340356c9a29242975f39786529eb99f194 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Mon, 24 Jan 2022 09:03:21 -0800
Subject: [PATCH 411/946] [ELF] Fix the branch range computation when reusing a
 thunk

Notation: dst is `t->getThunkTargetSym()->getVA()`

On AArch64, when `src-0x8000000-r_addend <= dst < src-0x8000000`, the condition
`target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend))` may
incorrectly consider a thunk reusable.
`rel.addend = -getPCBias(rel.type)` resets the addend to 0 for AArch64/PPC
and the zero addend is used by `rel.sym->getVA(rel.addend)` to check
out-of-range relocations.

See the test for a case where this computation is wrong:
`error: a.o:(.text_high+0x4): relocation R_AARCH64_JUMP26 out of range: -134217732 is not in [-134217728, 134217727]`
I have seen a real world case with r_addend=19960.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D117734
---
 lld/ELF/Relocations.cpp            |  5 +--
 lld/test/ELF/aarch64-thunk-reuse.s | 49 ++++++++++++++++++++++++++++
 lld/test/ELF/arm-thunk-reuse.s     | 52 ++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/ELF/aarch64-thunk-reuse.s
 create mode 100644 lld/test/ELF/arm-thunk-reuse.s

diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 3a4cbde6ab698..074bc71509916 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -2042,7 +2042,8 @@ std::pair ThunkCreator::getThunk(InputSection *isec,
   // out in the relocation addend. We compensate for the PC bias so that
   // an Arm and Thumb relocation to the same destination get the same keyAddend,
   // which is usually 0.
-  int64_t keyAddend = rel.addend + getPCBias(rel.type);
+  const int64_t pcBias = getPCBias(rel.type);
+  const int64_t keyAddend = rel.addend + pcBias;
 
   // We use a ((section, offset), addend) pair to find the thunk position if
   // possible so that we create only one thunk for aliased symbols or ICFed
@@ -2061,7 +2062,7 @@ std::pair ThunkCreator::getThunk(InputSection *isec,
     if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) &&
         t->isCompatibleWith(*isec, rel) &&
         target->inBranchRange(rel.type, src,
-                              t->getThunkTargetSym()->getVA(rel.addend)))
+                              t->getThunkTargetSym()->getVA(-pcBias)))
       return std::make_pair(t, false);
 
   // No existing compatible Thunk in range, create a new one
diff --git a/lld/test/ELF/aarch64-thunk-reuse.s b/lld/test/ELF/aarch64-thunk-reuse.s
new file mode 100644
index 0000000000000..bdbd08ab5f282
--- /dev/null
+++ b/lld/test/ELF/aarch64-thunk-reuse.s
@@ -0,0 +1,49 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld -pie -T %t/lds %t/a.o -o %t/a
+# RUN: llvm-objdump -d --no-show-raw-insn %t/a | FileCheck %s
+
+## We create a thunk for dest.
+# CHECK-LABEL: <mid>:
+# CHECK-NEXT:   8010008:       b       0x801000c <__AArch64ADRPThunk_>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__AArch64ADRPThunk_>:
+# CHECK-NEXT:   801000c:       adrp    x16, 0x10000
+# CHECK-NEXT:                  add     x16, x16, #4
+# CHECK-NEXT:                  br      x16
+
+## The first instruction can reuse the thunk but the second can't.
+## If we reuse the thunk for b, we will get an "out of range" error.
+# CHECK-LABEL: <high>:
+# CHECK-NEXT:  1001000c:       bl      0x801000c <__AArch64ADRPThunk_>
+# CHECK-NEXT:                  b       0x10010014 <__AArch64ADRPThunk_>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__AArch64ADRPThunk_>:
+# CHECK-NEXT:  10010014:       adrp    x16, 0x10000
+# CHECK-NEXT:                  add     x16, x16, #4
+# CHECK-NEXT:                  br      x16
+
+#--- a.s
+.section .text_low, "ax", %progbits
+.globl _start
+_start:
+  nop
+dest:
+  ret
+
+.section .text_mid, "ax", %progbits
+mid:
+  b dest
+
+.section .text_high, "ax", %progbits
+high:
+  bl dest
+  b dest
+
+#--- lds
+SECTIONS {
+  .text_low 0x10000: { *(.text_low) }
+  .text_mid 0x8010008 : { *(.text_mid) }
+  .text_high 0x1001000c : { *(.text_high) }
+}
diff --git a/lld/test/ELF/arm-thunk-reuse.s b/lld/test/ELF/arm-thunk-reuse.s
new file mode 100644
index 0000000000000..3959c3cd695f4
--- /dev/null
+++ b/lld/test/ELF/arm-thunk-reuse.s
@@ -0,0 +1,52 @@
+# REQUIRES: arm
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=armv7-a-none-eabi --arm-add-build-attributes %t/a.s -o %t/a.o
+# RUN: ld.lld -pie -T %t/lds %t/a.o -o %t/a
+# RUN: llvm-objdump -d --no-show-raw-insn %t/a | FileCheck %s
+
+## We create a thunk for dest.
+# CHECK-LABEL: <mid>:
+# CHECK-NEXT:   2010004:     b       0x2010008 <__ARMV7PILongThunk_dest>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__ARMV7PILongThunk_dest>:
+# CHECK-NEXT:   2010008:     movw    r12, #65516
+# CHECK-NEXT:                movt    r12, #65023
+# CHECK-NEXT:                add     r12, r12, pc
+# CHECK-NEXT:                bx      r12
+
+## The first instruction can reuse the thunk but the second can't.
+## If we reuse the thunk for b, we will get an "out of range" error.
+# CHECK-LABEL: <high>:
+# CHECK-NEXT:   4010000:      bl      0x2010008 <__ARMV7PILongThunk_dest>
+# CHECK-NEXT:                 b       0x4010008 <__ARMV7PILongThunk_dest>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__ARMV7PILongThunk_dest>:
+# CHECK-NEXT:   4010008:      movw    r12, #65516
+# CHECK-NEXT:                 movt    r12, #64511
+# CHECK-NEXT:                 add     r12, r12, pc
+# CHECK-NEXT:                 bx      r12
+
+#--- a.s
+.section .text_low, "ax", %progbits
+
+.globl _start
+_start:
+  nop
+dest:
+  bx lr
+
+.section .text_mid, "ax", %progbits
+mid:
+  b dest
+
+.section .text_high, "ax", %progbits
+high:
+  bl dest
+  b dest
+
+#--- lds
+SECTIONS {
+  .text_low 0x10000: { *(.text_low) }
+  .text_mid 0x2010004 : { *(.text_mid) }
+  .text_high 0x4010000 : { *(.text_high) }
+}

From a2afc8249a9950da861bce5fd724c1ccf6c4eea4 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Mon, 24 Jan 2022 12:05:09 -0500
Subject: [PATCH 412/946] [libc++] Fix benchmark failure

---
 libcxx/benchmarks/filesystem.bench.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/benchmarks/filesystem.bench.cpp b/libcxx/benchmarks/filesystem.bench.cpp
index 95aecd50473fd..4fe4fc503ca13 100644
--- a/libcxx/benchmarks/filesystem.bench.cpp
+++ b/libcxx/benchmarks/filesystem.bench.cpp
@@ -83,7 +83,7 @@ void BM_PathIterateMultipleTimes(benchmark::State &st, GenInputs gen) {
     PP /= Part;
   benchmark::DoNotOptimize(PP.native().data());
   while (st.KeepRunning()) {
-    for (auto &E : PP) {
+    for (auto const& E : PP) {
       benchmark::DoNotOptimize(E.native().data());
     }
     benchmark::ClobberMemory();
@@ -104,7 +104,7 @@ void BM_PathIterateOnce(benchmark::State &st, GenInputs gen) {
   benchmark::DoNotOptimize(PP.native().data());
   while (st.KeepRunning()) {
     const path P = PP.native();
-    for (auto &E : P) {
+    for (auto const& E : P) {
       benchmark::DoNotOptimize(E.native().data());
     }
     benchmark::ClobberMemory();

From 6be77561f82d781cd957c316a7f53660510683a3 Mon Sep 17 00:00:00 2001
From: eopXD 
Date: Sat, 22 Jan 2022 23:19:01 -0800
Subject: [PATCH 413/946] [SLP][NFC] Add debug logs for entry.

Tell the users they are specifying something without vector register.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D117980
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8bfd3aa525d55..99c265fc5101a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8079,8 +8079,11 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // If the target claims to have no vector registers don't attempt
   // vectorization.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) {
+    LLVM_DEBUG(
+        dbgs() << "SLP: Didn't find any vector registers for target, abort.\n");
     return false;
+  }
 
   // Don't vectorize when the attribute NoImplicitFloat is used.
   if (F.hasFnAttribute(Attribute::NoImplicitFloat))

From 7cd441ff537e00c743236658bfbcfc16c30ce031 Mon Sep 17 00:00:00 2001
From: Owen Pan 
Date: Mon, 24 Jan 2022 08:44:54 -0800
Subject: [PATCH 414/946] [clang][NFC] Wrap TYPE_SWITCH in "do while (0)" in
 the interpreter

Wraps the expansions of TYPE_SWITCH and COMPOSITE_TYPE_SWITCH in
the constexpr interpreter with "do { ... } while (0)" so that these
macros can be used like this:

if (llvm::Optional<PrimType> T = Ctx.classify(FieldTy))
  TYPE_SWITCH(*T, Ok &= ReturnValue(FP.deref(), Value));
else
  Ok &= Composite(FieldTy, FP, Value);

This bug was found while testing D116316. See also review comment:
https://reviews.llvm.org/D64146?id=208520#inline-584131

Also cleaned up the macro definitions by removing the superfluous
do-while statements and removed the unused INT_TYPE_SWITCH macro.

Differential Revision: https://reviews.llvm.org/D117301
---
 clang/lib/AST/Interp/PrimType.h | 50 ++++++++++++++-------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/clang/lib/AST/Interp/PrimType.h b/clang/lib/AST/Interp/PrimType.h
index f5f4f8e5c32d6..de4bf9bf802e0 100644
--- a/clang/lib/AST/Interp/PrimType.h
+++ b/clang/lib/AST/Interp/PrimType.h
@@ -81,35 +81,27 @@ inline bool isPrimitiveIntegral(PrimType Type) {
 /// Helper macro to simplify type switches.
 /// The macro implicitly exposes a type T in the scope of the inner block.
 #define TYPE_SWITCH_CASE(Name, B) \
-  case Name: { using T = PrimConv<Name>::T; do {B;} while(0); break; }
+  case Name: { using T = PrimConv<Name>::T; B; break; }
 #define TYPE_SWITCH(Expr, B)                                                   \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Sint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Uint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Sint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Bool, B)                                               \
-    TYPE_SWITCH_CASE(PT_Ptr, B)                                                \
-  }
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Sint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Uint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Sint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Bool, B)                                             \
+      TYPE_SWITCH_CASE(PT_Ptr, B)                                              \
+    }                                                                          \
+  } while (0)
 #define COMPOSITE_TYPE_SWITCH(Expr, B, D)                                      \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Ptr, B)                                                \
-    default: do { D; } while(0); break;                                        \
-  }
-#define INT_TYPE_SWITCH(Expr, B)                                               \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Sint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Uint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Sint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint64, B)                                             \
-    default: llvm_unreachable("not an integer");                               \
-  }
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Ptr, B)                                              \
+      default: { D; break; }                                                   \
+    }                                                                          \
+  } while (0)
 #endif

From 80532ebb508d0ca62f96df5f253db8caed969397 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer 
Date: Mon, 24 Jan 2022 18:06:33 +0100
Subject: [PATCH 415/946] [AMDGPU][InstCombine] Remove zero image offset

Remove the offset parameter if it is zero.

Differential Revision: https://reviews.llvm.org/D117876
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  17 +
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  58 ++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   1 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   8 +
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 565 ++++++++++++++++++
 5 files changed, 649 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 84363d3c6aa1a..c3a326945557e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -182,6 +182,23 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
     }
   }
 
+  // Optimize _offset away when 'offset' is zero
+  if (const auto *OffsetMappingInfo =
+          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantOffset =
+            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
+      if (ConstantOffset->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(
+                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
+            });
+      }
+    }
+  }
+
   // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 49eaa1499bb76..cf03fd6821438 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -147,6 +147,22 @@ def MIMGBiasMappingTable : GenericTable {
   let PrimaryKeyName = "getMIMGBiasMappingInfo";
 }
 
+class MIMGOffsetMapping<MIMGBaseOpcode offset, MIMGBaseOpcode nooffset> {
+  MIMGBaseOpcode Offset = offset;
+  MIMGBaseOpcode NoOffset = nooffset;
+}
+
+def MIMGOffsetMappingTable : GenericTable {
+  let FilterClass = "MIMGOffsetMapping";
+  let CppTypeName = "MIMGOffsetMappingInfo";
+  let Fields = ["Offset", "NoOffset"];
+  string TypeOf_Offset = "MIMGBaseOpcode";
+  string TypeOf_NoOffset = "MIMGBaseOpcode";
+
+  let PrimaryKey = ["Offset"];
+  let PrimaryKeyName = "getMIMGOffsetMappingInfo";
+}
+
 class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
   MIMGBaseOpcode G = g;
   MIMGBaseOpcode G16 = g16;
@@ -1174,6 +1190,48 @@ def : MIMGBiasMapping;
 def : MIMGBiasMapping;
 def : MIMGBiasMapping;
 
+// Offset to NoOffset Optimization Mapping
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+def : MIMGOffsetMapping;
+
 // G to G16 Optimization Mapping
 def : MIMGG16Mapping;
 def : MIMGG16Mapping;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index fa1fa5b850d57..1e96266eb06c3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -133,6 +133,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
 #define GET_MIMGBiasMappingTable_IMPL
+#define GET_MIMGOffsetMappingTable_IMPL
 #define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index cabae3d1ab7e1..89f928eb8b925 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -336,6 +336,11 @@ struct MIMGBiasMappingInfo {
   MIMGBaseOpcode NoBias;
 };
 
+struct MIMGOffsetMappingInfo {
+  MIMGBaseOpcode Offset;
+  MIMGBaseOpcode NoOffset;
+};
+
 struct MIMGG16MappingInfo {
   MIMGBaseOpcode G;
   MIMGBaseOpcode G16;
@@ -350,6 +355,9 @@ const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
 LLVM_READONLY
 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
 
+LLVM_READONLY
+const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
+
 LLVM_READONLY
 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index bac4c7826a4fe..709d531b8cee6 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -4630,6 +4630,571 @@ main_body:
   ret void
 }
 
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample offset zero
+; --------------------------------------------------------------------
+
+define amdgpu_kernel void @offset_sample_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @offset_sample_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
+; CHECK-LABEL: @offset_sample_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_b_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_b_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_b_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_b_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_d_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_d_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_c_d_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_d_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_d_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_d_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_d_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_d_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_cd_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_cd_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_c_cd_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_cd_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_cd_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_cd_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+; CHECK-LABEL: @offset_sample_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; CHECK-LABEL: @offset_sample_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+; CHECK-LABEL: @offset_sample_c_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @offset_sample_c_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @offset_sample_lz_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_lz_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_lz_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_lz_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.is.shared
 ; --------------------------------------------------------------------

From 74db5c8c95e2aed40d288bb2df92eb859b87c827 Mon Sep 17 00:00:00 2001
From: Jeremy Morse 
Date: Mon, 24 Jan 2022 17:04:13 +0000
Subject: [PATCH 416/946] Revert rG6a605b97a200 due to excessive memory use

Over in the comments for D116821, some use-cases have cropped up where
there's a substantial increase in memory usage. A quick inspection
shows that a) it's a lot of memory and b) there are several things to
be done to reduce it. Reverting (via disabling this feature by default)
to avoid bothering people in the meantime.
---
 llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 5 ++++-
 llvm/test/DebugInfo/X86/instr-ref-flag.ll            | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 2913506853aa4..3149729b92313 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -124,10 +124,13 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
   // Enable by default on x86_64, disable if explicitly turned off on cmdline.
+  // Disabled while https://reviews.llvm.org/D116821 is investigated.
+#if 0
   if (T.getArch() == llvm::Triple::x86_64 &&
       ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
     return true;
+#endif
 
-  // Otherwise: enable if explicitly requestedo n command line.
+  // Otherwise: enable if explicitly requested on command line.
   return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
 }
diff --git a/llvm/test/DebugInfo/X86/instr-ref-flag.ll b/llvm/test/DebugInfo/X86/instr-ref-flag.ll
index f9d5f99edf77f..56d34aedabd02 100644
--- a/llvm/test/DebugInfo/X86/instr-ref-flag.ll
+++ b/llvm/test/DebugInfo/X86/instr-ref-flag.ll
@@ -13,6 +13,10 @@
 ;; by llc by default, and that it can be turned explicitly on or off as
 ;; desired.
 
+;; Xfail due to faults found in the discussion on
+;; https://reviews.llvm.org/D116821
+; XFAIL: *
+
 ; INSTRREFON: DBG_INSTR_REF
 ; INSTRREFOFF: DBG_VALUE
 

From 50999e82e8844615b1ae53edb9d56cdcace91b04 Mon Sep 17 00:00:00 2001
From: Marek Kurdej 
Date: Mon, 24 Jan 2022 17:57:21 +0100
Subject: [PATCH 417/946] [clang-format] Space between attribute closing
 parenthesis and qualified type colon.

Fixes https://github.com/llvm/llvm-project/issues/35711.

Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan

Differential Revision: https://reviews.llvm.org/D117894
---
 clang/lib/Format/TokenAnnotator.cpp   | 3 +++
 clang/unittests/Format/FormatTest.cpp | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index b9535f7965598..a8cd1e30f74e5 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3339,6 +3339,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
         Line.First->is(Keywords.kw_import))
       return false;
+    // Space in __attribute__((attr)) ::type.
+    if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon))
+      return true;
 
     if (Left.is(tok::kw_operator))
       return Right.is(tok::coloncolon);
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 0ddbac48d716d..c4e0e14ce5bcd 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -10028,6 +10028,7 @@ TEST_F(FormatTest, UnderstandsAttributes) {
   verifyFormat("SomeType s __attribute__((unused)) (InitValue);");
   verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa __attribute__((unused))\n"
                "aaaaaaaaaaaaaaaaaaaaaaa(int i);");
+  verifyFormat("__attribute__((nodebug)) ::qualified_type f();");
   FormatStyle AfterType = getLLVMStyle();
   AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All;
   verifyFormat("__attribute__((nodebug)) void\n"
@@ -10131,6 +10132,7 @@ TEST_F(FormatTest, UnderstandsSquareAttributes) {
   verifyFormat("class [[nodiscard]] f {\npublic:\n  f() {}\n}");
   verifyFormat("class [[deprecated(\"so sorry\")]] f {\npublic:\n  f() {}\n}");
   verifyFormat("class [[gnu::unused]] f {\npublic:\n  f() {}\n}");
+  verifyFormat("[[nodiscard]] ::qualified_type f();");
 
   // Make sure we do not mistake attributes for array subscripts.
   verifyFormat("int a() {}\n"

From db2944e34b16387bf4326ddfd2a8c420594572f4 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Mon, 14 Dec 2020 17:39:15 +0100
Subject: [PATCH 418/946] [libc++][format] Adds formatter floating-point.

This properly implements the formatter for floating-point types.

Completes:
- P1652R1 Printf corner cases in std::format
- LWG 3250 std::format: # (alternate form) for NaN and inf
- LWG 3243 std::format and negative zeroes

Implements parts of:
- P0645 Text Formatting

Reviewed By: #libc, ldionne, vitaut

Differential Revision: https://reviews.llvm.org/D114001
---
 libcxx/benchmarks/formatter_float.bench.cpp   |  241 +++
 libcxx/docs/Status/Cxx20Issues.csv            |    4 +-
 libcxx/docs/Status/Cxx20Papers.csv            |    2 +-
 libcxx/include/CMakeLists.txt                 |    1 +
 libcxx/include/__format/formatter.h           |   28 +
 .../__format/formatter_floating_point.h       |  717 +++++++
 libcxx/include/__format/formatter_integral.h  |    2 +-
 .../include/__format/parser_std_format_spec.h |  158 +-
 libcxx/include/format                         |    1 +
 libcxx/include/module.modulemap               |   29 +-
 ...formatter_floating_point.module.verify.cpp |   15 +
 .../std_format_spec_floating_point.pass.cpp   |  353 ++++
 .../formatter.floating_point.pass.cpp         |  418 +++-
 .../format/format.functions/format_tests.h    | 1502 +++++++++++++-
 .../locale-specific_form.pass.cpp             | 1717 +++++++++++++++++
 15 files changed, 5118 insertions(+), 70 deletions(-)
 create mode 100644 libcxx/benchmarks/formatter_float.bench.cpp
 create mode 100644 libcxx/include/__format/formatter_floating_point.h
 create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
 create mode 100644 libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp

diff --git a/libcxx/benchmarks/formatter_float.bench.cpp b/libcxx/benchmarks/formatter_float.bench.cpp
new file mode 100644
index 0000000000000..3190b3779c5d9
--- /dev/null
+++ b/libcxx/benchmarks/formatter_float.bench.cpp
@@ -0,0 +1,241 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "CartesianBenchmarks.h"
+#include "benchmark/benchmark.h"
+
+// *** Localization ***
+enum class LocalizationE { False, True };
+struct AllLocalizations : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"LocFalse", "LocTrue"};
+};
+
+template 
+struct Localization {};
+
+template <>
+struct Localization {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Localization {
+  static constexpr const char* fmt = "L";
+};
+
+// *** Types ***
+enum class TypeE { Float, Double, LongDouble };
+// TODO FMT Set to 3 after to_chars has long double support.
+struct AllTypes : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"Float", "Double", "LongDouble"};
+};
+
+template 
+struct Type {};
+
+template <>
+struct Type {
+  using type = float;
+};
+
+template <>
+struct Type {
+  using type = double;
+};
+
+template <>
+struct Type {
+  using type = long double;
+};
+
+// *** Values ***
+enum class ValueE { Inf, Random };
+struct AllValues : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"Inf", "Random"};
+};
+
+template 
+struct Value {};
+
+template <>
+struct Value {
+  template 
+  static std::array make_data() {
+    std::array result;
+    std::fill(result.begin(), result.end(), -std::numeric_limits::infinity());
+    return result;
+  }
+};
+
+template <>
+struct Value {
+  template 
+  static std::array make_data() {
+    std::random_device seed;
+    std::mt19937 generator(seed());
+    std::uniform_int_distribution> distribution;
+
+    std::array result;
+    std::generate(result.begin(), result.end(), [&] {
+      while (true) {
+        auto result = std::bit_cast(distribution(generator));
+        if (std::isfinite(result))
+          return result;
+      }
+    });
+    return result;
+  }
+};
+
+// *** Display Type ***
+enum class DisplayTypeE {
+  Default,
+  Hex,
+  Scientific,
+  Fixed,
+  General,
+};
+struct AllDisplayTypes : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"DisplayDefault", "DisplayHex", "DisplayScientific", "DisplayFixed",
+                                          "DisplayGeneral"};
+};
+
+template 
+struct DisplayType {};
+
+template <>
+struct DisplayType {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct DisplayType {
+  static constexpr const char* fmt = "a";
+};
+
+template <>
+struct DisplayType {
+  static constexpr const char* fmt = "e";
+};
+
+template <>
+struct DisplayType {
+  static constexpr const char* fmt = "f";
+};
+
+template <>
+struct DisplayType {
+  static constexpr const char* fmt = "g";
+};
+
+// *** Alignment ***
+enum class AlignmentE { None, Left, Center, Right, ZeroPadding };
+struct AllAlignments : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"AlignNone", "AlignmentLeft", "AlignmentCenter", "AlignmentRight",
+                                          "ZeroPadding"};
+};
+
+template 
+struct Alignment {};
+
+template <>
+struct Alignment {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Alignment {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0<17500";
+};
+
+template <>
+struct Alignment {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0^17500";
+};
+
+template <>
+struct Alignment {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0>17500";
+};
+
+template <>
+struct Alignment {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "017500";
+};
+
+enum class PrecisionE { None, Zero, Small, Huge };
+struct AllPrecisions : EnumValuesAsTuple {
+  static constexpr const char* Names[] = {"PrecNone", "PrecZero", "PrecSmall", "PrecHuge"};
+};
+
+template 
+struct Precision {};
+
+template <>
+struct Precision {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Precision {
+  static constexpr const char* fmt = ".0";
+};
+
+template <>
+struct Precision {
+  static constexpr const char* fmt = ".10";
+};
+
+template <>
+struct Precision {
+  // The maximum precision for a minimal sub normal long double is ±0x1p-16494.
+  // This value is always larger than that value forcing the trailing zero path
+  // to be executed.
+  static constexpr const char* fmt = ".17000";
+};
+
+template 
+struct FloatingPoint {
+  using F = typename Type::type;
+
+  void run(benchmark::State& state) const {
+    std::array data{Value::template make_data()};
+    std::array output;
+    std::string fmt{std::string("{:") + Alignment::fmt + Precision::fmt +
+                    Localization::fmt + DisplayType::fmt + "}"};
+
+    while (state.KeepRunningBatch(1000))
+      for (F value : data)
+        benchmark::DoNotOptimize(std::format_to(output.begin(), fmt, value));
+  }
+
+  std::string name() const {
+    return "FloatingPoint" + L::name() + DT::name() + T::name() + V::name() + A::name() + P::name();
+  }
+};
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+
+  makeCartesianProductBenchmark();
+
+  benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index f93e66fda1849..034fac3ca22f8 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -203,10 +203,10 @@
 "`3237 `__","LWG 3038 and 3190 have inconsistent PRs","Prague","|Complete|","14.0"
 "`3238 `__","Insufficiently-defined behavior of ``std::function``\  deduction guides","Prague","",""
 "`3242 `__","``std::format``\ : missing rules for ``arg-id``\  in ``width``\  and ``precision``\ ","Prague","|Complete|","Clang 14","|format|"
-"`3243 `__","``std::format``\  and negative zeroes","Prague","","","|format|"
+"`3243 `__","``std::format``\  and negative zeroes","Prague","|Complete|","14.0","|format|"
 "`3247 `__","``ranges::iter_move``\  should perform ADL-only lookup of ``iter_move``\ ","Prague","","","|ranges|"
 "`3248 `__","``std::format``\  ``#b``\ , ``#B``\ , ``#o``\ , ``#x``\ , and ``#X``\   presentation types misformat negative numbers","Prague","|Complete|","14.0","|format|"
-"`3250 `__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","","","|format|"
+"`3250 `__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","|Complete|","14.0","|format|"
 "`3251 `__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","","","|format|"
 "`3252 `__","Parse locale's aware modifiers for commands are not consistent with POSIX spec","Prague","","","|chrono|"
 "`3254 `__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","",""
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index d0927e09426ce..5a974a798f7bd 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -129,7 +129,7 @@
 "`P1644R0 `__","LWG","Add wait/notify to atomic","Cologne","",""
 "`P1650R0 `__","LWG","Output std::chrono::days with 'd' suffix","Cologne","",""
 "`P1651R0 `__","LWG","bind_front should not unwrap reference_wrapper","Cologne","|Complete|","13.0"
-"`P1652R1 `__","LWG","Printf corner cases in std::format","Cologne","|In Progress|",""
+"`P1652R1 `__","LWG","Printf corner cases in std::format","Cologne","|Complete|","14.0"
 "`P1661R1 `__","LWG","Remove dedicated precalculated hash lookup interface","Cologne","|Nothing To Do|",""
 "`P1754R1 `__","LWG","Rename concepts to standard_case for C++20, while we still can","Cologne","|In Progress|",""
 "","","","","",""
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index b7222540846fa..3886ddba8e4e5 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -178,6 +178,7 @@ set(files
   __format/formatter.h
   __format/formatter_bool.h
   __format/formatter_char.h
+  __format/formatter_floating_point.h
   __format/formatter_integer.h
   __format/formatter_integral.h
   __format/formatter_string.h
diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h
index 1adce75a86110..0c0c02ba19173 100644
--- a/libcxx/include/__format/formatter.h
+++ b/libcxx/include/__format/formatter.h
@@ -190,6 +190,34 @@ __write(output_iterator auto __out_it, const _CharT* __first,
   return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
 }
 
+/**
+ * @overload
+ *
+ * Writes additional zeros for the precision before the exponent.
+ * This is used when the precision requested in the format string is larger
+ * than the maximum precision of the floating-point type. These precision
+ * digits are always 0.
+ *
+ * @param __exponent           The location of the exponent character.
+ * @param __num_trailing_zeros The number of 0's to write before the exponent
+ *                             character.
+ */
+template 
+_LIBCPP_HIDE_FROM_ABI auto __write(output_iterator auto __out_it, const _CharT* __first,
+                                   const _CharT* __last, size_t __size, size_t __width, _Fill __fill,
+                                   __format_spec::_Flags::_Alignment __alignment, const _CharT* __exponent,
+                                   size_t __num_trailing_zeros) -> decltype(__out_it) {
+  _LIBCPP_ASSERT(__first <= __last, "Not a valid range");
+  _LIBCPP_ASSERT(__num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used");
+
+  __padding_size_result __padding = __padding_size(__size + __num_trailing_zeros, __width, __alignment);
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+  __out_it = _VSTD::copy(__first, __exponent, _VSTD::move(__out_it));
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
+  __out_it = _VSTD::copy(__exponent, __last, _VSTD::move(__out_it));
+  return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
 /**
  * @overload
  *
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
new file mode 100644
index 0000000000000..2e710b409deb6
--- /dev/null
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -0,0 +1,717 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
+#define _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_n.h>
+#include <__algorithm/fill_n.h>
+#include <__algorithm/find.h>
+#include <__algorithm/min.h>
+#include <__algorithm/rotate.h>
+#include <__algorithm/transform.h>
+#include <__concepts/arithmetic.h>
+#include <__config>
+#include <__debug>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/format_string.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <__utility/move.h>
+#include 
+#include 
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include 
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much
+// effort to support compilers with partial C++20 support.
+#  if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template 
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __fmt);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt, int __precision) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __fmt, __precision);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+// https://en.cppreference.com/w/cpp/language/types#cite_note-1
+// float             min subnormal: +/-0x1p-149   max: +/- 3.402,823,4 10^38
+// double            min subnormal: +/-0x1p-1074  max  +/- 1.797,693,134,862,315,7 10^308
+// long double (x86) min subnormal: +/-0x1p-16446 max: +/- 1.189,731,495,357,231,765,021 10^4932
+//
+// The maximum number of digits required for the integral part is based on the
+// maximum value's power of 10. Every power of 10 requires one additional
+// decimal digit.
+// The maximum number of digits required for the fractional part is based on
+// the minimal subnormal hexadecimal output's power of 10. Every division of a
+// fraction's binary 1 by 2, requires one additional decimal digit.
+//
+// The maximum size of a formatted value depends on the selected output format.
+// Ignoring the fact the format string can request a precision larger than the
+// maximum the value requires, these values are:
+//
+// sign                    1 code unit
+// __max_integral
+// radix point             1 code unit
+// __max_fractional
+// exponent character      1 code unit
+// sign                    1 code unit
+// __max_fractional_value
+// -----------------------------------
+// total                   4 code units extra required.
+//
+// TODO FMT Optimize the storage to avoid storing digits that are known to be zero.
+// https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/
+
+// TODO FMT Add long double specialization when to_chars has proper long double support.
+template 
+struct __traits;
+
+template 
+static constexpr size_t __float_buffer_size(int __precision) {
+  using _Traits = __traits<_Fp>;
+  return 4 + _Traits::__max_integral + __precision + _Traits::__max_fractional_value;
+}
+
+template <>
+struct __traits {
+  static constexpr int __max_integral = 38;
+  static constexpr int __max_fractional = 149;
+  static constexpr int __max_fractional_value = 3;
+  static constexpr size_t __stack_buffer_size = 256;
+
+  static constexpr int __hex_precision_digits = 3;
+};
+
+template <>
+struct __traits {
+  static constexpr int __max_integral = 308;
+  static constexpr int __max_fractional = 1074;
+  static constexpr int __max_fractional_value = 4;
+  static constexpr size_t __stack_buffer_size = 1024;
+
+  static constexpr int __hex_precision_digits = 4;
+};
+
+/// Helper class to store the conversion buffer.
+///
+/// Depending on the maximum size required for a value, the buffer is allocated
+/// on the stack or the heap.
+template 
+class _LIBCPP_TEMPLATE_VIS __float_buffer {
+  using _Traits = __traits<_Fp>;
+
+public:
+  // TODO FMT Improve this constructor to do a better estimate.
+  // When using a scientific formatting with a precision of 6 a stack buffer
+  // will always suffice. At the moment that isn't important since floats and
+  // doubles use a stack buffer, unless the precision used in the format string
+  // is large.
+  // When supporting long doubles the __max_integral part becomes 4932 which
+  // may be too much for some platforms. For these cases a better estimate is
+  // required.
+  explicit _LIBCPP_HIDE_FROM_ABI __float_buffer(int __precision)
+      : __precision_(__precision != -1 ? __precision : _Traits::__max_fractional) {
+
+    // When the precision is larger than _Traits::__max_fractional the digits in
+    // the range (_Traits::__max_fractional, precision] will contain the value
+    // zero. There's no need to request to_chars to write these zeros:
+    // - When the value is large a temporary heap buffer needs to be allocated.
+    // - When to_chars writes the values they need to be "copied" to the output:
+    //   - char: std::fill on the output iterator is faster than std::copy.
+    //   - wchar_t: same argument as char, but additional std::copy won't work.
+    //     The input is always a char buffer, so every char in the buffer needs
+    //     to be converted from a char to a wchar_t.
+    if (__precision_ > _Traits::__max_fractional) {
+      __num_trailing_zeros_ = __precision_ - _Traits::__max_fractional;
+      __precision_ = _Traits::__max_fractional;
+    }
+
+    __size_ = __format_spec::__float_buffer_size<_Fp>(__precision_);
+    if (__size_ > _Traits::__stack_buffer_size)
+      // The allocated buffer's contents don't need initialization.
+      __begin_ = allocator{}.allocate(__size_);
+    else
+      __begin_ = __buffer_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI ~__float_buffer() {
+    if (__size_ > _Traits::__stack_buffer_size)
+      allocator{}.deallocate(__begin_, __size_);
+  }
+  _LIBCPP_HIDE_FROM_ABI __float_buffer(const __float_buffer&) = delete;
+  _LIBCPP_HIDE_FROM_ABI __float_buffer& operator=(const __float_buffer&) = delete;
+
+  _LIBCPP_HIDE_FROM_ABI char* begin() const { return __begin_; }
+  _LIBCPP_HIDE_FROM_ABI char* end() const { return __begin_ + __size_; }
+
+  _LIBCPP_HIDE_FROM_ABI int __precision() const { return __precision_; }
+  _LIBCPP_HIDE_FROM_ABI int __num_trailing_zeros() const { return __num_trailing_zeros_; }
+  _LIBCPP_HIDE_FROM_ABI void __remove_trailing_zeros() { __num_trailing_zeros_ = 0; }
+
+private:
+  int __precision_;
+  int __num_trailing_zeros_{0};
+  size_t __size_;
+  char* __begin_;
+  char __buffer_[_Traits::__stack_buffer_size];
+};
+
+struct __float_result {
+  /// Points at the beginning of the integral part in the buffer.
+  ///
+  /// When there's no sign character this points at the start of the buffer.
+  char* __integral;
+
+  /// Points at the radix point, when not present it's the same as \ref __last.
+  char* __radix_point;
+
+  /// Points at the exponent character, when not present it's the same as \ref __last.
+  char* __exponent;
+
+  /// Points beyond the last written element in the buffer.
+  char* __last;
+};
+
+/// Finds the position of the exponent character 'e' at the end of the buffer.
+///
+/// An exponent has the form eSdd up to eSdddd (S = sign, d = digit), so only
+/// the last 6 characters are searched. A range of exactly 4 characters can be
+/// an exponent, e.g. "e+20" when the caller skips the digit in "1e+20".
+/// \returns a pointer to the exponent or __last when not found.
+constexpr inline _LIBCPP_HIDE_FROM_ABI char* __find_exponent(char* __first, char* __last) {
+  ptrdiff_t __size = __last - __first;
+  if (__size >= 4) {
+    __first = __last - _VSTD::min(__size, ptrdiff_t(6));
+    for (; __first != __last - 3; ++__first) {
+      if (*__first == 'e')
+        return __first;
+    }
+  }
+  return __last;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_default(const __float_buffer<_Fp>& __buffer, _Tp __value,
+                                                             char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value);
+
+  __result.__exponent = __format_spec::__find_exponent(__result.__integral, __result.__last);
+
+  // Constraints:
+  // - There's at least one decimal digit before the radix point.
+  // - The radix point, when present, is placed before the exponent.
+  __result.__radix_point = _VSTD::find(__result.__integral + 1, __result.__exponent, '.');
+
+  // When the radix point isn't found its position is the exponent instead of
+  // __result.__last.
+  if (__result.__radix_point == __result.__exponent)
+    __result.__radix_point = __result.__last;
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last || *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_lower_case(const __float_buffer<_Fp>& __buffer,
+                                                                            _Tp __value, int __precision,
+                                                                            char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  if (__precision == -1)
+    __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex);
+  else
+    __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex, __precision);
+
+  // H = one or more hex-digits
+  // S = sign
+  // D = one or more decimal-digits
+  // When the fractional part is zero and no precision the output is 0p+0
+  // else the output is                                              0.HpSD
+  // So testing the second position can differentiate between these two cases.
+  char* __first = __integral + 1;
+  if (*__first == '.') {
+    __result.__radix_point = __first;
+    // One digit is the minimum
+    // 0.hpSd
+    //       ^-- last
+    //     ^---- integral = end of search
+    // ^-------- start of search
+    // 0123456
+    //
+    // Four digits is the maximum
+    // 0.hpSdddd
+    //          ^-- last
+    //        ^---- integral = end of search
+    //    ^-------- start of search
+    // 0123456789
+    static_assert(__traits<_Fp>::__hex_precision_digits <= 4, "Guard against possible underflow.");
+
+    char* __last = __result.__last - 2;
+    __first = __last - __traits<_Fp>::__hex_precision_digits;
+    __result.__exponent = _VSTD::find(__first, __last, 'p');
+  } else {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __first;
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent != __result.__last && *__result.__exponent == 'p'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_upper_case(const __float_buffer<_Fp>& __buffer,
+                                                                            _Tp __value, int __precision,
+                                                                            char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_hexadecimal_lower_case(__buffer, __value, __precision, __integral);
+  _VSTD::transform(__result.__integral, __result.__exponent, __result.__integral, __hex_to_upper);
+  *__result.__exponent = 'P';
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_lower_case(const __float_buffer<_Fp>& __buffer,
+                                                                           _Tp __value, int __precision,
+                                                                           char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last =
+      __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::scientific, __precision);
+
+  char* __first = __integral + 1;
+  _LIBCPP_ASSERT(__first != __result.__last, "No exponent present");
+  if (*__first == '.') {
+    __result.__radix_point = __first;
+    __result.__exponent = __format_spec::__find_exponent(__first + 1, __result.__last);
+  } else {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __first;
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent != __result.__last && *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_upper_case(const __float_buffer<_Fp>& __buffer,
+                                                                           _Tp __value, int __precision,
+                                                                           char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __precision, __integral);
+  *__result.__exponent = 'E';
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_fixed(const __float_buffer<_Fp>& __buffer, _Tp __value,
+                                                           int __precision, char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::fixed, __precision);
+
+  // When there's no precision there's no radix point.
+  // Else the radix point is placed at __precision + 1 from the end.
+  // By converting __precision to a bool the subtraction can be done
+  // unconditionally.
+  __result.__radix_point = __result.__last - (__precision + bool(__precision));
+  __result.__exponent = __result.__last;
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last),
+                 "Post-condition failure.");
+  // clang-format on
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_lower_case(__float_buffer<_Fp>& __buffer, _Tp __value,
+                                                                        int __precision, char* __integral) {
+
+  __buffer.__remove_trailing_zeros();
+
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::general, __precision);
+
+  char* __first = __integral + 1;
+  if (__first == __result.__last) {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __result.__last;
+  } else {
+    __result.__exponent = __format_spec::__find_exponent(__first, __result.__last);
+    if (__result.__exponent != __result.__last)
+      // In scientific mode if there's a radix point it will always be after
+      // the first digit. (This is the position __first points at).
+      __result.__radix_point = *__first == '.' ? __first : __result.__last;
+    else {
+      // In fixed mode the algorithm truncates trailing zeros and possibly the
+      // radix point. There's no good guess for the position of the radix point
+      // therefore scan the output after the first digit.
+      __result.__radix_point = _VSTD::find(__first, __result.__last, '.');
+    }
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last || *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template 
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_upper_case(__float_buffer<_Fp>& __buffer, _Tp __value,
+                                                                        int __precision, char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_general_lower_case(__buffer, __value, __precision, __integral);
+  if (__result.__exponent != __result.__last)
+    *__result.__exponent = 'E';
+  return __result;
+}
+
+#    ifndef _LIBCPP_HAS_NO_LOCALIZATION
+template 
+_LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(_OutIt __out_it, const __float_buffer<_Fp>& __buffer,
+                                                           const __float_result& __result, _VSTD::locale __loc,
+                                                           size_t __width, _Flags::_Alignment __alignment,
+                                                           _CharT __fill) {
+  const auto& __np = use_facet>(__loc);
+  string __grouping = __np.grouping();
+  char* __first = __result.__integral;
+  // When no radix point or exponent are present __last will be __result.__last.
+  char* __last = _VSTD::min(__result.__radix_point, __result.__exponent);
+
+  ptrdiff_t __digits = __last - __first;
+  if (!__grouping.empty()) {
+    if (__digits <= __grouping[0])
+      __grouping.clear();
+    else
+      __grouping = __determine_grouping(__digits, __grouping);
+  }
+
+  size_t __size = __result.__last - __buffer.begin() + // Formatted string
+                  __buffer.__num_trailing_zeros() +    // Not yet rendered zeros
+                  __grouping.size() -                  // Grouping contains one
+                  !__grouping.empty();                 // additional character
+
+  __formatter::__padding_size_result __padding = {0, 0};
+  bool __zero_padding = __alignment == _Flags::_Alignment::__default;
+  if (__size < __width) {
+    if (__zero_padding) {
+      __alignment = _Flags::_Alignment::__right;
+      __fill = _CharT('0');
+    }
+
+    __padding = __formatter::__padding_size(__size, __width, __alignment);
+  }
+
+  // sign and (zero padding or alignment)
+  if (__zero_padding && __first != __buffer.begin())
+    *__out_it++ = *__buffer.begin();
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+  if (!__zero_padding && __first != __buffer.begin())
+    *__out_it++ = *__buffer.begin();
+
+  // integral part
+  if (__grouping.empty()) {
+    __out_it = _VSTD::copy_n(__first, __digits, _VSTD::move(__out_it));
+  } else {
+    auto __r = __grouping.rbegin();
+    auto __e = __grouping.rend() - 1;
+    _CharT __sep = __np.thousands_sep();
+    // The output is divided in small groups of numbers to write:
+    // - A group before the first separator.
+    // - A separator and a group, repeated for the number of separators.
+    // - A group after the last separator.
+    // This loop achieves that process by testing the termination condition
+    // midway in the loop.
+    while (true) {
+      __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it));
+      __first += *__r;
+
+      if (__r == __e)
+        break;
+
+      ++__r;
+      *__out_it++ = __sep;
+    }
+  }
+
+  // fractional part
+  if (__result.__radix_point != __result.__last) {
+    *__out_it++ = __np.decimal_point();
+    __out_it = _VSTD::copy(__result.__radix_point + 1, __result.__exponent, _VSTD::move(__out_it));
+    __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __buffer.__num_trailing_zeros(), _CharT('0'));
+  }
+
+  // exponent
+  if (__result.__exponent != __result.__last)
+    __out_it = _VSTD::copy(__result.__exponent, __result.__last, _VSTD::move(__out_it));
+
+  // alignment
+  return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
+#    endif // _LIBCPP_HAS_NO_LOCALIZATION
+
+/// The formatter implementation shared by the floating-point types.
+/// The std-format-spec is parsed by the @ref __parser_floating_point base.
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatter_floating_point : public __parser_floating_point<_CharT> {
+public:
+  /// Writes @p __value to the output of @p __ctx as directed by the parsed
+  /// std-format-spec and returns the advanced output iterator. The fill and
+  /// alignment are only adjusted in local copies while formatting.
+  template <floating_point _Tp>
+  _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx) -> decltype(__ctx.out()) {
+    if (this->__width_needs_substitution())
+      this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+    bool __negative = _VSTD::signbit(__value);
+
+    if (!_VSTD::isfinite(__value)) [[unlikely]]
+      return __format_non_finite(__ctx.out(), __negative, _VSTD::isnan(__value));
+
+    bool __has_precision = this->__has_precision_field();
+    if (this->__precision_needs_substitution())
+      this->__substitute_precision_arg_id(__ctx.arg(this->__precision));
+
+    // The sign and the value might not be output together: zero-padding may
+    // insert additional '0' characters between them. Therefore the value is
+    // processed as a non-negative value. The function @ref __insert_sign
+    // will insert a '-' when the value was negative.
+
+    if (__negative)
+      __value = _VSTD::copysign(__value, +1.0);
+
+    // TODO FMT _Fp should just be _Tp when to_chars has proper long double support.
+    using _Fp = conditional_t<same_as<_Tp, long double>, double, _Tp>;
+    // Force the type of the precision to prevent -1 from becoming an unsigned value.
+    __float_buffer<_Fp> __buffer(__has_precision ? int(this->__precision) : -1);
+    __float_result __result = __format_buffer(__buffer, __value, __negative, __has_precision);
+
+    if (this->__alternate_form && __result.__radix_point == __result.__last) {
+      *__result.__last++ = '.';
+
+      // Move the point before the exponent, if any. Without an exponent the
+      // rotate is a nop; rotate tests that itself, so call it unconditionally.
+      _VSTD::rotate(__result.__exponent, __result.__last - 1, __result.__last);
+      __result.__radix_point = __result.__exponent;
+
+      // The radix point is always placed before the exponent. Without an
+      // exponent __exponent needs to point to the new last, with an exponent
+      // it moved one position to the right; so increment it unconditionally.
+      ++__result.__exponent;
+    }
+
+#    ifndef _LIBCPP_HAS_NO_LOCALIZATION
+    if (this->__locale_specific_form)
+      return __format_spec::__format_locale_specific_form(__ctx.out(), __buffer, __result, __ctx.locale(),
+                                                          this->__width, this->__alignment, this->__fill);
+#    endif
+
+    ptrdiff_t __size = __result.__last - __buffer.begin();
+    int __num_trailing_zeros = __buffer.__num_trailing_zeros();
+    if (__size + __num_trailing_zeros >= this->__width) {
+      if (__num_trailing_zeros && __result.__exponent != __result.__last)
+        // Insert trailing zeros before exponent character.
+        return _VSTD::copy(__result.__exponent, __result.__last,
+                           _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__exponent, __ctx.out()),
+                                         __num_trailing_zeros, _CharT('0')));
+
+      return _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros,
+                           _CharT('0'));
+    }
+
+    auto __out_it = __ctx.out();
+    char* __first = __buffer.begin();
+    // Adjust copies so the parsed state is unchanged for a later format call.
+    _Flags::_Alignment __alignment = this->__alignment;
+    _CharT __fill = this->__fill;
+    if (__alignment == _Flags::_Alignment::__default) {
+      // Zero padding: write the sign, if any, then pad like right alignment
+      // with '0'. __size is unaffected by where the sign gets written.
+      if (__first != __result.__integral)
+        *__out_it++ = *__first++;
+      __alignment = _Flags::_Alignment::__right;
+      __fill = _CharT('0');
+    }
+
+    if (__num_trailing_zeros)
+      return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, __fill,
+                                  __alignment, __result.__exponent, __num_trailing_zeros);
+
+    return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, __fill,
+                                __alignment);
+  }
+
+private:
+  /// Writes the sign and "inf" or "nan" in the case of the display type, padded to the width.
+  template <class _OutIt>
+  _LIBCPP_HIDE_FROM_ABI _OutIt __format_non_finite(_OutIt __out_it, bool __negative, bool __isnan) {
+    char __buffer[4];
+    char* __last = __insert_sign(__buffer, __negative, this->__sign);
+
+    // to_chars can return inf, infinity, nan, and nan(n-char-sequence).
+    // The format library requires inf and nan.
+    // All in one expression to avoid dangling references.
+    __last = _VSTD::copy_n(&("infnanINFNAN"[6 * (this->__type == _Flags::_Type::__float_hexadecimal_upper_case ||
+                                                 this->__type == _Flags::_Type::__scientific_upper_case ||
+                                                 this->__type == _Flags::_Type::__fixed_upper_case ||
+                                                 this->__type == _Flags::_Type::__general_upper_case) +
+                                            3 * __isnan]),
+                           3, __last);
+
+    // [format.string.std]/13
+    // A zero (0) character preceding the width field pads the field with
+    // leading zeros (following any indication of sign or base) to the field
+    // width, except when applied to an infinity or NaN.
+    _Flags::_Alignment __alignment =
+        this->__alignment == _Flags::_Alignment::__default ? _Flags::_Alignment::__right : this->__alignment;
+
+    ptrdiff_t __size = __last - __buffer;
+    if (__size >= this->__width)
+      return _VSTD::copy_n(__buffer, __size, _VSTD::move(__out_it));
+
+    return __formatter::__write(_VSTD::move(__out_it), __buffer, __last, __size, this->__width, this->__fill,
+                                __alignment);
+  }
+
+  /// Fills the buffer with the data based on the requested formatting.
+  ///
+  /// This function, when needed, turns the characters to upper case and
+  /// determines the "interesting" locations which are returned to the caller.
+  /// So the caller never has to convert the buffer to upper case or search
+  /// for the radix point and the exponent. This gives a bit of overhead, but
+  /// without it the code was littered with tests for upper case and searches
+  /// for radix points and exponents, due to the additional work that can be
+  /// needed to turn the number into the proper output:
+  /// - A precision larger than the type's precision needs additional zero
+  ///   characters before the exponent.
+  /// - alternate form needs to add a radix point when not present.
+  /// - localization needs to do grouping in the integral part.
+  template <class _Fp, class _Tp>
+  // TODO FMT _Fp should just be _Tp when to_chars has proper long double support.
+  _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer(__float_buffer<_Fp>& __buffer, _Tp __value, bool __negative,
+                                                       bool __has_precision) {
+    char* __first = __insert_sign(__buffer.begin(), __negative, this->__sign);
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      return __format_spec::__format_buffer_default(__buffer, __value, __first);
+
+    case _Flags::_Type::__float_hexadecimal_lower_case:
+      return __format_spec::__format_buffer_hexadecimal_lower_case(
+          __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first);
+
+    case _Flags::_Type::__float_hexadecimal_upper_case:
+      return __format_spec::__format_buffer_hexadecimal_upper_case(
+          __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first);
+
+    case _Flags::_Type::__scientific_lower_case:
+      return __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__scientific_upper_case:
+      return __format_spec::__format_buffer_scientific_upper_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__fixed_lower_case:
+    case _Flags::_Type::__fixed_upper_case:
+      return __format_spec::__format_buffer_fixed(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__general_lower_case:
+      return __format_spec::__format_buffer_general_lower_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__general_upper_case:
+      return __format_spec::__format_buffer_general_upper_case(__buffer, __value, __buffer.__precision(), __first);
+
+    default:
+      _LIBCPP_ASSERT(false, "The parser should have validated the type");
+      _LIBCPP_UNREACHABLE();
+    }
+  }
+};
+
+} //namespace __format_spec
+
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<float, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<double, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long double, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+
+#  endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index 73e2fed8c0b7b..f164ee6109748 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -82,7 +82,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 namespace __format_spec {
 
 /** Wrapper around @ref to_chars, returning the output pointer. */
-template 
+template 
 _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last,
                                         _Tp __value, int __base) {
   // TODO FMT Evaluate code overhead due to not calling the internal function
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 6eae27d9ad188..75fc626c84ac1 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -475,6 +475,21 @@ __parse_type(const _CharT* __begin, _Flags& __flags) {
   return ++__begin;
 }
 
+/**
+ * Normalizes the parsed alignment and zero-padding flags of arithmetic types.
+ *
+ * [format.string.std]/13
+ *   If the 0 character and an align option both appear, the 0 character is
+ *   ignored.
+ * Afterwards a @ref __default alignment means zero-padding for the formatter.
+ */
+_LIBCPP_HIDE_FROM_ABI constexpr void __process_arithmetic_alignment(_Flags& __flags) {
+  if (__flags.__alignment != _Flags::_Alignment::__default)
+    __flags.__zero_padding = false; // An explicit align option wins over the 0.
+  else if (!__flags.__zero_padding)
+    __flags.__alignment = _Flags::_Alignment::__right;
+}
+
 /**
  * The parser for the std-format-spec.
  *
@@ -648,23 +663,9 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
     return __begin;
   }
 
-  /**
-   * Handles the post-parsing updates for the integer types.
-   *
-   * Updates the zero-padding and alignment for integer types.
-   *
-   * [format.string.std]/13
-   *   If the 0 character and an align option both appear, the 0 character is
-   *   ignored.
-   *
-   * For the formatter a @ref __default alignment means zero-padding. Update
-   * the alignment based on parsed format string.
-   */
+  /** Handles the post-parsing updates for the integer types. */
   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
-    this->__zero_padding &= this->__alignment == _Flags::_Alignment::__default;
-    if (!this->__zero_padding &&
-        this->__alignment == _Flags::_Alignment::__default)
-      this->__alignment = _Flags::_Alignment::__right;
+    __process_arithmetic_alignment(static_cast<_Flags&>(*this));
   }
 
   /**
@@ -701,7 +702,130 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
   }
 };
 
-// TODO FMT Add a parser for floating-point values.
+/**
+ * The parser for the std-format-spec.
+ *
+ * This implements the parser for the floating-point types.
+ *
+ * See @ref __parser_string.
+ */
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_floating_point
+    : public __parser_width,              // provides __width(|as_arg)
+      public __parser_precision,          // provides __precision(|as_arg)
+      public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+      public _Flags                       // provides __flags
+{
+public:
+  using char_type = _CharT;
+
+  /**
+   * The std-format-spec parse function used by the formatter.
+   *
+   * Parses the input with @ref __parse and afterwards post-processes the
+   * parsed alignment, zero-padding, and display type.
+   *
+   * @pre See @ref __parse.
+   *
+   * @returns An iterator to the end of the input or to the terminating '}'.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+      -> decltype(__parse_ctx.begin()) {
+    auto __it = __parse(__parse_ctx);
+    __process_arithmetic_alignment(static_cast<_Flags&>(*this));
+    __process_display_type();
+    return __it;
+  }
+protected:
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __parse_ctx.begin() points at the beginning of the std-format-spec.
+   * This means directly after the ':'.
+   * @note Calls @ref __throw_format_error when the std-format-spec neither
+   * parses the entire input nor stops at a '}'.
+   *
+   * @returns An iterator to the end of the input or to the terminating '}'.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
+      -> decltype(__parse_ctx.begin()) {
+    auto __begin = __parse_ctx.begin();
+    auto __end = __parse_ctx.end();
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
+                                                   static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin =
+        __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+    if (__begin != __end && *__begin != _CharT('}'))
+      __throw_format_error(
+          "The format-spec should consume the input or end with a '}'");
+
+    return __begin;
+  }
+
+  /** Processes the parsed std-format-spec based on the parsed display type. */
+  _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      // Without a precision the type stays __default, since that formatting
+      // differs from the other types. With a precision it becomes general.
+      if (this->__has_precision_field())
+        this->__type = _Flags::_Type::__general_lower_case;
+      break;
+    case _Flags::_Type::__float_hexadecimal_lower_case:
+    case _Flags::_Type::__float_hexadecimal_upper_case:
+      // Precision specific behavior will be handled later.
+      break;
+    case _Flags::_Type::__scientific_lower_case:
+    case _Flags::_Type::__scientific_upper_case:
+    case _Flags::_Type::__fixed_lower_case:
+    case _Flags::_Type::__fixed_upper_case:
+    case _Flags::_Type::__general_lower_case:
+    case _Flags::_Type::__general_upper_case:
+      if (!this->__has_precision_field()) {
+        // Set the default precision for the call to to_chars.
+        this->__precision = 6;
+        this->__precision_as_arg = false;
+      }
+      break;
+
+    default:
+      __throw_format_error("The format-spec type has a type not supported for "
+                           "a floating-point argument");
+    }
+  }
+};
+
 // TODO FMT Add a parser for pointer values.
 
 /** Helper struct returned from @ref __get_string_alignment. */
diff --git a/libcxx/include/format b/libcxx/include/format
index 788b9c299abc0..3a186469dd5c0 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -277,6 +277,7 @@ namespace std {
 #include <__format/formatter.h>
 #include <__format/formatter_bool.h>
 #include <__format/formatter_char.h>
+#include <__format/formatter_floating_point.h>
 #include <__format/formatter_integer.h>
 #include <__format/formatter_string.h>
 #include <__format/parser_std_format_spec.h>
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index a927f9d0e6700..c940a6d11d816 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -488,25 +488,26 @@ module std [system] {
     export *
 
     module __format {
-      module format_arg             { private header "__format/format_arg.h" }
-      module format_args            { private header "__format/format_args.h" }
+      module format_arg               { private header "__format/format_arg.h" }
+      module format_args              { private header "__format/format_args.h" }
       module format_context {
         private header "__format/format_context.h"
         export optional
         export locale
       }
-      module format_error           { private header "__format/format_error.h" }
-      module format_fwd             { private header "__format/format_fwd.h" }
-      module format_parse_context   { private header "__format/format_parse_context.h" }
-      module format_string          { private header "__format/format_string.h" }
-      module format_to_n_result     { private header "__format/format_to_n_result.h" }
-      module formatter              { private header "__format/formatter.h" }
-      module formatter_bool         { private header "__format/formatter_bool.h" }
-      module formatter_char         { private header "__format/formatter_char.h" }
-      module formatter_integer      { private header "__format/formatter_integer.h" }
-      module formatter_integral     { private header "__format/formatter_integral.h" }
-      module formatter_string       { private header "__format/formatter_string.h" }
-      module parser_std_format_spec { private header "__format/parser_std_format_spec.h" }
+      module format_error             { private header "__format/format_error.h" }
+      module format_fwd               { private header "__format/format_fwd.h" }
+      module format_parse_context     { private header "__format/format_parse_context.h" }
+      module format_string            { private header "__format/format_string.h" }
+      module format_to_n_result       { private header "__format/format_to_n_result.h" }
+      module formatter                { private header "__format/formatter.h" }
+      module formatter_bool           { private header "__format/formatter_bool.h" }
+      module formatter_char           { private header "__format/formatter_char.h" }
+      module formatter_floating_point { private header "__format/formatter_floating_point.h" }
+      module formatter_integer        { private header "__format/formatter_integer.h" }
+      module formatter_integral       { private header "__format/formatter_integral.h" }
+      module formatter_string         { private header "__format/formatter_string.h" }
+      module parser_std_format_spec   { private header "__format/parser_std_format_spec.h" }
     }
   }
   module forward_list {
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
new file mode 100644
index 0000000000000..35f19dcf76a73
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: modules-build
+
+// WARNING: This test was generated by 'generate_private_header_tests.py'
+// and should not be edited manually.
+
+// expected-error@*:* {{use of private header from outside its module: '__format/formatter_floating_point.h'}}
+#include <__format/formatter_floating_point.h>
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp
new file mode 100644
index 0000000000000..caf38a7e520a0
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp
@@ -0,0 +1,353 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// Tests the parsing of the format string as specified in [format.string.std].
+// It validates whether the std-format-spec is valid for a floating-point type.
+
+#include 
+#include 
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include 
+#endif
+
+#include "concepts_precision.h"
+#include "test_macros.h"
+#include "make_string.h"
+#include "test_exception.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template <class CharT>
+using Parser = __parser_floating_point<CharT>;
+
+template <class CharT>
+struct Expected {
+  CharT fill = CharT(' ');
+  _Flags::_Alignment alignment = _Flags::_Alignment::__right; // __default only remains with zero-padding
+  _Flags::_Sign sign = _Flags::_Sign::__default;
+  bool alternate_form = false;
+  bool zero_padding = false;
+  uint32_t width = 0;
+  bool width_as_arg = false;
+  uint32_t precision = std::__format::__number_max; // the value when no precision is parsed
+  bool precision_as_arg = true;
+  bool locale_specific_form = false;
+  _Flags::_Type type = _Flags::_Type::__default;
+};
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, std::basic_string_view<CharT> fmt) {
+  // Initialize the parse context with sufficient arguments so that parsing
+  // does not fail due to insufficient arguments.
+  std::basic_format_parse_context<CharT> parse_ctx(fmt, std::__format::__number_max);
+  auto begin = parse_ctx.begin();
+  auto end = parse_ctx.end();
+  Parser<CharT> parser;
+  auto it = parser.parse(parse_ctx);
+
+  assert(begin == parse_ctx.begin());
+  assert(end == parse_ctx.end());
+
+  assert(begin + size == it);
+  assert(parser.__fill == expected.fill);
+  assert(parser.__alignment == expected.alignment);
+  assert(parser.__sign == expected.sign);
+  assert(parser.__alternate_form == expected.alternate_form);
+  assert(parser.__zero_padding == expected.zero_padding);
+  assert(parser.__width == expected.width);
+  assert(parser.__width_as_arg == expected.width_as_arg);
+  assert(parser.__precision == expected.precision);
+  assert(parser.__precision_as_arg == expected.precision_as_arg);
+  assert(parser.__locale_specific_form == expected.locale_specific_form);
+  assert(parser.__type == expected.type);
+}
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, const CharT* f) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::basic_string_view<CharT> fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, size, fmt);
+  fmt.remove_suffix(1);
+  test(expected, size, fmt);
+}
+
+template 
+constexpr void test() {
+  Parser parser;
+
+  assert(parser.__fill == CharT(' '));
+  assert(parser.__alignment == _Flags::_Alignment::__default);
+  assert(parser.__sign == _Flags::_Sign::__default);
+  assert(parser.__alternate_form == false);
+  assert(parser.__zero_padding == false);
+  assert(parser.__width == 0);
+  assert(parser.__width_as_arg == false);
+  assert(parser.__precision == std::__format::__number_max);
+  assert(parser.__precision_as_arg == true);
+  assert(parser.__locale_specific_form == false);
+  assert(parser.__type == _Flags::_Type::__default);
+
+  // Depending on whether or not a precision is specified the results differ.
+  // Table 65: Meaning of type options for floating-point types [tab:format.type.float]
+
+  test({}, 0, CSTR("}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".0}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{1}}"));
+
+  test({.type = _Flags::_Type::__float_hexadecimal_lower_case}, 1, CSTR("a}"));
+  test({.type = _Flags::_Type::__float_hexadecimal_upper_case}, 1, CSTR("A}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 1, CSTR("e}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 3, CSTR(".0e}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__scientific_lower_case}, 5, CSTR(".{1}e}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 1, CSTR("E}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 3, CSTR(".0E}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__scientific_upper_case}, 5, CSTR(".{1}E}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 1, CSTR("f}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 3, CSTR(".0f}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__fixed_lower_case}, 5, CSTR(".{1}f}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 1, CSTR("F}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 3, CSTR(".0F}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__fixed_upper_case}, 5, CSTR(".{1}F}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 1, CSTR("g}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".0g}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 5, CSTR(".{1}g}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 1, CSTR("G}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 3, CSTR(".0G}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_upper_case}, 5, CSTR(".{1}G}"));
+
+  // *** Align-fill ***
+  test({.alignment = _Flags::_Alignment::__left}, 1, CSTR("<}"));
+  test({.alignment = _Flags::_Alignment::__center}, 1, "^}");
+  test({.alignment = _Flags::_Alignment::__right}, 1, ">}");
+
+  test({.fill = CharT('L'), .alignment = _Flags::_Alignment::__left}, 2, CSTR("L<}"));
+  test({.fill = CharT('#'), .alignment = _Flags::_Alignment::__center}, 2, CSTR("#^}"));
+  test({.fill = CharT('0'), .alignment = _Flags::_Alignment::__right}, 2, CSTR("0>}"));
+
+  test_exception>("The format-spec fill field contains an invalid character", CSTR("{<"));
+  test_exception>("The format-spec fill field contains an invalid character", CSTR("}<"));
+
+  // *** Sign ***
+  test({.sign = _Flags::_Sign::__minus}, 1, CSTR("-}"));
+  test({.sign = _Flags::_Sign::__plus}, 1, CSTR("+}"));
+  test({.sign = _Flags::_Sign::__space}, 1, CSTR(" }"));
+
+  // *** Alternate form ***
+  test({.alternate_form = true}, 1, CSTR("#}"));
+
+  // *** Zero padding ***
+  // TODO FMT What to do with zero-padding without a width?
+  // [format.string.std]/13
+  //   A zero (0) character preceding the width field pads the field with
+  //   leading zeros (following any indication of sign or base) to the field
+  //   width, except when applied to an infinity or NaN.
+  // Obviously it makes no sense, but should it be allowed or is it a format
+  // error?
+  test({.alignment = _Flags::_Alignment::__default, .zero_padding = true}, 1, CSTR("0}"));
+  test({.alignment = _Flags::_Alignment::__left, .zero_padding = false}, 2, CSTR("<0}"));
+  test({.alignment = _Flags::_Alignment::__center, .zero_padding = false}, 2, CSTR("^0}"));
+  test({.alignment = _Flags::_Alignment::__right, .zero_padding = false}, 2, CSTR(">0}"));
+
+  // *** Width ***
+  test({.width = 0, .width_as_arg = false}, 0, CSTR("}"));
+  test({.width = 1, .width_as_arg = false}, 1, CSTR("1}"));
+  test({.width = 10, .width_as_arg = false}, 2, CSTR("10}"));
+  test({.width = 1000, .width_as_arg = false}, 4, CSTR("1000}"));
+  test({.width = 1000000, .width_as_arg = false}, 7, CSTR("1000000}"));
+
+  test({.width = 0, .width_as_arg = true}, 2, CSTR("{}}"));
+  test({.width = 0, .width_as_arg = true}, 3, CSTR("{0}}"));
+  test({.width = 1, .width_as_arg = true}, 3, CSTR("{1}}"));
+
+  test_exception>("A format-spec width field shouldn't have a leading zero", CSTR("00"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.width = 2'147'483'647, .width_as_arg = false}, 10, CSTR("2147483647}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("2147483648"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("5000000000"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("10000000000"));
+
+  test_exception>("End of input while parsing format-spec arg-id", CSTR("{"));
+  test_exception>("Invalid arg-id", CSTR("{0"));
+  test_exception>("The arg-id of the format-spec starts with an invalid character", CSTR("{a"));
+  test_exception>("Invalid arg-id", CSTR("{1"));
+  test_exception>("Invalid arg-id", CSTR("{9"));
+  test_exception>("Invalid arg-id", CSTR("{9:"));
+  test_exception>("Invalid arg-id", CSTR("{9a"));
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true arg-id < __format::__number_max.
+  test({.width = 2'147'483'646, .width_as_arg = true}, 12, CSTR("{2147483646}}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{2147483648}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{5000000000}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{10000000000}"));
+
+  // *** Precision ***
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".0}"));
+  test({.precision = 1, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".1}"));
+  test({.precision = 10, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".10}"));
+  test({.precision = 1000, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 5, CSTR(".1000}"));
+  test({.precision = 1000000, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 8,
+       CSTR(".1000000}"));
+
+  test({.precision = 0, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".{}}"));
+  test({.precision = 0, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{0}}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{1}}"));
+
+  test_exception>("The format-spec precision field doesn't contain a value or arg-id", CSTR(".a"));
+  test_exception>("The format-spec precision field doesn't contain a value or arg-id", CSTR(".:"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.precision = 2'147'483'647, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 11,
+       CSTR(".2147483647}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".2147483648"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".5000000000"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".10000000000"));
+
+  test_exception>("End of input while parsing format-spec arg-id", CSTR(".{"));
+  test_exception>("Invalid arg-id", CSTR(".{0"));
+  test_exception>("The arg-id of the format-spec starts with an invalid character", CSTR(".{a"));
+  test_exception>("Invalid arg-id", CSTR(".{1"));
+  test_exception>("Invalid arg-id", CSTR(".{9"));
+  test_exception>("Invalid arg-id", CSTR(".{9:"));
+  test_exception>("Invalid arg-id", CSTR(".{9a"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true: arg-id < __format::__number_max.
+  test({.precision = 2'147'483'646, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 13,
+       CSTR(".{2147483646}}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".{2147483648}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".{5000000000}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR(".{10000000000}"));
+
+  // *** Width & Precision ***
+  test({.width = 1,
+        .width_as_arg = false,
+        .precision = 0,
+        .precision_as_arg = false,
+        .type = _Flags::_Type::__general_lower_case},
+       3, CSTR("1.0}"));
+  test({.width = 0,
+        .width_as_arg = true,
+        .precision = 1,
+        .precision_as_arg = true,
+        .type = _Flags::_Type::__general_lower_case},
+       5, CSTR("{}.{}}"));
+  test({.width = 10,
+        .width_as_arg = true,
+        .precision = 9,
+        .precision_as_arg = true,
+        .type = _Flags::_Type::__general_lower_case},
+       8, CSTR("{10}.{9}}"));
+
+  // *** Locale-specific form ***
+  test({.locale_specific_form = true}, 1, CSTR("L}"));
+
+  // *** Type ***
+  {
+    const char* unsuported_type = "The format-spec type has a type not supported for a floating-point argument";
+    const char* not_a_type = "The format-spec should consume the input or end with a '}'";
+
+    test({.type = _Flags::_Type::__float_hexadecimal_upper_case}, 1, CSTR("A}"));
+    test_exception>(unsuported_type, CSTR("B}"));
+    test_exception>(not_a_type, CSTR("C}"));
+    test_exception>(not_a_type, CSTR("D}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 1, CSTR("E}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 1, CSTR("F}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 1, CSTR("G}"));
+    test_exception>(not_a_type, CSTR("H}"));
+    test_exception>(not_a_type, CSTR("I}"));
+    test_exception>(not_a_type, CSTR("J}"));
+    test_exception>(not_a_type, CSTR("K}"));
+    test({.locale_specific_form = true}, 1, CSTR("L}"));
+    test_exception>(not_a_type, CSTR("M}"));
+    test_exception>(not_a_type, CSTR("N}"));
+    test_exception>(not_a_type, CSTR("O}"));
+    test_exception>(not_a_type, CSTR("P}"));
+    test_exception>(not_a_type, CSTR("Q}"));
+    test_exception>(not_a_type, CSTR("R}"));
+    test_exception>(not_a_type, CSTR("S}"));
+    test_exception>(not_a_type, CSTR("T}"));
+    test_exception>(not_a_type, CSTR("U}"));
+    test_exception>(not_a_type, CSTR("V}"));
+    test_exception>(not_a_type, CSTR("W}"));
+    test_exception>(unsuported_type, CSTR("X}"));
+    test_exception>(not_a_type, CSTR("Y}"));
+    test_exception>(not_a_type, CSTR("Z}"));
+
+    test({.type = _Flags::_Type::__float_hexadecimal_lower_case}, 1, CSTR("a}"));
+    test_exception>(unsuported_type, CSTR("b}"));
+    test_exception>(unsuported_type, CSTR("c}"));
+    test_exception>(unsuported_type, CSTR("d}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 1, CSTR("e}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 1, CSTR("f}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 1, CSTR("g}"));
+    test_exception>(not_a_type, CSTR("h}"));
+    test_exception>(not_a_type, CSTR("i}"));
+    test_exception>(not_a_type, CSTR("j}"));
+    test_exception>(not_a_type, CSTR("k}"));
+    test_exception>(not_a_type, CSTR("l}"));
+    test_exception>(not_a_type, CSTR("m}"));
+    test_exception>(not_a_type, CSTR("n}"));
+    test_exception>(unsuported_type, CSTR("o}"));
+    test_exception>(unsuported_type, CSTR("p}"));
+    test_exception>(not_a_type, CSTR("q}"));
+    test_exception>(not_a_type, CSTR("r}"));
+    test_exception>(unsuported_type, CSTR("s}"));
+    test_exception>(not_a_type, CSTR("t}"));
+    test_exception>(not_a_type, CSTR("u}"));
+    test_exception>(not_a_type, CSTR("v}"));
+    test_exception>(not_a_type, CSTR("w}"));
+    test_exception>(unsuported_type, CSTR("x}"));
+    test_exception>(not_a_type, CSTR("y}"));
+    test_exception>(not_a_type, CSTR("z}"));
+  }
+  // *** General ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("ss"));
+}
+
+constexpr bool test() {
+  test();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test();
+#endif
+
+  return true;
+}
+
+int main(int, char**) {
+#if !defined(_WIN32) && !defined(_AIX)
+  // Make sure the parsers match the expectations. The layout of the
+  // subobjects is chosen to minimize the size required.
+  static_assert(sizeof(Parser) == 3 * sizeof(uint32_t));
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  static_assert(sizeof(Parser) == (sizeof(wchar_t) <= 2 ? 3 * sizeof(uint32_t) : 4 * sizeof(uint32_t)));
+#  endif
+#endif
+
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
index 1ba1189b512c5..0e8e95d2762f6 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
@@ -8,7 +8,6 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
-// UNSUPPORTED: LIBCXX-DEBUG-FIXME
 
 // 
 
@@ -25,11 +24,14 @@
 // - double
 // - long double
 
-// TODO FMT Enable after floating-point support has been enabled
-#if 0
 #include 
+
+#include 
 #include 
+#include 
+#include 
 #include 
+#include 
 #include 
 
 #include "test_macros.h"
@@ -37,9 +39,8 @@
 
 #define STR(S) MAKE_STRING(CharT, S)
 
-template 
-void test(StringViewT fmt, ArithmeticT arg) {
-  using CharT = typename StringViewT::value_type;
+template 
+void test(std::basic_string_view fmt, ArithmeticT arg, std::basic_string expected) {
   auto parse_ctx = std::basic_format_parse_context(fmt);
   std::formatter formatter;
   static_assert(std::semiregular);
@@ -51,15 +52,19 @@ void test(StringViewT fmt, ArithmeticT arg) {
   auto out = std::back_inserter(result);
   using FormatCtxT = std::basic_format_context;
 
-  auto format_ctx = std::__format_context_create(
-      out, std::make_format_args(arg));
+  auto format_ctx = std::__format_context_create(out, std::make_format_args(arg));
   formatter.format(arg, format_ctx);
-  std::string expected = std::to_string(arg);
-  assert(result == std::basic_string(expected.begin(), expected.end()));
+
+  if (expected.empty()) {
+    std::array buffer;
+    expected.append(buffer.begin(), std::to_chars(buffer.begin(), buffer.end(), arg).ptr);
+  }
+
+  assert(result == expected);
 }
 
 template 
-void test_termination_condition(StringT f, ArithmeticT arg) {
+void test_termination_condition(StringT f, ArithmeticT arg, StringT expected = {}) {
   // The format-spec is valid if completely consumed or terminates at a '}'.
   // The valid inputs all end with a '}'. The test is executed twice:
   // - first with the terminating '}',
@@ -68,40 +73,398 @@ void test_termination_condition(StringT f, ArithmeticT arg) {
   std::basic_string_view fmt{f};
   assert(fmt.back() == CharT('}') && "Pre-condition failure");
 
-  test(fmt, arg);
+  test(fmt, arg, expected);
   fmt.remove_suffix(1);
-  test(fmt, arg);
+  test(fmt, arg, expected);
+}
+
+template 
+void test_hex_lower_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  test_termination_condition(STR(".20000a}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000a}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000a}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000a}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000a}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  test_termination_condition(STR(".20000La}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000La}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000La}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000La}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000La}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_hex_upper_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000A}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000A}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000A}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000A}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000A}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LA}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LA}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LA}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LA}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LA}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_scientific_lower_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  test_termination_condition(STR(".20000e}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000e}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000e}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000e}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000e}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  test_termination_condition(STR(".20000Le}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Le}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Le}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Le}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Le}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_scientific_upper_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000E}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000E}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000E}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000E}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000E}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LE}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LE}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LE}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LE}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LE}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_fixed_lower_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  test_termination_condition(STR(".20000f}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000f}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000f}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000f}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000f}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  test_termination_condition(STR(".20000Lf}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Lf}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Lf}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Lf}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Lf}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_fixed_upper_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000F}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000F}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000F}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000F}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000F}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LF}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LF}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LF}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LF}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LF}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_general_lower_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  test_termination_condition(STR(".20000g}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000g}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000g}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000g}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000g}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  test_termination_condition(STR(".20000Lg}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Lg}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Lg}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Lg}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Lg}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_general_upper_case_precision(ArithmeticT value) {
+  std::array buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000G}"), value, std::basic_string{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000G}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000G}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000G}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000G}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LG}"), value, std::basic_string{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LG}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LG}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LG}"), value, std::basic_string{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LG}"), value, std::basic_string{buffer.begin(), buffer.end()});
+#endif
+}
+
+template 
+void test_value(ArithmeticT value) {
+  test_hex_lower_case_precision(value);
+  test_hex_upper_case_precision(value);
+
+  test_scientific_lower_case_precision(value);
+  test_scientific_upper_case_precision(value);
+
+  test_fixed_lower_case_precision(value);
+  test_fixed_upper_case_precision(value);
+
+  test_general_lower_case_precision(value);
+  test_general_upper_case_precision(value);
+}
+
+template 
+void test_special_values() {
+  using A = ArithmeticT;
+
+  test_value(-std::numeric_limits::max());
+  test_value(A(-1.0));
+  test_value(-std::numeric_limits::min());
+  test_value(-std::numeric_limits::denorm_min());
+  test_value(A(-0.0));
+
+  test_value(A(0.0));
+  test_value(std::numeric_limits::denorm_min());
+  test_value(A(1.0));
+  test_value(std::numeric_limits::min());
+  test_value(std::numeric_limits::max());
 }
 
 template 
 void test_float_type() {
   using A = ArithmeticT;
+
   test_termination_condition(STR("}"), A(-std::numeric_limits::max()));
   test_termination_condition(STR("}"), A(-std::numeric_limits::min()));
   test_termination_condition(STR("}"), A(-0.0));
+
   test_termination_condition(STR("}"), A(0.0));
   test_termination_condition(STR("}"), A(std::numeric_limits::min()));
   test_termination_condition(STR("}"), A(std::numeric_limits::max()));
   if (sizeof(A) > sizeof(float)) {
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits::max()));
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits::min()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits::min()));
     test_termination_condition(STR("}"), A(std::numeric_limits::min()));
     test_termination_condition(STR("}"), A(std::numeric_limits::max()));
   }
   if (sizeof(A) > sizeof(double)) {
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits::max()));
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits::min()));
-    test_termination_condition(STR("}"),
-                               A(std::numeric_limits::min()));
-    test_termination_condition(STR("}"),
-                               A(std::numeric_limits::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits::min()));
+    test_termination_condition(STR("}"), A(std::numeric_limits::min()));
+    test_termination_condition(STR("}"), A(std::numeric_limits::max()));
   }
 
-  // TODO FMT Also test with special floating point values: +/-Inf NaN.
+  // The results of inf and nan may differ from the result of to_chars.
+  test_termination_condition(STR("}"), A(-std::numeric_limits::infinity()), STR("-inf"));
+  test_termination_condition(STR("}"), A(std::numeric_limits::infinity()), STR("inf"));
+
+  A nan = std::numeric_limits::quiet_NaN();
+  test_termination_condition(STR("}"), std::copysign(nan, -1.0), STR("-nan"));
+  test_termination_condition(STR("}"), nan, STR("nan"));
+
+  // TODO FMT Enable long double testing
+  if constexpr (!std::same_as)
+    test_special_values();
 }
 
 template 
@@ -119,6 +482,3 @@ int main(int, char**) {
 
   return 0;
 }
-#else
-int main(int, char**) { return 0; }
-#endif
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 470da03b8b083..c2eeb236f99d0 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -13,6 +13,8 @@
 #include "make_string.h"
 #include "test_macros.h"
 
+#include 
+
 // In this file the following template types are used:
 // TestFunction must be callable as check(expected-result, string-to-format, args-to-format...)
 // ExceptionTest must be callable as check_exception(expected-exception, string-to-format, args-to-format...)
@@ -992,6 +994,1496 @@ void format_test_char_as_integer(TestFunction check,
         fmt, '*');
 }
 
+template <class F, class CharT, class TestFunction> // NOTE(review): parameter list restored from usage (F(...), TestFunction; CharT presumably consumed by STR) - confirm order against callers.
+void format_test_floating_point_hex_lower_case(TestFunction check) { // Checks "{:a}" (lower-case hex float) output for F without an explicit precision.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // Test whether the hexadecimal letters are the proper case.
+  // The precision is too large for float, so two tests are used.
+  check(STR("answer is '1.abcp+0'"), STR("answer is '{:a}'"), F(0x1.abcp+0));
+  check(STR("answer is '1.defp+0'"), STR("answer is '{:a}'"), F(0x1.defp+0));
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1p-2'"), STR("answer is '{:7a}'"), F(0.25));
+  check(STR("answer is '   1p-2'"), STR("answer is '{:>7a}'"), F(0.25));
+  check(STR("answer is '1p-2   '"), STR("answer is '{:<7a}'"), F(0.25));
+  check(STR("answer is ' 1p-2  '"), STR("answer is '{:^7a}'"), F(0.25));
+
+  check(STR("answer is '---1p-3'"), STR("answer is '{:->7a}'"), F(125e-3));
+  check(STR("answer is '1p-3---'"), STR("answer is '{:-<7a}'"), F(125e-3));
+  check(STR("answer is '-1p-3--'"), STR("answer is '{:-^7a}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6a}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6a}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6a}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7a}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7a}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7a}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1p-2'"), STR("answer is '{:>07a}'"), F(0.25));
+  check(STR("answer is '1p-2   '"), STR("answer is '{:<07a}'"), F(0.25));
+  check(STR("answer is ' 1p-2  '"), STR("answer is '{:^07a}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0p+0'"), STR("answer is '{:a}'"), F(0));
+  check(STR("answer is '0p+0'"), STR("answer is '{:-a}'"), F(0));
+  check(STR("answer is '+0p+0'"), STR("answer is '{:+a}'"), F(0));
+  check(STR("answer is ' 0p+0'"), STR("answer is '{: a}'"), F(0));
+
+  check(STR("answer is '-0p+0'"), STR("answer is '{:a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{:-a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{:+a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{: a}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:a}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-a}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+a}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: a}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: a}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0p+0'"), STR("answer is '{:a}'"), F(0));
+  check(STR("answer is '0.p+0'"), STR("answer is '{:#a}'"), F(0));
+
+  check(STR("answer is '1p+1'"), STR("answer is '{:.0a}'"), F(2.5));
+  check(STR("answer is '1.p+1'"), STR("answer is '{:#.0a}'"), F(2.5));
+  check(STR("answer is '1.4p+1'"), STR("answer is '{:#a}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#a}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#a}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1p-5'"), STR("answer is '{:04a}'"), 0.03125);
+  check(STR("answer is '+1p-5'"), STR("answer is '{:+05a}'"), 0.03125);
+  check(STR("answer is '+01p-5'"), STR("answer is '{:+06a}'"), 0.03125);
+
+  check(STR("answer is '0001p-5'"), STR("answer is '{:07a}'"), 0.03125);
+  check(STR("answer is '0001p-5'"), STR("answer is '{:-07a}'"), 0.03125);
+  check(STR("answer is '+001p-5'"), STR("answer is '{:+07a}'"), 0.03125);
+  check(STR("answer is ' 001p-5'"), STR("answer is '{: 07a}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010a}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010a}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010a}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_hex_lower_case_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction> // NOTE(review): parameter list restored from usage (F(...), TestFunction; CharT presumably consumed by STR) - confirm order against callers.
+void format_test_floating_point_hex_upper_case(TestFunction check) { // Checks "{:A}" (upper-case hex float) output for F without an explicit precision.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // Test whether the hexadecimal letters are the proper case.
+  // The precision is too large for float, so two tests are used.
+  check(STR("answer is '1.ABCP+0'"), STR("answer is '{:A}'"), F(0x1.abcp+0));
+  check(STR("answer is '1.DEFP+0'"), STR("answer is '{:A}'"), F(0x1.defp+0));
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1P-2'"), STR("answer is '{:7A}'"), F(0.25));
+  check(STR("answer is '   1P-2'"), STR("answer is '{:>7A}'"), F(0.25));
+  check(STR("answer is '1P-2   '"), STR("answer is '{:<7A}'"), F(0.25));
+  check(STR("answer is ' 1P-2  '"), STR("answer is '{:^7A}'"), F(0.25));
+
+  check(STR("answer is '---1P-3'"), STR("answer is '{:->7A}'"), F(125e-3));
+  check(STR("answer is '1P-3---'"), STR("answer is '{:-<7A}'"), F(125e-3));
+  check(STR("answer is '-1P-3--'"), STR("answer is '{:-^7A}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6A}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6A}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6A}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7A}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7A}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7A}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1P-2'"), STR("answer is '{:>07A}'"), F(0.25));
+  check(STR("answer is '1P-2   '"), STR("answer is '{:<07A}'"), F(0.25));
+  check(STR("answer is ' 1P-2  '"), STR("answer is '{:^07A}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0P+0'"), STR("answer is '{:A}'"), F(0));
+  check(STR("answer is '0P+0'"), STR("answer is '{:-A}'"), F(0));
+  check(STR("answer is '+0P+0'"), STR("answer is '{:+A}'"), F(0));
+  check(STR("answer is ' 0P+0'"), STR("answer is '{: A}'"), F(0));
+
+  check(STR("answer is '-0P+0'"), STR("answer is '{:A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{:-A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{:+A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{: A}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:A}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-A}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+A}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: A}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: A}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0P+0'"), STR("answer is '{:A}'"), F(0));
+  check(STR("answer is '0.P+0'"), STR("answer is '{:#A}'"), F(0));
+
+  check(STR("answer is '1P+1'"), STR("answer is '{:.0A}'"), F(2.5));
+  check(STR("answer is '1.P+1'"), STR("answer is '{:#.0A}'"), F(2.5));
+  check(STR("answer is '1.4P+1'"), STR("answer is '{:#A}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#A}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#A}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1P-5'"), STR("answer is '{:04A}'"), 0.03125);
+  check(STR("answer is '+1P-5'"), STR("answer is '{:+05A}'"), 0.03125);
+  check(STR("answer is '+01P-5'"), STR("answer is '{:+06A}'"), 0.03125);
+
+  check(STR("answer is '0001P-5'"), STR("answer is '{:07A}'"), 0.03125);
+  check(STR("answer is '0001P-5'"), STR("answer is '{:-07A}'"), 0.03125);
+  check(STR("answer is '+001P-5'"), STR("answer is '{:+07A}'"), 0.03125);
+  check(STR("answer is ' 001P-5'"), STR("answer is '{: 07A}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010A}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010A}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010A}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_hex_upper_case_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction> // NOTE(review): parameter list restored from usage (F(...), TestFunction; CharT presumably consumed by STR) - confirm order against callers.
+void format_test_floating_point_hex_lower_case_precision(TestFunction check) { // Checks "{:.6a}" (lower-case hex float, precision 6) output for F.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:14.6a}'"), F(0.25));
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:>14.6a}'"), F(0.25));
+  check(STR("answer is '1.000000p-2   '"), STR("answer is '{:<14.6a}'"), F(0.25));
+  check(STR("answer is ' 1.000000p-2  '"), STR("answer is '{:^14.6a}'"), F(0.25));
+
+  check(STR("answer is '---1.000000p-3'"), STR("answer is '{:->14.6a}'"), F(125e-3));
+  check(STR("answer is '1.000000p-3---'"), STR("answer is '{:-<14.6a}'"), F(125e-3));
+  check(STR("answer is '-1.000000p-3--'"), STR("answer is '{:-^14.6a}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6.6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6.6a}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6.6a}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6.6a}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7.6a}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7.6a}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7.6a}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:>014.6a}'"), F(0.25));
+  check(STR("answer is '1.000000p-2   '"), STR("answer is '{:<014.6a}'"), F(0.25));
+  check(STR("answer is ' 1.000000p-2  '"), STR("answer is '{:^014.6a}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000p+0'"), STR("answer is '{:.6a}'"), F(0));
+  check(STR("answer is '0.000000p+0'"), STR("answer is '{:-.6a}'"), F(0));
+  check(STR("answer is '+0.000000p+0'"), STR("answer is '{:+.6a}'"), F(0));
+  check(STR("answer is ' 0.000000p+0'"), STR("answer is '{: .6a}'"), F(0));
+
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:-.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:+.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{: .6a}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: .6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: .6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:.6a}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-.6a}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+.6a}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: .6a}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: .6a}'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '1.400000p+1'"), STR("answer is '{:#.6a}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#.6a}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#.6a}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1.000000p-5'"), STR("answer is '{:011.6a}'"), 0.03125);
+  check(STR("answer is '+1.000000p-5'"), STR("answer is '{:+012.6a}'"), 0.03125);
+  check(STR("answer is '+01.000000p-5'"), STR("answer is '{:+013.6a}'"), 0.03125);
+
+  check(STR("answer is '0001.000000p-5'"), STR("answer is '{:014.6a}'"), 0.03125);
+  check(STR("answer is '0001.000000p-5'"), STR("answer is '{:-014.6a}'"), 0.03125);
+  check(STR("answer is '+001.000000p-5'"), STR("answer is '{:+014.6a}'"), 0.03125);
+  check(STR("answer is ' 001.000000p-5'"), STR("answer is '{: 014.6a}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010.6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010.6a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010.6a}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010.6a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010.6a}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010.6a}'"), nan_neg);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction> // NOTE(review): parameter list restored from usage (F(...), TestFunction; CharT presumably consumed by STR) - confirm order against callers.
+void format_test_floating_point_hex_upper_case_precision(TestFunction check) { // Checks "{:.6A}" (upper-case hex float, precision 6) output for F.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:14.6A}'"), F(0.25));
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:>14.6A}'"), F(0.25));
+  check(STR("answer is '1.000000P-2   '"), STR("answer is '{:<14.6A}'"), F(0.25));
+  check(STR("answer is ' 1.000000P-2  '"), STR("answer is '{:^14.6A}'"), F(0.25));
+
+  check(STR("answer is '---1.000000P-3'"), STR("answer is '{:->14.6A}'"), F(125e-3));
+  check(STR("answer is '1.000000P-3---'"), STR("answer is '{:-<14.6A}'"), F(125e-3));
+  check(STR("answer is '-1.000000P-3--'"), STR("answer is '{:-^14.6A}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6.6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6.6A}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6.6A}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6.6A}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7.6A}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7.6A}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7.6A}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:>014.6A}'"), F(0.25));
+  check(STR("answer is '1.000000P-2   '"), STR("answer is '{:<014.6A}'"), F(0.25));
+  check(STR("answer is ' 1.000000P-2  '"), STR("answer is '{:^014.6A}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000P+0'"), STR("answer is '{:.6A}'"), F(0));
+  check(STR("answer is '0.000000P+0'"), STR("answer is '{:-.6A}'"), F(0));
+  check(STR("answer is '+0.000000P+0'"), STR("answer is '{:+.6A}'"), F(0));
+  check(STR("answer is ' 0.000000P+0'"), STR("answer is '{: .6A}'"), F(0));
+
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:-.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:+.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{: .6A}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: .6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: .6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:.6A}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-.6A}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+.6A}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: .6A}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: .6A}'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '1.400000P+1'"), STR("answer is '{:#.6A}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#.6A}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#.6A}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1.000000P-5'"), STR("answer is '{:011.6A}'"), 0.03125);
+  check(STR("answer is '+1.000000P-5'"), STR("answer is '{:+012.6A}'"), 0.03125);
+  check(STR("answer is '+01.000000P-5'"), STR("answer is '{:+013.6A}'"), 0.03125);
+
+  check(STR("answer is '0001.000000P-5'"), STR("answer is '{:014.6A}'"), 0.03125);
+  check(STR("answer is '0001.000000P-5'"), STR("answer is '{:-014.6A}'"), 0.03125);
+  check(STR("answer is '+001.000000P-5'"), STR("answer is '{:+014.6A}'"), 0.03125);
+  check(STR("answer is ' 001.000000P-5'"), STR("answer is '{: 014.6A}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010.6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010.6A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010.6A}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010.6A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010.6A}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010.6A}'"), nan_neg);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template 
+void format_test_floating_point_scientific_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:15e}'"), F(0.25));
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:>15e}'"), F(0.25));
+  check(STR("answer is '2.500000e-01   '"), STR("answer is '{:<15e}'"), F(0.25));
+  check(STR("answer is ' 2.500000e-01  '"), STR("answer is '{:^15e}'"), F(0.25));
+
+  check(STR("answer is '---1.250000e-01'"), STR("answer is '{:->15e}'"), F(125e-3));
+  check(STR("answer is '1.250000e-01---'"), STR("answer is '{:-<15e}'"), F(125e-3));
+  check(STR("answer is '-1.250000e-01--'"), STR("answer is '{:-^15e}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6e}'"), std::numeric_limits::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6e}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7e}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6e}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6e}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6e}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7e}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7e}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7e}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:>015e}'"), F(0.25));
+  check(STR("answer is '2.500000e-01   '"), STR("answer is '{:<015e}'"), F(0.25));
+  check(STR("answer is ' 2.500000e-01  '"), STR("answer is '{:^015e}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:e}'"), F(0));
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:-e}'"), F(0));
+  check(STR("answer is '+0.000000e+00'"), STR("answer is '{:+e}'"), F(0));
+  check(STR("answer is ' 0.000000e+00'"), STR("answer is '{: e}'"), F(0));
+
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:-e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:+e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{: e}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:e}'"), std::numeric_limits::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+e}'"), std::numeric_limits::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: e}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: e}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:e}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-e}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+e}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: e}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: e}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0e+00'"), STR("answer is '{:.0e}'"), F(0));
+  check(STR("answer is '0.e+00'"), STR("answer is '{:#.0e}'"), F(0));
+
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:#e}'"), F(0));
+  check(STR("answer is '2.500000e+00'"), STR("answer is '{:#e}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#e}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#e}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#e}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '3.125000e-02'"), STR("answer is '{:07e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+07e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+08e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+09e}'"), 0.03125);
+
+  check(STR("answer is '003.125000e-02'"), STR("answer is '{:014e}'"), 0.03125);
+  check(STR("answer is '003.125000e-02'"), STR("answer is '{:-014e}'"), 0.03125);
+  check(STR("answer is '+03.125000e-02'"), STR("answer is '{:+014e}'"), 0.03125);
+  check(STR("answer is ' 03.125000e-02'"), STR("answer is '{: 014e}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010e}'"), std::numeric_limits::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010e}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010e}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010e}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010e}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010e}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010e}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010e}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010e}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '3e-02'"), STR("answer is '{:.0e}'"), 0.03125);
+  check(STR("answer is '3.1e-02'"), STR("answer is '{:.1e}'"), 0.03125);
+  check(STR("answer is '3.125e-02'"), STR("answer is '{:.3e}'"), 0.03125);
+  check(STR("answer is '3.1250000000e-02'"), STR("answer is '{:.10e}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_scientific_upper_case(TestFunction check) {
+  // Runs check(expected, fmt, arg) over the 'E' presentation type: fill/align, sign, '#', zero-padding, precision.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, F(-1.0));     // "-nan"; F(-1.0) avoids promoting the result to double
+
+  // *** align-fill & width ***
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:15E}'"), F(0.25));
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:>15E}'"), F(0.25));
+  check(STR("answer is '2.500000E-01   '"), STR("answer is '{:<15E}'"), F(0.25));
+  check(STR("answer is ' 2.500000E-01  '"), STR("answer is '{:^15E}'"), F(0.25));
+
+  check(STR("answer is '---1.250000E-01'"), STR("answer is '{:->15E}'"), F(125e-3));
+  check(STR("answer is '1.250000E-01---'"), STR("answer is '{:-<15E}'"), F(125e-3));
+  check(STR("answer is '-1.250000E-01--'"), STR("answer is '{:-^15E}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6E}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6E}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6E}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7E}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7E}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7E}'"), nan_neg);
+
+  // Test whether zero padding is ignored when an explicit alignment is given
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:>015E}'"), F(0.25));
+  check(STR("answer is '2.500000E-01   '"), STR("answer is '{:<015E}'"), F(0.25));
+  check(STR("answer is ' 2.500000E-01  '"), STR("answer is '{:^015E}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:E}'"), F(0));
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:-E}'"), F(0));
+  check(STR("answer is '+0.000000E+00'"), STR("answer is '{:+E}'"), F(0));
+  check(STR("answer is ' 0.000000E+00'"), STR("answer is '{: E}'"), F(0));
+
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:-E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:+E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{: E}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:E}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-E}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+E}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: E}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: E}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0E+00'"), STR("answer is '{:.0E}'"), F(0));
+  check(STR("answer is '0.E+00'"), STR("answer is '{:#.0E}'"), F(0));
+
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:#E}'"), F(0));
+  check(STR("answer is '2.500000E+00'"), STR("answer is '{:#E}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#E}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#E}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '3.125000E-02'"), STR("answer is '{:07E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+07E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+08E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+09E}'"), 0.03125);
+
+  check(STR("answer is '003.125000E-02'"), STR("answer is '{:014E}'"), 0.03125);
+  check(STR("answer is '003.125000E-02'"), STR("answer is '{:-014E}'"), 0.03125);
+  check(STR("answer is '+03.125000E-02'"), STR("answer is '{:+014E}'"), 0.03125);
+  check(STR("answer is ' 03.125000E-02'"), STR("answer is '{: 014E}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010E}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010E}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010E}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010E}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010E}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '3E-02'"), STR("answer is '{:.0E}'"), 0.03125);
+  check(STR("answer is '3.1E-02'"), STR("answer is '{:.1E}'"), 0.03125);
+  check(STR("answer is '3.125E-02'"), STR("answer is '{:.3E}'"), 0.03125);
+  check(STR("answer is '3.1250000000E-02'"), STR("answer is '{:.10E}'"), 0.03125);
+
+  // *** locale-specific form *** (see locale-specific_form.pass.cpp)
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_fixed_lower_case(TestFunction check) {
+  // Runs check(expected, fmt, arg) over the 'f' presentation type: fill/align, sign, '#', zero-padding, precision.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, F(-1.0));     // "-nan"; F(-1.0) avoids promoting the result to double
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.250000'"), STR("answer is '{:11f}'"), F(0.25));
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>11f}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<11f}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^11f}'"), F(0.25));
+
+  check(STR("answer is '---0.125000'"), STR("answer is '{:->11f}'"), F(125e-3));
+  check(STR("answer is '0.125000---'"), STR("answer is '{:-<11f}'"), F(125e-3));
+  check(STR("answer is '-0.125000--'"), STR("answer is '{:-^11f}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6f}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6f}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6f}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7f}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7f}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7f}'"), nan_neg);
+
+  // Test whether zero padding is ignored when an explicit alignment is given
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>011f}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<011f}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^011f}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000'"), STR("answer is '{:f}'"), F(0));
+  check(STR("answer is '0.000000'"), STR("answer is '{:-f}'"), F(0));
+  check(STR("answer is '+0.000000'"), STR("answer is '{:+f}'"), F(0));
+  check(STR("answer is ' 0.000000'"), STR("answer is '{: f}'"), F(0));
+
+  check(STR("answer is '-0.000000'"), STR("answer is '{:f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:-f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:+f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{: f}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:f}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-f}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+f}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: f}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: f}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0f}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0f}'"), F(0));
+
+  check(STR("answer is '0.000000'"), STR("answer is '{:#f}'"), F(0));
+  check(STR("answer is '2.500000'"), STR("answer is '{:#f}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#f}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#f}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.031250'"), STR("answer is '{:07f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+07f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+08f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+09f}'"), 0.03125);
+
+  check(STR("answer is '000.031250'"), STR("answer is '{:010f}'"), 0.03125);
+  check(STR("answer is '000.031250'"), STR("answer is '{:-010f}'"), 0.03125);
+  check(STR("answer is '+00.031250'"), STR("answer is '{:+010f}'"), 0.03125);
+  check(STR("answer is ' 00.031250'"), STR("answer is '{: 010f}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010f}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010f}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010f}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010f}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010f}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0'"), STR("answer is '{:.0f}'"), 0.03125);
+  check(STR("answer is '0.0'"), STR("answer is '{:.1f}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5f}'"), 0.03125);
+  check(STR("answer is '0.0312500000'"), STR("answer is '{:.10f}'"), 0.03125);
+
+  // *** locale-specific form *** (see locale-specific_form.pass.cpp)
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_fixed_upper_case(TestFunction check) {
+  // Runs check(expected, fmt, arg) over the 'F' presentation type: fill/align, sign, '#', zero-padding, precision.
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, F(-1.0));     // "-nan"; F(-1.0) avoids promoting the result to double
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.250000'"), STR("answer is '{:11F}'"), F(0.25));
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>11F}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<11F}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^11F}'"), F(0.25));
+
+  check(STR("answer is '---0.125000'"), STR("answer is '{:->11F}'"), F(125e-3));
+  check(STR("answer is '0.125000---'"), STR("answer is '{:-<11F}'"), F(125e-3));
+  check(STR("answer is '-0.125000--'"), STR("answer is '{:-^11F}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6F}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6F}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6F}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7F}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7F}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7F}'"), nan_neg);
+
+  // Test whether zero padding is ignored when an explicit alignment is given
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>011F}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<011F}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^011F}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000'"), STR("answer is '{:F}'"), F(0));
+  check(STR("answer is '0.000000'"), STR("answer is '{:-F}'"), F(0));
+  check(STR("answer is '+0.000000'"), STR("answer is '{:+F}'"), F(0));
+  check(STR("answer is ' 0.000000'"), STR("answer is '{: F}'"), F(0));
+
+  check(STR("answer is '-0.000000'"), STR("answer is '{:F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:-F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:+F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{: F}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:F}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-F}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+F}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: F}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: F}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0F}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0F}'"), F(0));
+
+  check(STR("answer is '0.000000'"), STR("answer is '{:#F}'"), F(0));
+  check(STR("answer is '2.500000'"), STR("answer is '{:#F}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#F}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#F}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.031250'"), STR("answer is '{:07F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+07F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+08F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+09F}'"), 0.03125);
+
+  check(STR("answer is '000.031250'"), STR("answer is '{:010F}'"), 0.03125);
+  check(STR("answer is '000.031250'"), STR("answer is '{:-010F}'"), 0.03125);
+  check(STR("answer is '+00.031250'"), STR("answer is '{:+010F}'"), 0.03125);
+  check(STR("answer is ' 00.031250'"), STR("answer is '{: 010F}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010F}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010F}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010F}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010F}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010F}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0'"), STR("answer is '{:.0F}'"), 0.03125);
+  check(STR("answer is '0.0'"), STR("answer is '{:.1F}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5F}'"), 0.03125);
+  check(STR("answer is '0.0312500000'"), STR("answer is '{:.10F}'"), 0.03125);
+
+  // *** locale-specific form *** (see locale-specific_form.pass.cpp)
+}
+
+template 
+void format_test_floating_point_general_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7g}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7g}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7g}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7g}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8g}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8g}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8g}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6g}'"), std::numeric_limits::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6g}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7g}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6g}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6g}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6g}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7g}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7g}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7g}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07g}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07g}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07g}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:g}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-g}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+g}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: g}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: g}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:g}'"), std::numeric_limits::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+g}'"), std::numeric_limits::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: g}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: g}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:g}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-g}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+g}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: g}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: g}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0g}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0g}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#g}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#g}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#g}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#g}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#g}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08g}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09g}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09g}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09g}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09g}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010g}'"), std::numeric_limits::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010g}'"), std::numeric_limits::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010g}'"), -std::numeric_limits::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010g}'"), -std::numeric_limits::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010g}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010g}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010g}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010g}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010g}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0g}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1g}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2g}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10g}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_general_upper_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7G}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7G}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7G}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7G}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8G}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8G}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8G}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6G}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6G}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6G}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7G}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7G}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7G}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07G}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07G}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07G}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:G}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-G}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+G}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: G}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: G}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:G}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-G}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+G}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: G}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: G}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0G}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0G}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#G}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#G}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#G}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#G}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08G}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09G}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09G}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09G}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09G}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010G}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010G}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010G}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010G}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010G}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0G}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1G}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2G}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10G}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_default(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: }'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: }'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: }'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: }'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: }'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: }'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '0.'"), STR("answer is '{:#}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:07}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09}'"), 0.03125);
+
+  check(STR("answer is '0000.03125'"), STR("answer is '{:010}'"), 0.03125);
+  check(STR("answer is '0000.03125'"), STR("answer is '{:-010}'"), 0.03125);
+  check(STR("answer is '+000.03125'"), STR("answer is '{:+010}'"), 0.03125);
+  check(STR("answer is ' 000.03125'"), STR("answer is '{: 010}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_default_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_default_precision(TestFunction check) {
+
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7.6}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7.6}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7.6}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7.6}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8.6}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8.6}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8.6}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6.6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6.6}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6.6}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6.6}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7.6}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7.6}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7.6}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07.6}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07.6}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07.6}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:.6}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-.6}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+.6}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: .6}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: .6}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: .6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: .6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:.6}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-.6}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+.6}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: .6}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: .6}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#.6}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#.6}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#.6}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#.6}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08.6}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09.6}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09.6}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09.6}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09.6}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010.6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010.6}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010.6}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010.6}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010.6}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010.6}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction, class ExceptionTest>
+void format_test_floating_point(TestFunction check, ExceptionTest check_exception) {
+  format_test_floating_point_hex_lower_case<F, CharT>(check);
+  format_test_floating_point_hex_upper_case<F, CharT>(check);
+  format_test_floating_point_hex_lower_case_precision<F, CharT>(check);
+  format_test_floating_point_hex_upper_case_precision<F, CharT>(check);
+
+  format_test_floating_point_scientific_lower_case<F, CharT>(check);
+  format_test_floating_point_scientific_upper_case<F, CharT>(check);
+
+  format_test_floating_point_fixed_lower_case<F, CharT>(check);
+  format_test_floating_point_fixed_upper_case<F, CharT>(check);
+
+  format_test_floating_point_general_lower_case<F, CharT>(check);
+  format_test_floating_point_general_upper_case<F, CharT>(check);
+
+  format_test_floating_point_default<F, CharT>(check);
+  format_test_floating_point_default_precision<F, CharT>(check);
+
+  // *** type ***
+  for (const auto& fmt : invalid_types<CharT>("aAeEfFgG"))
+    check_exception("The format-spec type has a type not supported for a floating-point argument", fmt, F(1));
+}
+
+template <class CharT, class TestFunction, class ExceptionTest>
+void format_test_floating_point(TestFunction check, ExceptionTest check_exception) {
+  format_test_floating_point<float, CharT>(check, check_exception);
+  format_test_floating_point<double, CharT>(check, check_exception);
+  format_test_floating_point<long double, CharT>(check, check_exception);
+}
+
 template <class CharT, class TestFunction, class ExceptionTest>
 void format_tests(TestFunction check, ExceptionTest check_exception) {
   // *** Test escaping  ***
@@ -1115,12 +2607,10 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   format_test_unsigned_integer<CharT>(check, check_exception);
 
   // *** Test floating point format argument ***
-// TODO FMT Enable after floating-point support has been enabled
-#if 0
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<float>(42));
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<double>(42));
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<long double>(42));
-#endif
+  check(STR("hello 42"), STR("hello {}"), static_cast<float>(42));
+  check(STR("hello 42"), STR("hello {}"), static_cast<double>(42));
+  check(STR("hello 42"), STR("hello {}"), static_cast<long double>(42));
+  format_test_floating_point<CharT>(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index 7fe6907f2f4bd..5d86e46be7e6b 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -110,6 +110,7 @@ struct numpunct : std::numpunct {
 
   std::string do_grouping() const override { return "\1\2\3\2\1"; };
   char do_thousands_sep() const override { return '_'; }
+  char do_decimal_point() const override { return '#'; }
 };
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
@@ -120,6 +121,7 @@ struct numpunct : std::numpunct {
 
   std::string do_grouping() const override { return "\1\2\3\2\1"; };
   wchar_t do_thousands_sep() const override { return L'_'; }
+  wchar_t do_decimal_point() const override { return L'#'; }
 };
 #endif
 
@@ -607,10 +609,1725 @@ void test_integer() {
   test(STR("-0X004_A"), loc, STR("{:#08LX}"), -0x4a);
 }
 
+template <class F, class CharT>
+void test_floating_point_hex_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23456p-3"), STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1.23456p-2"), STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1.23456p-1"), STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1.23456p+0"), STR("{:La}"), F(0x1.23456p0));
+  test(STR("1.23456p+1"), STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1.23456p+2"), STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1.23456p+3"), STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1.23456p+20"), STR("{:La}"), F(0x1.23456p+20));
+
+  std::locale::global(loc);
+  test(STR("1#23456p-3"), STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1#23456p-2"), STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1#23456p-1"), STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1#23456p+0"), STR("{:La}"), F(0x1.23456p0));
+  test(STR("1#23456p+1"), STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1#23456p+2"), STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1#23456p+3"), STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1#23456p+20"), STR("{:La}"), F(0x1.23456p+20));
+
+  test(STR("1.23456p-3"), en_US, STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1.23456p-2"), en_US, STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1.23456p-1"), en_US, STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1.23456p+0"), en_US, STR("{:La}"), F(0x1.23456p0));
+  test(STR("1.23456p+1"), en_US, STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1.23456p+2"), en_US, STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1.23456p+3"), en_US, STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1.23456p+20"), en_US, STR("{:La}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US);
+  test(STR("1#23456p-3"), loc, STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1#23456p-2"), loc, STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1#23456p-1"), loc, STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1#23456p+0"), loc, STR("{:La}"), F(0x1.23456p0));
+  test(STR("1#23456p+1"), loc, STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1#23456p+2"), loc, STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1#23456p+3"), loc, STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1#23456p+20"), loc, STR("{:La}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1.23456p+3$$$"), STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1.23456p+3"), STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1.23456p+3$$"), STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001.23456p+3"), STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1.23456p+3$$$"), STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456p+3"), STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1.23456p+3$$"), STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001.23456p+3"), STR("{:014La}"), F(-0x1.23456p3));
+
+  std::locale::global(loc);
+  test(STR("1#23456p+3$$$"), STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1#23456p+3"), STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1#23456p+3$$"), STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001#23456p+3"), STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1#23456p+3$$$"), STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456p+3"), STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1#23456p+3$$"), STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001#23456p+3"), STR("{:014La}"), F(-0x1.23456p3));
+
+  test(STR("1.23456p+3$$$"), en_US, STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1.23456p+3"), en_US, STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1.23456p+3$$"), en_US, STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001.23456p+3"), en_US, STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1.23456p+3$$$"), en_US, STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456p+3"), en_US, STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1.23456p+3$$"), en_US, STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001.23456p+3"), en_US, STR("{:014La}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US);
+  test(STR("1#23456p+3$$$"), loc, STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1#23456p+3"), loc, STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1#23456p+3$$"), loc, STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001#23456p+3"), loc, STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1#23456p+3$$$"), loc, STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456p+3"), loc, STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1#23456p+3$$"), loc, STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001#23456p+3"), loc, STR("{:014La}"), F(-0x1.23456p3));
+}
+
+template // NOTE(review): the template parameter list appears to have been stripped here (F, used below, is presumably one parameter) - confirm against the upstream file.
+void test_floating_point_hex_upper_case() { // Checks the expected output of the locale-specific upper-case hex-float format ("{:LA}") for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // Copy of the current global locale with a numpunct facet installed (numpunct is declared outside this view; its template arguments look stripped - TODO confirm). Expectations below use '#' as its radix character.
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8); // Named en_US locale; expectations below use '.' as its radix character.
+
+  // *** Basic *** (no width or fill; exponents from -3 to +20)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.23456P-3"), STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1.23456P-2"), STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1.23456P-1"), STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1.23456P+0"), STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1.23456P+1"), STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1.23456P+2"), STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1.23456P+3"), STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1.23456P+20"), STR("{:LA}"), F(0x1.23456p+20));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#23456P-3"), STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1#23456P-2"), STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1#23456P-1"), STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1#23456P+0"), STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1#23456P+1"), STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1#23456P+2"), STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1#23456P+3"), STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1#23456P+20"), STR("{:LA}"), F(0x1.23456p+20));
+
+  test(STR("1.23456P-3"), en_US, STR("{:LA}"), F(0x1.23456p-3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("1.23456P-2"), en_US, STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1.23456P-1"), en_US, STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1.23456P+0"), en_US, STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1.23456P+1"), en_US, STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1.23456P+2"), en_US, STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1.23456P+3"), en_US, STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1.23456P+20"), en_US, STR("{:LA}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#23456P-3"), loc, STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1#23456P-2"), loc, STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1#23456P-1"), loc, STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1#23456P+0"), loc, STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1#23456P+1"), loc, STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1#23456P+2"), loc, STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1#23456P+3"), loc, STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1#23456P+20"), loc, STR("{:LA}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding *** (width 13 for the positive value, 14 for the negative one; '$' is the fill character)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.23456P+3$$$"), STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1.23456P+3"), STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1.23456P+3$$"), STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001.23456P+3"), STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1.23456P+3$$$"), STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456P+3"), STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1.23456P+3$$"), STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001.23456P+3"), STR("{:014LA}"), F(-0x1.23456p3));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#23456P+3$$$"), STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1#23456P+3"), STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1#23456P+3$$"), STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001#23456P+3"), STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1#23456P+3$$$"), STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456P+3"), STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1#23456P+3$$"), STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001#23456P+3"), STR("{:014LA}"), F(-0x1.23456p3));
+
+  test(STR("1.23456P+3$$$"), en_US, STR("{:$<13LA}"), F(0x1.23456p3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("$$$1.23456P+3"), en_US, STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1.23456P+3$$"), en_US, STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001.23456P+3"), en_US, STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1.23456P+3$$$"), en_US, STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456P+3"), en_US, STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1.23456P+3$$"), en_US, STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001.23456P+3"), en_US, STR("{:014LA}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#23456P+3$$$"), loc, STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1#23456P+3"), loc, STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1#23456P+3$$"), loc, STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001#23456P+3"), loc, STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1#23456P+3$$$"), loc, STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456P+3"), loc, STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1#23456P+3$$"), loc, STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001#23456P+3"), loc, STR("{:014LA}"), F(-0x1.23456p3));
+}
+
+template // NOTE(review): the template parameter list appears to have been stripped here (F, used below, is presumably one parameter) - confirm against the upstream file.
+void test_floating_point_hex_lower_case_precision() { // Checks the expected output of the locale-specific lower-case hex-float format with precision 6 ("{:.6La}") for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // Copy of the current global locale with a numpunct facet installed (numpunct is declared outside this view; its template arguments look stripped - TODO confirm). Expectations below use '#' as its radix character.
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8); // Named en_US locale; expectations below use '.' as its radix character.
+
+  // *** Basic *** (no width or fill; the expected mantissa carries six hex digits, "234560")
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234560p-3"), STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1.234560p-2"), STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1.234560p-1"), STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1.234560p+0"), STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1.234560p+1"), STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1.234560p+2"), STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1.234560p+3"), STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1.234560p+20"), STR("{:.6La}"), F(0x1.23456p+20));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234560p-3"), STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1#234560p-2"), STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1#234560p-1"), STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1#234560p+0"), STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1#234560p+1"), STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1#234560p+2"), STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1#234560p+3"), STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1#234560p+20"), STR("{:.6La}"), F(0x1.23456p+20));
+
+  test(STR("1.234560p-3"), en_US, STR("{:.6La}"), F(0x1.23456p-3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("1.234560p-2"), en_US, STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1.234560p-1"), en_US, STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1.234560p+0"), en_US, STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1.234560p+1"), en_US, STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1.234560p+2"), en_US, STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1.234560p+3"), en_US, STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1.234560p+20"), en_US, STR("{:.6La}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234560p-3"), loc, STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1#234560p-2"), loc, STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1#234560p-1"), loc, STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1#234560p+0"), loc, STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1#234560p+1"), loc, STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1#234560p+2"), loc, STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1#234560p+3"), loc, STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1#234560p+20"), loc, STR("{:.6La}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding *** (width 14 for the positive value, 15 for the negative one; '$' is the fill character)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234560p+3$$$"), STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1.234560p+3"), STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1.234560p+3$$"), STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001.234560p+3"), STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1.234560p+3$$$"), STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560p+3"), STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1.234560p+3$$"), STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001.234560p+3"), STR("{:015.6La}"), F(-0x1.23456p3));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234560p+3$$$"), STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1#234560p+3"), STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1#234560p+3$$"), STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001#234560p+3"), STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1#234560p+3$$$"), STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560p+3"), STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1#234560p+3$$"), STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001#234560p+3"), STR("{:015.6La}"), F(-0x1.23456p3));
+
+  test(STR("1.234560p+3$$$"), en_US, STR("{:$<14.6La}"), F(0x1.23456p3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("$$$1.234560p+3"), en_US, STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1.234560p+3$$"), en_US, STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001.234560p+3"), en_US, STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1.234560p+3$$$"), en_US, STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560p+3"), en_US, STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1.234560p+3$$"), en_US, STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001.234560p+3"), en_US, STR("{:015.6La}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234560p+3$$$"), loc, STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1#234560p+3"), loc, STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1#234560p+3$$"), loc, STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001#234560p+3"), loc, STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1#234560p+3$$$"), loc, STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560p+3"), loc, STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1#234560p+3$$"), loc, STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001#234560p+3"), loc, STR("{:015.6La}"), F(-0x1.23456p3));
+}
+
+template // NOTE(review): the template parameter list appears to have been stripped here (F, used below, is presumably one parameter) - confirm against the upstream file.
+void test_floating_point_hex_upper_case_precision() { // Checks the expected output of the locale-specific upper-case hex-float format with precision 6 ("{:.6LA}") for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // Copy of the current global locale with a numpunct facet installed (numpunct is declared outside this view; its template arguments look stripped - TODO confirm). Expectations below use '#' as its radix character.
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8); // Named en_US locale; expectations below use '.' as its radix character.
+
+  // *** Basic *** (no width or fill; the expected mantissa carries six hex digits, "234560")
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234560P-3"), STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1.234560P-2"), STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1.234560P-1"), STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1.234560P+0"), STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1.234560P+1"), STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1.234560P+2"), STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1.234560P+3"), STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1.234560P+20"), STR("{:.6LA}"), F(0x1.23456p+20));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234560P-3"), STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1#234560P-2"), STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1#234560P-1"), STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1#234560P+0"), STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1#234560P+1"), STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1#234560P+2"), STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1#234560P+3"), STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1#234560P+20"), STR("{:.6LA}"), F(0x1.23456p+20));
+
+  test(STR("1.234560P-3"), en_US, STR("{:.6LA}"), F(0x1.23456p-3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("1.234560P-2"), en_US, STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1.234560P-1"), en_US, STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1.234560P+0"), en_US, STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1.234560P+1"), en_US, STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1.234560P+2"), en_US, STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1.234560P+3"), en_US, STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1.234560P+20"), en_US, STR("{:.6LA}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234560P-3"), loc, STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1#234560P-2"), loc, STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1#234560P-1"), loc, STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1#234560P+0"), loc, STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1#234560P+1"), loc, STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1#234560P+2"), loc, STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1#234560P+3"), loc, STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1#234560P+20"), loc, STR("{:.6LA}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding *** (width 14 for the positive value, 15 for the negative one; '$' is the fill character)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234560P+3$$$"), STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1.234560P+3"), STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1.234560P+3$$"), STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001.234560P+3"), STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1.234560P+3$$$"), STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560P+3"), STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1.234560P+3$$"), STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001.234560P+3"), STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234560P+3$$$"), STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1#234560P+3"), STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1#234560P+3$$"), STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001#234560P+3"), STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1#234560P+3$$$"), STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560P+3"), STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1#234560P+3$$"), STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001#234560P+3"), STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  test(STR("1.234560P+3$$$"), en_US, STR("{:$<14.6LA}"), F(0x1.23456p3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("$$$1.234560P+3"), en_US, STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1.234560P+3$$"), en_US, STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001.234560P+3"), en_US, STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1.234560P+3$$$"), en_US, STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560P+3"), en_US, STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1.234560P+3$$"), en_US, STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001.234560P+3"), en_US, STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234560P+3$$$"), loc, STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1#234560P+3"), loc, STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1#234560P+3$$"), loc, STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001#234560P+3"), loc, STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1#234560P+3$$$"), loc, STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560P+3"), loc, STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1#234560P+3$$"), loc, STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001#234560P+3"), loc, STR("{:015.6LA}"), F(-0x1.23456p3));
+}
+
+template // NOTE(review): the template parameter list appears to have been stripped here (F, used below, is presumably one parameter) - confirm against the upstream file.
+void test_floating_point_scientific_lower_case() { // Checks the expected output of the locale-specific lower-case scientific format with precision 6 ("{:.6Le}") for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // Copy of the current global locale with a numpunct facet installed (numpunct is declared outside this view; its template arguments look stripped - TODO confirm). Expectations below use '#' as its radix character.
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8); // Named en_US locale; expectations below use '.' as its radix character.
+
+  // *** Basic *** (no width or fill; positive and negative values with exponents from -03 to +20)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234567e-03"), STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1.234567e-02"), STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1.234567e-01"), STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1.234567e+00"), STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1.234567e+01"), STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1.234567e+02"), STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1.234567e+03"), STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1.234567e+20"), STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1.234567e-03"), STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1.234567e-02"), STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1.234567e-01"), STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1.234567e+00"), STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1.234567e+01"), STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1.234567e+02"), STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1.234567e+03"), STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1.234567e+20"), STR("{:.6Le}"), F(-1.234567e20));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234567e-03"), STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1#234567e-02"), STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1#234567e-01"), STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1#234567e+00"), STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1#234567e+01"), STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1#234567e+02"), STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1#234567e+03"), STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1#234567e+20"), STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1#234567e-03"), STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1#234567e-02"), STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1#234567e-01"), STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1#234567e+00"), STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1#234567e+01"), STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1#234567e+02"), STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1#234567e+03"), STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1#234567e+20"), STR("{:.6Le}"), F(-1.234567e20));
+
+  test(STR("1.234567e-03"), en_US, STR("{:.6Le}"), F(1.234567e-3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("1.234567e-02"), en_US, STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1.234567e-01"), en_US, STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1.234567e+00"), en_US, STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1.234567e+01"), en_US, STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1.234567e+02"), en_US, STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1.234567e+03"), en_US, STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1.234567e+20"), en_US, STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1.234567e-03"), en_US, STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1.234567e-02"), en_US, STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1.234567e-01"), en_US, STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1.234567e+00"), en_US, STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1.234567e+01"), en_US, STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1.234567e+02"), en_US, STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1.234567e+03"), en_US, STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1.234567e+20"), en_US, STR("{:.6Le}"), F(-1.234567e20));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234567e-03"), loc, STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1#234567e-02"), loc, STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1#234567e-01"), loc, STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1#234567e+00"), loc, STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1#234567e+01"), loc, STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1#234567e+02"), loc, STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1#234567e+03"), loc, STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1#234567e+20"), loc, STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1#234567e-03"), loc, STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1#234567e-02"), loc, STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1#234567e-01"), loc, STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1#234567e+00"), loc, STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1#234567e+01"), loc, STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1#234567e+02"), loc, STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1#234567e+03"), loc, STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1#234567e+20"), loc, STR("{:.6Le}"), F(-1.234567e20));
+
+  // *** Fill, align, zero padding *** (width 15 for the positive value, 16 for the negative one; '$' is the fill character)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234567e+03$$$"), STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1.234567e+03"), STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1.234567e+03$$"), STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001.234567e+03"), STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1.234567e+03$$$"), STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1.234567e+03"), STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1.234567e+03$$"), STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001.234567e+03"), STR("{:016.6Le}"), F(-1.234567e3));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234567e+03$$$"), STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1#234567e+03"), STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1#234567e+03$$"), STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001#234567e+03"), STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1#234567e+03$$$"), STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1#234567e+03"), STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1#234567e+03$$"), STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001#234567e+03"), STR("{:016.6Le}"), F(-1.234567e3));
+
+  test(STR("1.234567e+03$$$"), en_US, STR("{:$<15.6Le}"), F(1.234567e3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("$$$1.234567e+03"), en_US, STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1.234567e+03$$"), en_US, STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001.234567e+03"), en_US, STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1.234567e+03$$$"), en_US, STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1.234567e+03"), en_US, STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1.234567e+03$$"), en_US, STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001.234567e+03"), en_US, STR("{:016.6Le}"), F(-1.234567e3));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234567e+03$$$"), loc, STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1#234567e+03"), loc, STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1#234567e+03$$"), loc, STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001#234567e+03"), loc, STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1#234567e+03$$$"), loc, STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1#234567e+03"), loc, STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1#234567e+03$$"), loc, STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001#234567e+03"), loc, STR("{:016.6Le}"), F(-1.234567e3));
+}
+
+template // NOTE(review): the template parameter list appears to have been stripped here (F, used below, is presumably one parameter) - confirm against the upstream file.
+void test_floating_point_scientific_upper_case() { // Checks the expected output of the locale-specific upper-case scientific format with precision 6 ("{:.6LE}") for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // Copy of the current global locale with a numpunct facet installed (numpunct is declared outside this view; its template arguments look stripped - TODO confirm). Expectations below use '#' as its radix character.
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8); // Named en_US locale; expectations below use '.' as its radix character.
+
+  // *** Basic *** (no width or fill; positive and negative values with exponents from -03 to +20)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234567E-03"), STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1.234567E-02"), STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1.234567E-01"), STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1.234567E+00"), STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1.234567E+01"), STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1.234567E+02"), STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1.234567E+03"), STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1.234567E+20"), STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1.234567E-03"), STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1.234567E-02"), STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1.234567E-01"), STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1.234567E+00"), STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1.234567E+01"), STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1.234567E+02"), STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1.234567E+03"), STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1.234567E+20"), STR("{:.6LE}"), F(-1.234567e20));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234567E-03"), STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1#234567E-02"), STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1#234567E-01"), STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1#234567E+00"), STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1#234567E+01"), STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1#234567E+02"), STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1#234567E+03"), STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1#234567E+20"), STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1#234567E-03"), STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1#234567E-02"), STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1#234567E-01"), STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1#234567E+00"), STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1#234567E+01"), STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1#234567E+02"), STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1#234567E+03"), STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1#234567E+20"), STR("{:.6LE}"), F(-1.234567e20));
+
+  test(STR("1.234567E-03"), en_US, STR("{:.6LE}"), F(1.234567e-3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("1.234567E-02"), en_US, STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1.234567E-01"), en_US, STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1.234567E+00"), en_US, STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1.234567E+01"), en_US, STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1.234567E+02"), en_US, STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1.234567E+03"), en_US, STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1.234567E+20"), en_US, STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1.234567E-03"), en_US, STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1.234567E-02"), en_US, STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1.234567E-01"), en_US, STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1.234567E+00"), en_US, STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1.234567E+01"), en_US, STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1.234567E+02"), en_US, STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1.234567E+03"), en_US, STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1.234567E+20"), en_US, STR("{:.6LE}"), F(-1.234567e20));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234567E-03"), loc, STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1#234567E-02"), loc, STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1#234567E-01"), loc, STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1#234567E+00"), loc, STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1#234567E+01"), loc, STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1#234567E+02"), loc, STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1#234567E+03"), loc, STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1#234567E+20"), loc, STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1#234567E-03"), loc, STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1#234567E-02"), loc, STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1#234567E-01"), loc, STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1#234567E+00"), loc, STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1#234567E+01"), loc, STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1#234567E+02"), loc, STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1#234567E+03"), loc, STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1#234567E+20"), loc, STR("{:.6LE}"), F(-1.234567e20));
+
+  // *** Fill, align, zero padding *** (width 15 for the positive value, 16 for the negative one; '$' is the fill character)
+  std::locale::global(en_US); // Global locale en_US, no locale argument: '.' expected.
+  test(STR("1.234567E+03$$$"), STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1.234567E+03"), STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1.234567E+03$$"), STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001.234567E+03"), STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1.234567E+03$$$"), STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1.234567E+03"), STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1.234567E+03$$"), STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001.234567E+03"), STR("{:016.6LE}"), F(-1.234567e3));
+
+  std::locale::global(loc); // Global locale loc, no locale argument: '#' expected.
+  test(STR("1#234567E+03$$$"), STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1#234567E+03"), STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1#234567E+03$$"), STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001#234567E+03"), STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1#234567E+03$$$"), STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1#234567E+03"), STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1#234567E+03$$"), STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001#234567E+03"), STR("{:016.6LE}"), F(-1.234567e3));
+
+  test(STR("1.234567E+03$$$"), en_US, STR("{:$<15.6LE}"), F(1.234567e3)); // Global locale is still loc; the explicit en_US argument is expected to take precedence.
+  test(STR("$$$1.234567E+03"), en_US, STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1.234567E+03$$"), en_US, STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001.234567E+03"), en_US, STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1.234567E+03$$$"), en_US, STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1.234567E+03"), en_US, STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1.234567E+03$$"), en_US, STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001.234567E+03"), en_US, STR("{:016.6LE}"), F(-1.234567e3));
+
+  std::locale::global(en_US); // Global locale en_US, explicit loc argument: '#' expected.
+  test(STR("1#234567E+03$$$"), loc, STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1#234567E+03"), loc, STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1#234567E+03$$"), loc, STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001#234567E+03"), loc, STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1#234567E+03$$$"), loc, STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1#234567E+03"), loc, STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1#234567E+03$$"), loc, STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001#234567E+03"), loc, STR("{:016.6LE}"), F(-1.234567e3));
+}
+
+template 
+void test_floating_point_fixed_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("0.000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(loc);
+  test(STR("0#000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  test(STR("0.000001"), en_US, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), en_US, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), en_US, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), en_US, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), en_US, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), en_US, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), en_US, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), en_US, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), en_US, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), en_US, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), en_US, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), en_US, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), en_US, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), en_US, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), en_US, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), en_US, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), en_US, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), en_US, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), en_US, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), en_US, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), en_US, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), en_US, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(en_US);
+  test(STR("0#000001"), loc, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), loc, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), loc, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), loc, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), loc, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), loc, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), loc, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), loc, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), loc, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), loc, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), loc, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), loc, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), loc, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), loc, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), loc, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), loc, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), loc, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), loc, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), loc, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), loc, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  // *** Fill, align, zero padding ***
+  if constexpr (sizeof(F) > sizeof(float)) {
+    std::locale::global(en_US);
+    test(STR("1,234.567000$$$"), STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(loc);
+    test(STR("1_23_4#567000$$$"), STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), STR("{:017.6Lf}"), F(-1.234567e3));
+
+    test(STR("1,234.567000$$$"), en_US, STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), en_US, STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), en_US, STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), en_US, STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), en_US, STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), en_US, STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), en_US, STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), en_US, STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(en_US);
+    test(STR("1_23_4#567000$$$"), loc, STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), loc, STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), loc, STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), loc, STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), loc, STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), loc, STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), loc, STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), loc, STR("{:017.6Lf}"), F(-1.234567e3));
+  }
+}
+
+template 
+void test_floating_point_fixed_upper_case() { // Checks the locale-specific ('L') form of the upper-case fixed ('F') type.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // loc expectations below use '#' and '_' where en_US has '.' and ','
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic *** ('F' differs from 'f' only for inf/nan, so these finite expectations are case-neutral)
+  std::locale::global(en_US);
+  test(STR("0.000001"), STR("{:.6LF}"), F(1.234567e-6));
+  test(STR("0.000012"), STR("{:.6LF}"), F(1.234567e-5));
+  test(STR("0.000123"), STR("{:.6LF}"), F(1.234567e-4));
+  test(STR("0.001235"), STR("{:.6LF}"), F(1.234567e-3));
+  test(STR("0.012346"), STR("{:.6LF}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6LF}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:.6LF}"), F(1.234567e0));
+  test(STR("12.345670"), STR("{:.6LF}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) { // NOTE(review): presumably float lacks the precision for these digits - confirm
+    test(STR("123.456700"), STR("{:.6LF}"), F(1.234567e2));
+    test(STR("1,234.567000"), STR("{:.6LF}"), F(1.234567e3));
+    test(STR("12,345.670000"), STR("{:.6LF}"), F(1.234567e4));
+    test(STR("123,456.700000"), STR("{:.6LF}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), STR("{:.6LF}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), STR("{:.6LF}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), STR("{:.6LF}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), STR("{:.6LF}"), F(-1.234567e-6));
+  test(STR("-0.000012"), STR("{:.6LF}"), F(-1.234567e-5));
+  test(STR("-0.000123"), STR("{:.6LF}"), F(-1.234567e-4));
+  test(STR("-0.001235"), STR("{:.6LF}"), F(-1.234567e-3));
+  test(STR("-0.012346"), STR("{:.6LF}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6LF}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:.6LF}"), F(-1.234567e0));
+  test(STR("-12.345670"), STR("{:.6LF}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), STR("{:.6LF}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), STR("{:.6LF}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), STR("{:.6LF}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), STR("{:.6LF}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), STR("{:.6LF}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), STR("{:.6LF}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), STR("{:.6LF}"), F(-1.234567e20));
+  }
+
+  std::locale::global(loc); // no locale argument below: the expectations follow the global locale just installed
+  test(STR("0#000001"), STR("{:.6LF}"), F(1.234567e-6));
+  test(STR("0#000012"), STR("{:.6LF}"), F(1.234567e-5));
+  test(STR("0#000123"), STR("{:.6LF}"), F(1.234567e-4));
+  test(STR("0#001235"), STR("{:.6LF}"), F(1.234567e-3));
+  test(STR("0#012346"), STR("{:.6LF}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6LF}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:.6LF}"), F(1.234567e0));
+  test(STR("1_2#345670"), STR("{:.6LF}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), STR("{:.6LF}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), STR("{:.6LF}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), STR("{:.6LF}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), STR("{:.6LF}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), STR("{:.6LF}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), STR("{:.6LF}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6LF}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), STR("{:.6LF}"), F(-1.234567e-6));
+  test(STR("-0#000012"), STR("{:.6LF}"), F(-1.234567e-5));
+  test(STR("-0#000123"), STR("{:.6LF}"), F(-1.234567e-4));
+  test(STR("-0#001235"), STR("{:.6LF}"), F(-1.234567e-3));
+  test(STR("-0#012346"), STR("{:.6LF}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6LF}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:.6LF}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), STR("{:.6LF}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), STR("{:.6LF}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), STR("{:.6LF}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), STR("{:.6LF}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), STR("{:.6LF}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), STR("{:.6LF}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), STR("{:.6LF}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6LF}"), F(-1.234567e20));
+  }
+  // Explicit locale argument: the expectations follow en_US although the global locale is still loc.
+  test(STR("0.000001"), en_US, STR("{:.6LF}"), F(1.234567e-6));
+  test(STR("0.000012"), en_US, STR("{:.6LF}"), F(1.234567e-5));
+  test(STR("0.000123"), en_US, STR("{:.6LF}"), F(1.234567e-4));
+  test(STR("0.001235"), en_US, STR("{:.6LF}"), F(1.234567e-3));
+  test(STR("0.012346"), en_US, STR("{:.6LF}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6LF}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:.6LF}"), F(1.234567e0));
+  test(STR("12.345670"), en_US, STR("{:.6LF}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), en_US, STR("{:.6LF}"), F(1.234567e2));
+    test(STR("1,234.567000"), en_US, STR("{:.6LF}"), F(1.234567e3));
+    test(STR("12,345.670000"), en_US, STR("{:.6LF}"), F(1.234567e4));
+    test(STR("123,456.700000"), en_US, STR("{:.6LF}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), en_US, STR("{:.6LF}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), en_US, STR("{:.6LF}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), en_US, STR("{:.6LF}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), en_US, STR("{:.6LF}"), F(-1.234567e-6));
+  test(STR("-0.000012"), en_US, STR("{:.6LF}"), F(-1.234567e-5));
+  test(STR("-0.000123"), en_US, STR("{:.6LF}"), F(-1.234567e-4));
+  test(STR("-0.001235"), en_US, STR("{:.6LF}"), F(-1.234567e-3));
+  test(STR("-0.012346"), en_US, STR("{:.6LF}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6LF}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:.6LF}"), F(-1.234567e0));
+  test(STR("-12.345670"), en_US, STR("{:.6LF}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), en_US, STR("{:.6LF}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), en_US, STR("{:.6LF}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), en_US, STR("{:.6LF}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), en_US, STR("{:.6LF}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), en_US, STR("{:.6LF}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), en_US, STR("{:.6LF}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), en_US, STR("{:.6LF}"), F(-1.234567e20));
+  }
+
+  std::locale::global(en_US); // global is en_US here, yet the explicitly passed loc decides the expected output
+  test(STR("0#000001"), loc, STR("{:.6LF}"), F(1.234567e-6));
+  test(STR("0#000012"), loc, STR("{:.6LF}"), F(1.234567e-5));
+  test(STR("0#000123"), loc, STR("{:.6LF}"), F(1.234567e-4));
+  test(STR("0#001235"), loc, STR("{:.6LF}"), F(1.234567e-3));
+  test(STR("0#012346"), loc, STR("{:.6LF}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6LF}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:.6LF}"), F(1.234567e0));
+  test(STR("1_2#345670"), loc, STR("{:.6LF}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), loc, STR("{:.6LF}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), loc, STR("{:.6LF}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), loc, STR("{:.6LF}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), loc, STR("{:.6LF}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), loc, STR("{:.6LF}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), loc, STR("{:.6LF}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6LF}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), loc, STR("{:.6LF}"), F(-1.234567e-6));
+  test(STR("-0#000012"), loc, STR("{:.6LF}"), F(-1.234567e-5));
+  test(STR("-0#000123"), loc, STR("{:.6LF}"), F(-1.234567e-4));
+  test(STR("-0#001235"), loc, STR("{:.6LF}"), F(-1.234567e-3));
+  test(STR("-0#012346"), loc, STR("{:.6LF}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6LF}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:.6LF}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), loc, STR("{:.6LF}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), loc, STR("{:.6LF}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), loc, STR("{:.6LF}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), loc, STR("{:.6LF}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), loc, STR("{:.6LF}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), loc, STR("{:.6LF}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), loc, STR("{:.6LF}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6LF}"), F(-1.234567e20));
+  }
+
+  // *** Fill, align, zero padding *** (widths leave exactly three padding characters around the formatted value)
+  if constexpr (sizeof(F) > sizeof(float)) {
+    std::locale::global(en_US);
+    test(STR("1,234.567000$$$"), STR("{:$<15.6LF}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), STR("{:$>15.6LF}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), STR("{:$^15.6LF}"), F(1.234567e3));
+    test(STR("0001,234.567000"), STR("{:015.6LF}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), STR("{:$<16.6LF}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), STR("{:$>16.6LF}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), STR("{:$^16.6LF}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), STR("{:016.6LF}"), F(-1.234567e3));
+
+    std::locale::global(loc); // loc output is one character longer ("1_23_4" vs "1,234"), hence the wider fields
+    test(STR("1_23_4#567000$$$"), STR("{:$<16.6LF}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), STR("{:$>16.6LF}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), STR("{:$^16.6LF}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), STR("{:016.6LF}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), STR("{:$<17.6LF}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), STR("{:$>17.6LF}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), STR("{:$^17.6LF}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), STR("{:017.6LF}"), F(-1.234567e3));
+
+    test(STR("1,234.567000$$$"), en_US, STR("{:$<15.6LF}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), en_US, STR("{:$>15.6LF}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), en_US, STR("{:$^15.6LF}"), F(1.234567e3));
+    test(STR("0001,234.567000"), en_US, STR("{:015.6LF}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), en_US, STR("{:$<16.6LF}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), en_US, STR("{:$>16.6LF}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), en_US, STR("{:$^16.6LF}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), en_US, STR("{:016.6LF}"), F(-1.234567e3));
+
+    std::locale::global(en_US);
+    test(STR("1_23_4#567000$$$"), loc, STR("{:$<16.6LF}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), loc, STR("{:$>16.6LF}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), loc, STR("{:$^16.6LF}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), loc, STR("{:016.6LF}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), loc, STR("{:$<17.6LF}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), loc, STR("{:$>17.6LF}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), loc, STR("{:$^17.6LF}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), loc, STR("{:017.6LF}"), F(-1.234567e3));
+  }
+}
+
+template 
+void test_floating_point_general_lower_case() { // Checks the locale-specific ('L') form of the lower-case general ('g') type.
+  std::locale loc = std::locale(std::locale(), new numpunct()); // loc expectations below use '#' and '_' where en_US has '.' and ','
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic *** (precision 6; the 1e-6, 1e-5, 1e6 and 1e7 inputs are expected in exponent form without grouping)
+  std::locale::global(en_US);
+  test(STR("1.23457e-06"), STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1.23457e+06"), STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1.23457e+07"), STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), STR("{:.6Lg}"), F(-1.234567e7));
+
+  std::locale::global(loc); // no locale argument below: the expectations follow the global locale just installed
+  test(STR("1#23457e-06"), STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1#23457e+06"), STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1#23457e+07"), STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), STR("{:.6Lg}"), F(-1.234567e7));
+  // Explicit locale argument: the expectations follow en_US although the global locale is still loc.
+  test(STR("1.23457e-06"), en_US, STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), en_US, STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1.23457e+06"), en_US, STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1.23457e+07"), en_US, STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), en_US, STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), en_US, STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), en_US, STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), en_US, STR("{:.6Lg}"), F(-1.234567e7));
+
+  std::locale::global(en_US); // global is en_US here, yet the explicitly passed loc decides the expected output
+  test(STR("1#23457e-06"), loc, STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), loc, STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1#23457e+06"), loc, STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1#23457e+07"), loc, STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), loc, STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), loc, STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), loc, STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), loc, STR("{:.6Lg}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding *** (widths leave exactly three padding characters around the formatted value)
+  std::locale::global(en_US);
+  test(STR("1,234.57$$$"), STR("{:$<11.6Lg}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6Lg}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6Lg}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6Lg}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6Lg}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6Lg}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6Lg}"), F(-1.234567e3));
+
+  std::locale::global(loc); // loc output is one character longer ("1_23_4" vs "1,234"), hence the wider fields
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6Lg}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6Lg}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6Lg}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6Lg}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6Lg}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6Lg}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6Lg}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6Lg}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6Lg}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6Lg}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6Lg}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6Lg}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6Lg}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6Lg}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6Lg}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6Lg}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6Lg}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6Lg}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6Lg}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6Lg}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6Lg}"), F(-1.234567e3));
+}
+
+template 
+void test_floating_point_general_upper_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23457E-06"), STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1.23457E-05"), STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6LG}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6LG}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1.23457E+06"), STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1.23457E+07"), STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1.23457E-06"), STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1.23457E-05"), STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1.23457E+06"), STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1.23457E+07"), STR("{:.6LG}"), F(-1.234567e7));
+
+  std::locale::global(loc);
+  test(STR("1#23457E-06"), STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1#23457E-05"), STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6LG}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6LG}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1#23457E+06"), STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1#23457E+07"), STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1#23457E-06"), STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1#23457E-05"), STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1#23457E+06"), STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1#23457E+07"), STR("{:.6LG}"), F(-1.234567e7));
+
+  test(STR("1.23457E-06"), en_US, STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1.23457E-05"), en_US, STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6LG}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6LG}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1.23457E+06"), en_US, STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1.23457E+07"), en_US, STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1.23457E-06"), en_US, STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1.23457E-05"), en_US, STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1.23457E+06"), en_US, STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1.23457E+07"), en_US, STR("{:.6LG}"), F(-1.234567e7));
+
+  std::locale::global(en_US);
+  test(STR("1#23457E-06"), loc, STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1#23457E-05"), loc, STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6LG}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6LG}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1#23457E+06"), loc, STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1#23457E+07"), loc, STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1#23457E-06"), loc, STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1#23457E-05"), loc, STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1#23457E+06"), loc, STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1#23457E+07"), loc, STR("{:.6LG}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1,234.57$$$"), STR("{:$<11.6LG}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6LG}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6LG}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6LG}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6LG}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6LG}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6LG}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6LG}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6LG}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6LG}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6LG}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6LG}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6LG}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6LG}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6LG}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6LG}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6LG}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6LG}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6LG}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6LG}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6LG}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6LG}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6LG}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6LG}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6LG}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6LG}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6LG}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6LG}"), F(-1.234567e3));
+}
+
+template <class F, class CharT> // F: floating-point type under test; CharT: character type selecting the numpunct facet (and presumably the STR literals)
+void test_floating_point_default() { // Checks "{:L}" (locale-specific form, no type, no precision) for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>()); // custom facet; the expectations below show '#' as decimal point and '_' as separator
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US); // no locale argument: en_US punctuation is expected from the global locale
+  test(STR("1.234567e-06"), STR("{:L}"), F(1.234567e-6));
+  test(STR("1.234567e-05"), STR("{:L}"), F(1.234567e-5));
+  test(STR("0.0001234567"), STR("{:L}"), F(1.234567e-4));
+  test(STR("0.001234567"), STR("{:L}"), F(1.234567e-3));
+  test(STR("0.01234567"), STR("{:L}"), F(1.234567e-2));
+  test(STR("0.1234567"), STR("{:L}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:L}"), F(1.234567e0));
+  test(STR("12.34567"), STR("{:L}"), F(1.234567e1));
+  test(STR("123.4567"), STR("{:L}"), F(1.234567e2));
+  test(STR("1,234.567"), STR("{:L}"), F(1.234567e3));
+  test(STR("12,345.67"), STR("{:L}"), F(1.234567e4));
+  test(STR("123,456.7"), STR("{:L}"), F(1.234567e5));
+  test(STR("1,234,567"), STR("{:L}"), F(1.234567e6));
+  test(STR("12,345,670"), STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) { // larger magnitudes are only checked for types wider than float
+    test(STR("123,456,700"), STR("{:L}"), F(1.234567e8));
+    test(STR("1,234,567,000"), STR("{:L}"), F(1.234567e9));
+    test(STR("12,345,670,000"), STR("{:L}"), F(1.234567e10));
+    test(STR("123,456,700,000"), STR("{:L}"), F(1.234567e11));
+    test(STR("1.234567e+12"), STR("{:L}"), F(1.234567e12));
+    test(STR("1.234567e+13"), STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1.234567e-06"), STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1.234567e-05"), STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0.0001234567"), STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0.001234567"), STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0.01234567"), STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0.1234567"), STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:L}"), F(-1.234567e0));
+  test(STR("-12.34567"), STR("{:L}"), F(-1.234567e1));
+  test(STR("-123.4567"), STR("{:L}"), F(-1.234567e2));
+  test(STR("-1,234.567"), STR("{:L}"), F(-1.234567e3));
+  test(STR("-12,345.67"), STR("{:L}"), F(-1.234567e4));
+  test(STR("-123,456.7"), STR("{:L}"), F(-1.234567e5));
+  test(STR("-1,234,567"), STR("{:L}"), F(-1.234567e6));
+  test(STR("-12,345,670"), STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123,456,700"), STR("{:L}"), F(-1.234567e8));
+    test(STR("-1,234,567,000"), STR("{:L}"), F(-1.234567e9));
+    test(STR("-12,345,670,000"), STR("{:L}"), F(-1.234567e10));
+    test(STR("-123,456,700,000"), STR("{:L}"), F(-1.234567e11));
+    test(STR("-1.234567e+12"), STR("{:L}"), F(-1.234567e12));
+    test(STR("-1.234567e+13"), STR("{:L}"), F(-1.234567e13));
+  }
+
+  std::locale::global(loc); // no locale argument: the custom facet's punctuation is expected from the global locale
+  test(STR("1#234567e-06"), STR("{:L}"), F(1.234567e-6));
+  test(STR("1#234567e-05"), STR("{:L}"), F(1.234567e-5));
+  test(STR("0#0001234567"), STR("{:L}"), F(1.234567e-4));
+  test(STR("0#001234567"), STR("{:L}"), F(1.234567e-3));
+  test(STR("0#01234567"), STR("{:L}"), F(1.234567e-2));
+  test(STR("0#1234567"), STR("{:L}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:L}"), F(1.234567e0));
+  test(STR("1_2#34567"), STR("{:L}"), F(1.234567e1));
+  test(STR("12_3#4567"), STR("{:L}"), F(1.234567e2));
+  test(STR("1_23_4#567"), STR("{:L}"), F(1.234567e3));
+  test(STR("12_34_5#67"), STR("{:L}"), F(1.234567e4));
+  test(STR("123_45_6#7"), STR("{:L}"), F(1.234567e5));
+  test(STR("1_234_56_7"), STR("{:L}"), F(1.234567e6));
+  test(STR("12_345_67_0"), STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("1_23_456_70_0"), STR("{:L}"), F(1.234567e8));
+    test(STR("1_2_34_567_00_0"), STR("{:L}"), F(1.234567e9));
+    test(STR("1_2_3_45_670_00_0"), STR("{:L}"), F(1.234567e10));
+    test(STR("1_2_3_4_56_700_00_0"), STR("{:L}"), F(1.234567e11));
+    test(STR("1#234567e+12"), STR("{:L}"), F(1.234567e12));
+    test(STR("1#234567e+13"), STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1#234567e-06"), STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1#234567e-05"), STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0#0001234567"), STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0#001234567"), STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0#01234567"), STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0#1234567"), STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:L}"), F(-1.234567e0));
+  test(STR("-1_2#34567"), STR("{:L}"), F(-1.234567e1));
+  test(STR("-12_3#4567"), STR("{:L}"), F(-1.234567e2));
+  test(STR("-1_23_4#567"), STR("{:L}"), F(-1.234567e3));
+  test(STR("-12_34_5#67"), STR("{:L}"), F(-1.234567e4));
+  test(STR("-123_45_6#7"), STR("{:L}"), F(-1.234567e5));
+  test(STR("-1_234_56_7"), STR("{:L}"), F(-1.234567e6));
+  test(STR("-12_345_67_0"), STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-1_23_456_70_0"), STR("{:L}"), F(-1.234567e8));
+    test(STR("-1_2_34_567_00_0"), STR("{:L}"), F(-1.234567e9));
+    test(STR("-1_2_3_45_670_00_0"), STR("{:L}"), F(-1.234567e10));
+    test(STR("-1_2_3_4_56_700_00_0"), STR("{:L}"), F(-1.234567e11));
+    test(STR("-1#234567e+12"), STR("{:L}"), F(-1.234567e12));
+    test(STR("-1#234567e+13"), STR("{:L}"), F(-1.234567e13));
+  }
+
+  test(STR("1.234567e-06"), en_US, STR("{:L}"), F(1.234567e-6)); // explicit en_US argument while the global locale is still loc
+  test(STR("1.234567e-05"), en_US, STR("{:L}"), F(1.234567e-5));
+  test(STR("0.0001234567"), en_US, STR("{:L}"), F(1.234567e-4));
+  test(STR("0.001234567"), en_US, STR("{:L}"), F(1.234567e-3));
+  test(STR("0.01234567"), en_US, STR("{:L}"), F(1.234567e-2));
+  test(STR("0.1234567"), en_US, STR("{:L}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:L}"), F(1.234567e0));
+  test(STR("12.34567"), en_US, STR("{:L}"), F(1.234567e1));
+  test(STR("123.4567"), en_US, STR("{:L}"), F(1.234567e2));
+  test(STR("1,234.567"), en_US, STR("{:L}"), F(1.234567e3));
+  test(STR("12,345.67"), en_US, STR("{:L}"), F(1.234567e4));
+  test(STR("123,456.7"), en_US, STR("{:L}"), F(1.234567e5));
+  test(STR("1,234,567"), en_US, STR("{:L}"), F(1.234567e6));
+  test(STR("12,345,670"), en_US, STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123,456,700"), en_US, STR("{:L}"), F(1.234567e8));
+    test(STR("1,234,567,000"), en_US, STR("{:L}"), F(1.234567e9));
+    test(STR("12,345,670,000"), en_US, STR("{:L}"), F(1.234567e10));
+    test(STR("123,456,700,000"), en_US, STR("{:L}"), F(1.234567e11));
+    test(STR("1.234567e+12"), en_US, STR("{:L}"), F(1.234567e12));
+    test(STR("1.234567e+13"), en_US, STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1.234567e-06"), en_US, STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1.234567e-05"), en_US, STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0.0001234567"), en_US, STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0.001234567"), en_US, STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0.01234567"), en_US, STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0.1234567"), en_US, STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:L}"), F(-1.234567e0));
+  test(STR("-12.34567"), en_US, STR("{:L}"), F(-1.234567e1));
+  test(STR("-123.4567"), en_US, STR("{:L}"), F(-1.234567e2));
+  test(STR("-1,234.567"), en_US, STR("{:L}"), F(-1.234567e3));
+  test(STR("-12,345.67"), en_US, STR("{:L}"), F(-1.234567e4));
+  test(STR("-123,456.7"), en_US, STR("{:L}"), F(-1.234567e5));
+  test(STR("-1,234,567"), en_US, STR("{:L}"), F(-1.234567e6));
+  test(STR("-12,345,670"), en_US, STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123,456,700"), en_US, STR("{:L}"), F(-1.234567e8));
+    test(STR("-1,234,567,000"), en_US, STR("{:L}"), F(-1.234567e9));
+    test(STR("-12,345,670,000"), en_US, STR("{:L}"), F(-1.234567e10));
+    test(STR("-123,456,700,000"), en_US, STR("{:L}"), F(-1.234567e11));
+    test(STR("-1.234567e+12"), en_US, STR("{:L}"), F(-1.234567e12));
+    test(STR("-1.234567e+13"), en_US, STR("{:L}"), F(-1.234567e13));
+  }
+
+  std::locale::global(en_US); // explicit loc argument while the global locale is en_US
+  test(STR("1#234567e-06"), loc, STR("{:L}"), F(1.234567e-6));
+  test(STR("1#234567e-05"), loc, STR("{:L}"), F(1.234567e-5));
+  test(STR("0#0001234567"), loc, STR("{:L}"), F(1.234567e-4));
+  test(STR("0#001234567"), loc, STR("{:L}"), F(1.234567e-3));
+  test(STR("0#01234567"), loc, STR("{:L}"), F(1.234567e-2));
+  test(STR("0#1234567"), loc, STR("{:L}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:L}"), F(1.234567e0));
+  test(STR("1_2#34567"), loc, STR("{:L}"), F(1.234567e1));
+  test(STR("12_3#4567"), loc, STR("{:L}"), F(1.234567e2));
+  test(STR("1_23_4#567"), loc, STR("{:L}"), F(1.234567e3));
+  test(STR("12_34_5#67"), loc, STR("{:L}"), F(1.234567e4));
+  test(STR("123_45_6#7"), loc, STR("{:L}"), F(1.234567e5));
+  test(STR("1_234_56_7"), loc, STR("{:L}"), F(1.234567e6));
+  test(STR("12_345_67_0"), loc, STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("1_23_456_70_0"), loc, STR("{:L}"), F(1.234567e8));
+    test(STR("1_2_34_567_00_0"), loc, STR("{:L}"), F(1.234567e9));
+    test(STR("1_2_3_45_670_00_0"), loc, STR("{:L}"), F(1.234567e10));
+    test(STR("1_2_3_4_56_700_00_0"), loc, STR("{:L}"), F(1.234567e11));
+    test(STR("1#234567e+12"), loc, STR("{:L}"), F(1.234567e12));
+    test(STR("1#234567e+13"), loc, STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1#234567e-06"), loc, STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1#234567e-05"), loc, STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0#0001234567"), loc, STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0#001234567"), loc, STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0#01234567"), loc, STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0#1234567"), loc, STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:L}"), F(-1.234567e0));
+  test(STR("-1_2#34567"), loc, STR("{:L}"), F(-1.234567e1));
+  test(STR("-12_3#4567"), loc, STR("{:L}"), F(-1.234567e2));
+  test(STR("-1_23_4#567"), loc, STR("{:L}"), F(-1.234567e3));
+  test(STR("-12_34_5#67"), loc, STR("{:L}"), F(-1.234567e4));
+  test(STR("-123_45_6#7"), loc, STR("{:L}"), F(-1.234567e5));
+  test(STR("-1_234_56_7"), loc, STR("{:L}"), F(-1.234567e6));
+  test(STR("-12_345_67_0"), loc, STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-1_23_456_70_0"), loc, STR("{:L}"), F(-1.234567e8));
+    test(STR("-1_2_34_567_00_0"), loc, STR("{:L}"), F(-1.234567e9));
+    test(STR("-1_2_3_45_670_00_0"), loc, STR("{:L}"), F(-1.234567e10));
+    test(STR("-1_2_3_4_56_700_00_0"), loc, STR("{:L}"), F(-1.234567e11));
+    test(STR("-1#234567e+12"), loc, STR("{:L}"), F(-1.234567e12));
+    test(STR("-1#234567e+13"), loc, STR("{:L}"), F(-1.234567e13));
+  }
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US); // already en_US at this point; repeated so this section does not depend on the one above
+  test(STR("1,234.567$$$"), STR("{:$<12L}"), F(1.234567e3));
+  test(STR("$$$1,234.567"), STR("{:$>12L}"), F(1.234567e3));
+  test(STR("$1,234.567$$"), STR("{:$^12L}"), F(1.234567e3));
+  test(STR("0001,234.567"), STR("{:012L}"), F(1.234567e3));
+  test(STR("-1,234.567$$$"), STR("{:$<13L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.567"), STR("{:$>13L}"), F(-1.234567e3));
+  test(STR("$-1,234.567$$"), STR("{:$^13L}"), F(-1.234567e3));
+  test(STR("-0001,234.567"), STR("{:013L}"), F(-1.234567e3));
+
+  std::locale::global(loc); // widths grow by one: the custom grouping inserts one more separator than en_US
+  test(STR("1_23_4#567$$$"), STR("{:$<13L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#567"), STR("{:$>13L}"), F(1.234567e3));
+  test(STR("$1_23_4#567$$"), STR("{:$^13L}"), F(1.234567e3));
+  test(STR("0001_23_4#567"), STR("{:013L}"), F(1.234567e3));
+  test(STR("-1_23_4#567$$$"), STR("{:$<14L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#567"), STR("{:$>14L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#567$$"), STR("{:$^14L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#567"), STR("{:014L}"), F(-1.234567e3));
+
+  test(STR("1,234.567$$$"), en_US, STR("{:$<12L}"), F(1.234567e3)); // explicit en_US argument while the global locale is still loc
+  test(STR("$$$1,234.567"), en_US, STR("{:$>12L}"), F(1.234567e3));
+  test(STR("$1,234.567$$"), en_US, STR("{:$^12L}"), F(1.234567e3));
+  test(STR("0001,234.567"), en_US, STR("{:012L}"), F(1.234567e3));
+  test(STR("-1,234.567$$$"), en_US, STR("{:$<13L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.567"), en_US, STR("{:$>13L}"), F(-1.234567e3));
+  test(STR("$-1,234.567$$"), en_US, STR("{:$^13L}"), F(-1.234567e3));
+  test(STR("-0001,234.567"), en_US, STR("{:013L}"), F(-1.234567e3));
+
+  std::locale::global(en_US); // explicit loc argument while the global locale is en_US
+  test(STR("1_23_4#567$$$"), loc, STR("{:$<13L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#567"), loc, STR("{:$>13L}"), F(1.234567e3));
+  test(STR("$1_23_4#567$$"), loc, STR("{:$^13L}"), F(1.234567e3));
+  test(STR("0001_23_4#567"), loc, STR("{:013L}"), F(1.234567e3));
+  test(STR("-1_23_4#567$$$"), loc, STR("{:$<14L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#567"), loc, STR("{:$>14L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#567$$"), loc, STR("{:$^14L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#567"), loc, STR("{:014L}"), F(-1.234567e3));
+}
+
+template <class F, class CharT> // F: floating-point type under test; CharT: character type selecting the numpunct facet (and presumably the STR literals)
+void test_floating_point_default_precision() { // Checks "{:.6L}" (locale-specific form with precision 6, no type) for values of type F.
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>()); // custom facet; the expectations below show '#' as decimal point and '_' as separator
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US); // no locale argument: en_US punctuation is expected from the global locale
+  test(STR("1.23457e-06"), STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6L}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6L}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6L}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6L}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6L}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6L}"), F(1.234567e5));
+  test(STR("1.23457e+06"), STR("{:.6L}"), F(1.234567e6));
+  test(STR("1.23457e+07"), STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), STR("{:.6L}"), F(-1.234567e7));
+
+  std::locale::global(loc); // no locale argument: the custom facet's punctuation is expected from the global locale
+  test(STR("1#23457e-06"), STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6L}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6L}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6L}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6L}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6L}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6L}"), F(1.234567e5));
+  test(STR("1#23457e+06"), STR("{:.6L}"), F(1.234567e6));
+  test(STR("1#23457e+07"), STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), STR("{:.6L}"), F(-1.234567e7));
+
+  test(STR("1.23457e-06"), en_US, STR("{:.6L}"), F(1.234567e-6)); // explicit en_US argument while the global locale is still loc
+  test(STR("1.23457e-05"), en_US, STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6L}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6L}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6L}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6L}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6L}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6L}"), F(1.234567e5));
+  test(STR("1.23457e+06"), en_US, STR("{:.6L}"), F(1.234567e6));
+  test(STR("1.23457e+07"), en_US, STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), en_US, STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), en_US, STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), en_US, STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), en_US, STR("{:.6L}"), F(-1.234567e7));
+
+  std::locale::global(en_US); // explicit loc argument while the global locale is en_US
+  test(STR("1#23457e-06"), loc, STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), loc, STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6L}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6L}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6L}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6L}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6L}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6L}"), F(1.234567e5));
+  test(STR("1#23457e+06"), loc, STR("{:.6L}"), F(1.234567e6));
+  test(STR("1#23457e+07"), loc, STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), loc, STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), loc, STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), loc, STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), loc, STR("{:.6L}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US); // already en_US at this point; repeated so this section does not depend on the one above
+  test(STR("1,234.57$$$"), STR("{:$<11.6L}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6L}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6L}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6L}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6L}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6L}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6L}"), F(-1.234567e3));
+
+  std::locale::global(loc); // widths grow by one: the custom grouping inserts one more separator than en_US
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6L}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6L}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6L}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6L}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6L}"), F(1.234567e3)); // explicit en_US argument while the global locale is still loc
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6L}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6L}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6L}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6L}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6L}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6L}"), F(-1.234567e3));
+
+  std::locale::global(en_US); // explicit loc argument while the global locale is en_US
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6L}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6L}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6L}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6L}"), F(-1.234567e3));
+}
+
+template <class F, class CharT> // F: floating-point type under test; CharT: character type forwarded to every sub-test
+void test_floating_point() { // Runs every locale-specific floating-point sub-test for one (F, CharT) combination.
+  test_floating_point_hex_lower_case<F, CharT>(); // template arguments are explicit: the sub-tests take no parameters to deduce from
+  test_floating_point_hex_upper_case<F, CharT>();
+  test_floating_point_hex_lower_case_precision<F, CharT>();
+  test_floating_point_hex_upper_case_precision<F, CharT>();
+
+  test_floating_point_scientific_lower_case<F, CharT>();
+  test_floating_point_scientific_upper_case<F, CharT>();
+
+  test_floating_point_fixed_lower_case<F, CharT>();
+  test_floating_point_fixed_upper_case<F, CharT>();
+
+  test_floating_point_general_lower_case<F, CharT>();
+  test_floating_point_general_upper_case<F, CharT>();
+
+  test_floating_point_default<F, CharT>();
+  test_floating_point_default_precision<F, CharT>();
+}
+
 template <class CharT>
 void test() {
   test_bool<CharT>();
   test_integer<CharT>();
+  test_floating_point<float, CharT>(); // the floating-point suite runs once per floating-point type
+  test_floating_point<double, CharT>();
+  test_floating_point<long double, CharT>();
 }
 
 int main(int, char**) {

From 787ccd345cbb3a569ba751580bb806552b4b6e57 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Sun, 28 Nov 2021 14:43:43 +0100
Subject: [PATCH 419/946] [libc++][format] Adds formatter pointer.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This implements the last required formatter specialization.

Completes:
- LWG 3251 Are std::format alignment specifiers applied to string arguments?
- LWG 3340 Formatting functions should throw on argument/format string mismatch in §[format.functions]
- LWG 3540 §[format.arg] There should be no const in basic_format_arg(const T* p)

Implements parts of:
- P0645 Text Formatting

Depends on D114001

Reviewed By: ldionne, vitaut, #libc

Differential Revision: https://reviews.llvm.org/D115988
---
 libcxx/docs/Status/Cxx20Issues.csv            |   4 +-
 libcxx/docs/Status/Cxx2bIssues.csv            |   2 +-
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__format/format_arg.h          |   4 +-
 libcxx/include/__format/formatter_pointer.h   |  91 +++++++
 .../include/__format/parser_std_format_spec.h | 103 ++++++-
 libcxx/include/format                         |   1 +
 libcxx/include/module.modulemap               |   1 +
 .../formatter_pointer.module.verify.cpp       |  15 ++
 .../format.arg/visit_format_arg.pass.cpp      |   4 +
 .../std_format_spec_pointer.pass.cpp          | 254 ++++++++++++++++++
 .../formatter.pointer.pass.cpp                | 107 ++++++++
 .../format/format.functions/format_tests.h    |  47 ++++
 13 files changed, 629 insertions(+), 5 deletions(-)
 create mode 100644 libcxx/include/__format/formatter_pointer.h
 create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
 create mode 100644 libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp

diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index 034fac3ca22f8..daa6466f87515 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -207,7 +207,7 @@
 "`3247 <https://wg21.link/LWG3247>`__","``ranges::iter_move``\  should perform ADL-only lookup of ``iter_move``\ ","Prague","","","|ranges|"
 "`3248 <https://wg21.link/LWG3248>`__","``std::format``\  ``#b``\ , ``#B``\ , ``#o``\ , ``#x``\ , and ``#X``\   presentation types misformat negative numbers","Prague","|Complete|","14.0","|format|"
 "`3250 <https://wg21.link/LWG3250>`__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","|Complete|","14.0","|format|"
-"`3251 <https://wg21.link/LWG3251>`__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","","","|format|"
+"`3251 <https://wg21.link/LWG3251>`__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","|Complete|","14.0","|format|"
 "`3252 <https://wg21.link/LWG3252>`__","Parse locale's aware modifiers for commands are not consistent with POSIX spec","Prague","","","|chrono|"
 "`3254 <https://wg21.link/LWG3254>`__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","",""
 "`3255 <https://wg21.link/LWG3255>`__","``span``\ 's ``array``\  constructor is too strict","Prague","|Complete|",""
@@ -256,7 +256,7 @@
 "`3334 `__","``basic_osyncstream``\  move assignment and destruction calls ``basic_syncbuf::emit()``\  twice","Prague","",""
 "`3335 `__","Resolve C++20 NB comments US 273 and GB 274","Prague","","","|ranges|"
 "`3338 `__","Rename ``default_constructible``\  to ``default_initializable``\ ","Prague","|Complete|","13.0"
-"`3340 `__","Formatting functions should throw on argument/format string mismatch in |sect|\ [format.functions]","Prague","","","|format|"
+"`3340 `__","Formatting functions should throw on argument/format string mismatch in |sect|\ [format.functions]","Prague","|Complete|","14.0","|format|"
 "`3346 `__","``pair``\  and ``tuple``\  copy and move constructor have backwards specification","Prague","",""
 "`3347 `__","``std::pair``\  now requires ``T``\  and ``U``\  to be less-than-comparable","Prague","",""
 "`3348 `__","``__cpp_lib_unwrap_ref``\  in wrong header","Prague","|Complete|","12.0"
diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 27bc1e5832967..cbc12d27a5614 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -84,7 +84,7 @@
 `3533 `__,"Make ``base() const &`` consistent across iterator wrappers that supports ``input_iterators``","June 2021","","","|ranges|"
 `3536 `__,"Should ``chrono::from_stream()`` assign zero to duration for failure?","June 2021","","","|chrono|"
 `3539 `__,"``format_to`` must not copy models of ``output_iterator``","June 2021","","","|format|"
-`3540 `__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","","","|format|"
+`3540 `__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","|Complete|","14.0","|format|"
 `3541 `__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","|Complete|","14.0","|ranges|"
 `3542 `__,"``basic_format_arg`` mishandles ``basic_string_view`` with custom traits","June 2021","|Complete|","14.0","|format|"
 `3543 `__,"Definition of when ``counted_iterators`` refer to the same sequence isn't quite right","June 2021","","","|ranges|"
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 3886ddba8e4e5..278e848e6ecbf 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -181,6 +181,7 @@ set(files
   __format/formatter_floating_point.h
   __format/formatter_integer.h
   __format/formatter_integral.h
+  __format/formatter_pointer.h
   __format/formatter_string.h
   __format/parser_std_format_spec.h
   __functional/binary_function.h
diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index 59429c13d4154..2153287a8a61c 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -245,7 +245,9 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
   explicit basic_format_arg(nullptr_t) noexcept
       : __ptr(nullptr), __type_(__format::__arg_t::__ptr) {}
 
-  // TODO FMT Implement the _Tp* constructor.
+  template 
+  requires is_void_v<_Tp> _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp* __p) noexcept
+      : __ptr(__p), __type_(__format::__arg_t::__ptr) {}
 };
 
 #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h
new file mode 100644
index 0000000000000..aa2eb641c6c6d
--- /dev/null
+++ b/libcxx/include/__format/formatter_pointer.h
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_POINTER_H
+#define _LIBCPP___FORMAT_FORMATTER_POINTER_H
+
+#include <__algorithm/copy.h>
+#include <__availability>
+#include <__config>
+#include <__debug>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <__iterator/access.h>
+#include <__nullptr>
+#include 
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that is too much effort
+// to support compilers with partial C++20 support.
+#  if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatter_pointer : public __parser_pointer<_CharT> {
+public:
+  _LIBCPP_HIDE_FROM_ABI auto format(const void* __ptr, auto& __ctx) -> decltype(__ctx.out()) {
+    _LIBCPP_ASSERT(this->__alignment != _Flags::_Alignment::__default,
+                   "The call to parse should have updated the alignment");
+    if (this->__width_needs_substitution())
+      this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+    // This code looks a lot like the code to format a hexadecimal integral,
+    // but that code isn't public. Making that code public requires some
+    // refactoring.
+    // TODO FMT Remove code duplication.
+    char __buffer[2 + 2 * sizeof(uintptr_t)];
+    __buffer[0] = '0';
+    __buffer[1] = 'x';
+    char* __last = __to_buffer(__buffer + 2, _VSTD::end(__buffer), reinterpret_cast(__ptr), 16);
+
+    unsigned __size = __last - __buffer;
+    if (__size >= this->__width)
+      return _VSTD::copy(__buffer, __last, __ctx.out());
+
+    return __formatter::__write(__ctx.out(), __buffer, __last, __size, this->__width, this->__fill, this->__alignment);
+  }
+};
+
+} // namespace __format_spec
+
+// [format.formatter.spec]/2.4
+// For each charT, the pointer type specializations
+// - template<> struct formatter<nullptr_t, charT>;
+// - template<> struct formatter<void*, charT>;
+// - template<> struct formatter<const void*, charT>;
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
+    : public __format_spec::__formatter_pointer<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
+    : public __format_spec::__formatter_pointer<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
+    : public __format_spec::__formatter_pointer<_CharT> {};
+
+#  endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMATTER_POINTER_H
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 75fc626c84ac1..9d893e9ced27f 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -826,7 +826,108 @@ class _LIBCPP_TEMPLATE_VIS __parser_floating_point
   }
 };
 
-// TODO FMT Add a parser for pointer values.
+/**
+ * The parser for the std-format-spec.
+ *
+ * This implements the parser for the pointer types.
+ *
+ * See @ref __parser_string.
+ */
+template 
+class _LIBCPP_TEMPLATE_VIS __parser_pointer : public __parser_width,              // provides __width(|as_arg)
+                                              public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+                                              public _Flags                       // provides __flags
+{
+public:
+  using char_type = _CharT;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __parser_pointer() {
+    // Implements LWG3612 Inconsistent pointer alignment in std::format.
+    // The issue's current status is "Tentatively Ready" and libc++ status is
+    // still experimental.
+    //
+    // TODO FMT Validate this with the final resolution of LWG3612.
+    this->__alignment = _Flags::_Alignment::__right;
+  }
+
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
+    auto __it = __parse(__parse_ctx);
+    __process_display_type();
+    return __it;
+  }
+
+protected:
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
+    auto __begin = __parse_ctx.begin();
+    auto __end = __parse_ctx.end();
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    // An integer presentation type isn't defined in the Standard.
+    // Since a pointer is formatted as an integer it can be argued it's an
+    // integer presentation type. However there are two LWG-issues asserting it
+    // isn't an integer presentation type:
+    // - LWG3612 Inconsistent pointer alignment in std::format
+    // - LWG3644 std::format does not define "integer presentation type"
+    //
+    // There's a paper to make additional clarifications on the status of
+    // formatting pointers and proposes additional fields to be valid. That
+    // paper hasn't been reviewed by the Committee yet.
+    // - P2510 Formatting pointers
+    //
+    // The current implementation assumes formatting pointers isn't covered by
+    // "integer presentation type".
+    // TODO FMT Apply the LWG-issues/papers after approval/rejection by the Committee.
+
+    __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+    if (__begin != __end && *__begin != _CharT('}'))
+      __throw_format_error("The format-spec should consume the input or end with a '}'");
+
+    return __begin;
+  }
+
+  /** Processes the parsed std-format-spec based on the parsed display type. */
+  _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      this->__type = _Flags::_Type::__pointer;
+      break;
+    case _Flags::_Type::__pointer:
+      break;
+    default:
+      __throw_format_error("The format-spec type has a type not supported for a pointer argument");
+    }
+  }
+};
 
 /** Helper struct returned from @ref __get_string_alignment. */
 template 
diff --git a/libcxx/include/format b/libcxx/include/format
index 3a186469dd5c0..53de4a0e6ca06 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -279,6 +279,7 @@ namespace std {
 #include <__format/formatter_char.h>
 #include <__format/formatter_floating_point.h>
 #include <__format/formatter_integer.h>
+#include <__format/formatter_pointer.h>
 #include <__format/formatter_string.h>
 #include <__format/parser_std_format_spec.h>
 #include <__variant/monostate.h>
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index c940a6d11d816..90fae9bb8362d 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -506,6 +506,7 @@ module std [system] {
       module formatter_floating_point { private header "__format/formatter_floating_point.h" }
       module formatter_integer        { private header "__format/formatter_integer.h" }
       module formatter_integral       { private header "__format/formatter_integral.h" }
+      module formatter_pointer        { private header "__format/formatter_pointer.h" }
       module formatter_string         { private header "__format/formatter_string.h" }
       module parser_std_format_spec   { private header "__format/parser_std_format_spec.h" }
     }
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
new file mode 100644
index 0000000000000..abb82de85f37a
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: modules-build
+
+// WARNING: This test was generated by 'generate_private_header_tests.py'
+// and should not be edited manually.
+
+// expected-error@*:* {{use of private header from outside its module: '__format/formatter_pointer.h'}}
+#include <__format/formatter_pointer.h>
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
index b6e34d78c8ac8..76548c97d77ba 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
@@ -346,6 +346,10 @@ void test() {
   // Test pointer types.
 
   test(nullptr);
+  int i = 0;
+  test(static_cast(&i));
+  const int ci = 0;
+  test(static_cast(&ci));
 }
 
 void test() {
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
new file mode 100644
index 0000000000000..7a34bbeb8e25a
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
@@ -0,0 +1,254 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// Tests the parsing of the format string as specified in [format.string.std].
+// It validates whether the std-format-spec is valid for a pointer type.
+
+#include 
+#include 
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include 
+#endif
+
+#include "concepts_precision.h"
+#include "test_macros.h"
+#include "make_string.h"
+#include "test_exception.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template 
+using Parser = __parser_pointer;
+
+template 
+struct Expected {
+  CharT fill = CharT(' ');
+  _Flags::_Alignment alignment = _Flags::_Alignment::__right;
+  _Flags::_Sign sign = _Flags::_Sign::__default;
+  bool alternate_form = false;
+  bool zero_padding = false;
+  uint32_t width = 0;
+  bool width_as_arg = false;
+  bool locale_specific_form = false;
+  _Flags::_Type type = _Flags::_Type::__pointer;
+};
+
+template 
+constexpr void test(Expected expected, size_t size, std::basic_string_view fmt) {
+  // Initialize parser with sufficient arguments to prevent the parsing from
+  // failing due to insufficient arguments.
+  std::basic_format_parse_context parse_ctx(fmt, std::__format::__number_max);
+  auto begin = parse_ctx.begin();
+  auto end = parse_ctx.end();
+  Parser parser;
+  auto it = parser.parse(parse_ctx);
+
+  assert(begin == parse_ctx.begin());
+  assert(end == parse_ctx.end());
+
+  assert(begin + size == it);
+  assert(parser.__fill == expected.fill);
+  assert(parser.__alignment == expected.alignment);
+  assert(parser.__sign == expected.sign);
+  assert(parser.__alternate_form == expected.alternate_form);
+  assert(parser.__zero_padding == expected.zero_padding);
+  assert(parser.__width == expected.width);
+  assert(parser.__width_as_arg == expected.width_as_arg);
+  assert(parser.__locale_specific_form == expected.locale_specific_form);
+  assert(parser.__type == expected.type);
+}
+
+template 
+constexpr void test(Expected expected, size_t size, const CharT* f) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::basic_string_view fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, size, fmt);
+  fmt.remove_suffix(1);
+  test(expected, size, fmt);
+}
+
+template 
+constexpr void test() {
+  Parser parser;
+
+  assert(parser.__fill == CharT(' '));
+  assert(parser.__alignment == _Flags::_Alignment::__right);
+  assert(parser.__sign == _Flags::_Sign::__default);
+  assert(parser.__alternate_form == false);
+  assert(parser.__zero_padding == false);
+  assert(parser.__width == 0);
+  assert(parser.__width_as_arg == false);
+  assert(parser.__locale_specific_form == false);
+  assert(parser.__type == _Flags::_Type::__default);
+
+  test({}, 0, CSTR("}"));
+
+  // *** Align-fill ***
+  test({.alignment = _Flags::_Alignment::__left}, 1, CSTR("<}"));
+  test({.alignment = _Flags::_Alignment::__center}, 1, "^}");
+  test({.alignment = _Flags::_Alignment::__right}, 1, ">}");
+
+  test({.fill = CharT('L'), .alignment = _Flags::_Alignment::__left}, 2, CSTR("L<}"));
+  test({.fill = CharT('#'), .alignment = _Flags::_Alignment::__center}, 2, CSTR("#^}"));
+  test({.fill = CharT('0'), .alignment = _Flags::_Alignment::__right}, 2, CSTR("0>}"));
+
+  test_exception>("The format-spec fill field contains an invalid character", CSTR("{<"));
+  test_exception>("The format-spec fill field contains an invalid character", CSTR("}<"));
+
+  // *** Sign ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("+"));
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("-"));
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR(" "));
+
+  // *** Alternate form ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("#"));
+
+  // *** Zero padding ***
+  test_exception>("A format-spec width field shouldn't have a leading zero", CSTR("0"));
+
+  // *** Width ***
+  test({.width = 0, .width_as_arg = false}, 0, CSTR("}"));
+  test({.width = 1, .width_as_arg = false}, 1, CSTR("1}"));
+  test({.width = 10, .width_as_arg = false}, 2, CSTR("10}"));
+  test({.width = 1000, .width_as_arg = false}, 4, CSTR("1000}"));
+  test({.width = 1000000, .width_as_arg = false}, 7, CSTR("1000000}"));
+
+  test({.width = 0, .width_as_arg = true}, 2, CSTR("{}}"));
+  test({.width = 0, .width_as_arg = true}, 3, CSTR("{0}}"));
+  test({.width = 1, .width_as_arg = true}, 3, CSTR("{1}}"));
+
+  test_exception>("A format-spec width field shouldn't have a leading zero", CSTR("00"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.width = 2'147'483'647, .width_as_arg = false}, 10, CSTR("2147483647}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("2147483648"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("5000000000"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("10000000000"));
+
+  test_exception>("End of input while parsing format-spec arg-id", CSTR("{"));
+  test_exception>("Invalid arg-id", CSTR("{0"));
+  test_exception>("The arg-id of the format-spec starts with an invalid character", CSTR("{a"));
+  test_exception>("Invalid arg-id", CSTR("{1"));
+  test_exception>("Invalid arg-id", CSTR("{9"));
+  test_exception>("Invalid arg-id", CSTR("{9:"));
+  test_exception>("Invalid arg-id", CSTR("{9a"));
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true arg-id < __format::__number_max.
+  test({.width = 2'147'483'646, .width_as_arg = true}, 12, CSTR("{2147483646}}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{2147483648}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{5000000000}"));
+  test_exception>("The numeric value of the format-spec is too large", CSTR("{10000000000}"));
+
+  // *** Precision ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("."));
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR(".1"));
+
+  // *** Locale-specific form ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("L"));
+
+  // *** Type ***
+  {
+    const char* unsuported_type = "The format-spec type has a type not supported for a pointer argument";
+    const char* not_a_type = "The format-spec should consume the input or end with a '}'";
+
+    test_exception>(unsuported_type, CSTR("A}"));
+    test_exception>(unsuported_type, CSTR("B}"));
+    test_exception>(not_a_type, CSTR("C}"));
+    test_exception>(not_a_type, CSTR("D}"));
+    test_exception>(unsuported_type, CSTR("E}"));
+    test_exception>(unsuported_type, CSTR("F}"));
+    test_exception>(unsuported_type, CSTR("G}"));
+    test_exception>(not_a_type, CSTR("H}"));
+    test_exception>(not_a_type, CSTR("I}"));
+    test_exception>(not_a_type, CSTR("J}"));
+    test_exception>(not_a_type, CSTR("K}"));
+    test_exception>("The format-spec should consume the input or end with a '}'", CSTR("L"));
+    test_exception>(not_a_type, CSTR("M}"));
+    test_exception>(not_a_type, CSTR("N}"));
+    test_exception>(not_a_type, CSTR("O}"));
+    test_exception>(not_a_type, CSTR("P}"));
+    test_exception>(not_a_type, CSTR("Q}"));
+    test_exception>(not_a_type, CSTR("R}"));
+    test_exception>(not_a_type, CSTR("S}"));
+    test_exception>(not_a_type, CSTR("T}"));
+    test_exception>(not_a_type, CSTR("U}"));
+    test_exception>(not_a_type, CSTR("V}"));
+    test_exception>(not_a_type, CSTR("W}"));
+    test_exception>(unsuported_type, CSTR("X}"));
+    test_exception>(not_a_type, CSTR("Y}"));
+    test_exception>(not_a_type, CSTR("Z}"));
+
+    test_exception>(unsuported_type, CSTR("a}"));
+    test_exception>(unsuported_type, CSTR("b}"));
+    test_exception>(unsuported_type, CSTR("c}"));
+    test_exception>(unsuported_type, CSTR("d}"));
+    test_exception>(unsuported_type, CSTR("e}"));
+    test_exception>(unsuported_type, CSTR("f}"));
+    test_exception>(unsuported_type, CSTR("g}"));
+    test_exception>(not_a_type, CSTR("h}"));
+    test_exception>(not_a_type, CSTR("i}"));
+    test_exception>(not_a_type, CSTR("j}"));
+    test_exception>(not_a_type, CSTR("k}"));
+    test_exception>(not_a_type, CSTR("l}"));
+    test_exception>(not_a_type, CSTR("m}"));
+    test_exception>(not_a_type, CSTR("n}"));
+    test_exception>(unsuported_type, CSTR("o}"));
+    test({.type = _Flags::_Type::__pointer}, 1, CSTR("p}"));
+    test_exception>(not_a_type, CSTR("q}"));
+    test_exception>(not_a_type, CSTR("r}"));
+    test_exception>(unsuported_type, CSTR("s}"));
+    test_exception>(not_a_type, CSTR("t}"));
+    test_exception>(not_a_type, CSTR("u}"));
+    test_exception>(not_a_type, CSTR("v}"));
+    test_exception>(not_a_type, CSTR("w}"));
+    test_exception>(unsuported_type, CSTR("x}"));
+    test_exception>(not_a_type, CSTR("y}"));
+    test_exception>(not_a_type, CSTR("z}"));
+  }
+
+  // **** General ***
+  test_exception>("The format-spec should consume the input or end with a '}'", CSTR("ss"));
+}
+
+constexpr bool test() {
+  test();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test();
+#endif
+
+  return true;
+}
+
+int main(int, char**) {
+#if !defined(_WIN32) && !defined(_AIX)
+  // Make sure the parsers match the expectations. The layout of the
+  // subobjects is chosen to minimize the size required.
+  static_assert(sizeof(Parser) == 2 * sizeof(uint32_t));
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  static_assert(sizeof(Parser) == (sizeof(wchar_t) <= 2 ? 2 * sizeof(uint32_t) : 3 * sizeof(uint32_t)));
+#  endif
+#endif
+
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp
new file mode 100644
index 0000000000000..b60943becdefe
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// [format.formatter.spec]:
+// Each header that declares the template `formatter` provides the following
+// enabled specializations:
+// ...
+// For each charT, the pointer type specializations
+// - template<> struct formatter<nullptr_t, charT>;
+// - template<> struct formatter<void*, charT>;
+// - template<> struct formatter<const void*, charT>;
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_macros.h"
+#include "make_string.h"
+
+#define STR(S) MAKE_STRING(CharT, S)
+
+template 
+void test(StringT expected, StringViewT fmt, PointerT arg) {
+  using CharT = typename StringT::value_type;
+  auto parse_ctx = std::basic_format_parse_context(fmt);
+  std::formatter formatter;
+  static_assert(std::semiregular);
+
+  auto it = formatter.parse(parse_ctx);
+  assert(it == fmt.end() - (!fmt.empty() && fmt.back() == '}'));
+
+  StringT result;
+  auto out = std::back_inserter(result);
+  using FormatCtxT = std::basic_format_context;
+
+  auto format_ctx = std::__format_context_create(out, std::make_format_args(arg));
+  formatter.format(arg, format_ctx);
+
+  if (expected.empty()) {
+    std::array buffer;
+    buffer[0] = CharT('0');
+    buffer[1] = CharT('x');
+    expected.append(buffer.begin(),
+                    std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast(arg), 16).ptr);
+  }
+  assert(result == expected);
+}
+
+template 
+void test_termination_condition(StringT expected, StringT f, PointerT arg) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  using CharT = typename StringT::value_type;
+  std::basic_string_view fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, fmt, arg);
+  fmt.remove_suffix(1);
+  test(expected, fmt, arg);
+}
+
+template 
+void test_nullptr_t() {
+  test_termination_condition(STR("0x0"), STR("}"), nullptr);
+}
+
+template 
+void test_pointer_type() {
+  test_termination_condition(STR("0x0"), STR("}"), PointerT(0));
+  test_termination_condition(STR("0x42"), STR("}"), PointerT(0x42));
+  test_termination_condition(STR("0xffff"), STR("}"), PointerT(0xffff));
+  test_termination_condition(STR(""), STR("}"), PointerT(-1));
+}
+
+template 
+void test_all_pointer_types() {
+  test_nullptr_t();
+  test_pointer_type();
+  test_pointer_type();
+}
+
+int main(int, char**) {
+  test_all_pointer_types();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test_all_pointer_types();
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index c2eeb236f99d0..4b269bbd4e64c 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -2484,6 +2484,47 @@ void format_test_floating_point(TestFunction check, ExceptionTest check_exceptio
   format_test_floating_point(check, check_exception);
 }
 
+template 
+void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
+  // *** align-fill & width ***
+  check(STR("answer is '   0x0'"), STR("answer is '{:6}'"), P(nullptr));
+  check(STR("answer is '   0x0'"), STR("answer is '{:>6}'"), P(nullptr));
+  check(STR("answer is '0x0   '"), STR("answer is '{:<6}'"), P(nullptr));
+  check(STR("answer is ' 0x0  '"), STR("answer is '{:^6}'"), P(nullptr));
+
+  check(STR("answer is '---0x0'"), STR("answer is '{:->6}'"), P(nullptr));
+  check(STR("answer is '0x0---'"), STR("answer is '{:-<6}'"), P(nullptr));
+  check(STR("answer is '-0x0--'"), STR("answer is '{:-^6}'"), P(nullptr));
+
+  // *** Sign ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:-}"), P(nullptr));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:+}"), P(nullptr));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{: }"), P(nullptr));
+
+  // *** alternate form ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:#}"), P(nullptr));
+
+  // *** zero-padding ***
+  check_exception("A format-spec width field shouldn't have a leading zero", STR("{:0}"), P(nullptr));
+
+  // *** precision ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), P(nullptr));
+
+  // *** locale-specific form ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:L}"), P(nullptr));
+
+  // *** type ***
+  for (const auto& fmt : invalid_types("p"))
+    check_exception("The format-spec type has a type not supported for a pointer argument", fmt, P(nullptr));
+}
+
+template 
+void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
+  format_test_pointer(check, check_exception);
+  format_test_pointer(check, check_exception);
+  format_test_pointer(check, check_exception);
+}
+
 template 
 void format_tests(TestFunction check, ExceptionTest check_exception) {
   // *** Test escaping  ***
@@ -2611,6 +2652,12 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 42"), STR("hello {}"), static_cast(42));
   check(STR("hello 42"), STR("hello {}"), static_cast(42));
   format_test_floating_point(check, check_exception);
+
+  // *** Test pointer formatter argument ***
+  check(STR("hello 0x0"), STR("hello {}"), nullptr);
+  check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast(0x42));
+  check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast(0x42));
+  format_test_pointer(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS

From 2b8b48c5a0c40d33569c74924f72cc31055a7b56 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Tue, 14 Dec 2021 19:46:10 +0100
Subject: [PATCH 420/946] [libc++][format] Disable default formatter.

[format.formatter.spec]/5 lists the requirements for the default
formatter. The original implementation didn't implement this. This
implements the default formatter according to the Standard.

This adds additional tests to validate that the default formatter is
disabled and that the required standard formatters are enabled.

While adding the tests it turned out that the formatters needed a constraint
on the character types they are valid for.

Implements parts of:
- P0645 Text Formatting

Depends on D115988

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115989
---
 libcxx/include/__format/formatter.h           |  30 +-
 libcxx/include/__format/formatter_bool.h      |   2 +-
 libcxx/include/__format/formatter_integer.h   |  24 +-
 libcxx/include/__format/formatter_string.h    |  10 +-
 .../types.compile.pass.cpp                    | 370 ++++++++++++++++++
 5 files changed, 400 insertions(+), 36 deletions(-)
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp

diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h
index 0c0c02ba19173..38b73bba32f3e 100644
--- a/libcxx/include/__format/formatter.h
+++ b/libcxx/include/__format/formatter.h
@@ -38,26 +38,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // to support compilers with partial C++20 support.
 #if !defined(_LIBCPP_HAS_NO_CONCEPTS)
 
-// Currently not implemented specializations throw an exception when used. This
-// does not conform to the Standard. However not all Standard defined formatters
-// have been implemented yet. Until that time the current behavior is intended.
-// TODO FMT Disable the default template.
+/// The default formatter template.
+///
+/// [format.formatter.spec]/5
+/// If F is a disabled specialization of formatter, these values are false:
+/// - is_default_constructible_v<F>,
+/// - is_copy_constructible_v<F>,
+/// - is_move_constructible_v<F>,
+/// - is_copy_assignable_v<F>, and
+/// - is_move_assignable_v<F>.
 template 
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter {
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto parse(auto& __parse_ctx)
-      -> decltype(__parse_ctx.begin()) {
-    __throw();
-  }
-
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto format(_Tp, auto& __ctx)
-      -> decltype(__ctx.out()) {
-    __throw();
-  }
-
-private:
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI void __throw() {
-    __throw_format_error("Argument type not implemented yet");
-  }
+  formatter() = delete;
+  formatter(const formatter&) = delete;
+  formatter& operator=(const formatter&) = delete;
 };
 
 namespace __format_spec {
diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h
index fdd1d75355d24..1e40bc0a435ae 100644
--- a/libcxx/include/__format/formatter_bool.h
+++ b/libcxx/include/__format/formatter_bool.h
@@ -102,7 +102,7 @@ using __formatter_bool = __formatter_integral<__parser_bool<_CharT>>;
 // For each charT, for each cv-unqualified arithmetic type ArithmeticT other
 // than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
 
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
     : public __format_spec::__formatter_bool<_CharT> {
   using _Base = __format_spec::__formatter_bool<_CharT>;
diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h
index 767df36e61eb7..e1f3d4e34897b 100644
--- a/libcxx/include/__format/formatter_integer.h
+++ b/libcxx/include/__format/formatter_integer.h
@@ -81,25 +81,25 @@ using __formatter_integer = __formatter_integral<__parser_integer<_CharT>>;
 // than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
 
 // Signed integral types.
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
 #ifndef _LIBCPP_HAS_NO_INT128
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<__int128_t, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {
@@ -119,28 +119,28 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 #endif
 
 // Unsigned integral types.
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_integer<_CharT> {};
 #ifndef _LIBCPP_HAS_NO_INT128
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<__uint128_t, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index 75a81f5184a0d..04950faa4a21e 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -64,7 +64,7 @@ class _LIBCPP_TEMPLATE_VIS __formatter_string : public __parser_string<_CharT> {
 // [format.formatter.spec]/2.2 For each charT, the string type specializations
 
 // Formatter const char*.
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_string<_CharT> {
@@ -98,7 +98,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter char*.
-template 
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<_CharT*, _CharT> : public formatter {
   using _Base = formatter;
@@ -110,7 +110,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter const char[].
-template 
+template <__formatter::__char_type _CharT, size_t _Size>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter
     : public __format_spec::__formatter_string<_CharT> {
@@ -123,7 +123,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter std::string.
-template 
+template <__formatter::__char_type _CharT, class _Traits, class _Allocator>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter, _CharT>
     : public __format_spec::__formatter_string<_CharT> {
@@ -138,7 +138,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter std::string_view.
-template 
+template <__formatter::__char_type _CharT, class _Traits>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT>
     : public __format_spec::__formatter_string<_CharT> {
   using _Base = __format_spec::__formatter_string<_CharT>;
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp
new file mode 100644
index 0000000000000..522a583f349cb
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp
@@ -0,0 +1,370 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// 
+
+// template 
+// struct formatter;
+
+// Tests the enabled and disabled requirements for std::formatter.
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#ifndef _LIBCPP_HAS_NO_FILESYSTEM_LIBRARY
+#  include 
+#endif
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include 
+#endif
+#include 
+#include 
+#include 
+#ifndef _LIBCPP_HAS_NO_THREADS
+#  include 
+#endif
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_macros.h"
+
+// Validate default template argument.
+static_assert(std::same_as, std::formatter>);
+
+// Concept for an enabled formatter.
+//
+// Since it's not possible to extract the T and CharT types from the formatter
+// they are specified and the proper formatter is always intended to be
+// defaulted.
+//
+// [formatter.requirements]/2
+// A type F meets the Formatter requirements if it meets the BasicFormatter
+// requirements and the expressions shown in Table 71 are valid and have the
+// indicated semantics.
+template >
+concept enabled =
+    // The basic BasicFormatter requirements:
+    std::default_initializable && std::copyable && std::destructible && std::swappable &&
+    // The expressions shown in Table 71
+    requires(F f, std::basic_format_parse_context pc, T u, std::basic_format_context fc) {
+  { f.parse(pc) } -> std::same_as;
+  { f.format(u, fc) } -> std::same_as;
+};
+
+// Concept for a disabled formatter.
+//
+// This uses the same template arguments as enabled. This isn't required since
+// the concept doesn't need to inspect T and CharT. This makes it easier for
+// future changes. For example P2286 formatting ranges intends to change
+// std::formatter<std::vector<int>> from disabled to enabled. The current way
+// makes it easy to define a macro like
+// #if TEST_STD_VER > 23
+//   TEST_ENABLED_AFTER_CXX23(T, CharT) enabled
+// #else
+//   TEST_ENABLED_AFTER_CXX23(T, CharT) disabled
+// #endif
+template >
+// [format.formatter.spec]/5
+// If F is a disabled specialization of formatter, these values are false:
+concept disabled = !std::is_default_constructible_v && !std::is_copy_constructible_v &&
+                   !std::is_move_constructible_v && !std::is_copy_assignable_v && !std::is_move_assignable_v;
+
+template 
+void assert_formatter_is_disabled() {
+  static_assert(disabled);
+}
+
+template 
+void assert_formatter_is_enabled() {
+  // Only formatters for CharT == char || CharT == wchar_t are enabled for the
+  // standard formatters. When CharT is a different type the formatter should
+  // be disabled.
+  if constexpr (std::same_as
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+                || std::same_as
+#endif
+  )
+    static_assert(enabled);
+  else
+    assert_formatter_is_disabled();
+}
+
+// Tests for P0645 Text Formatting
+template 
+void test_P0645() {
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  // Tests the special formatter that converts a char to a wchar_t.
+  assert_formatter_is_enabled();
+#endif
+  assert_formatter_is_enabled();
+
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled, CharT>();
+  assert_formatter_is_enabled, CharT>();
+
+  assert_formatter_is_enabled();
+
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+#ifndef _LIBCPP_HAS_NO_INT128
+  assert_formatter_is_enabled<__int128_t, CharT>();
+#endif
+
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+#ifndef _LIBCPP_HAS_NO_INT128
+  assert_formatter_is_enabled<__uint128_t, CharT>();
+#endif
+
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+  assert_formatter_is_enabled();
+}
+
+// Tests for P1361 Integration of chrono with text formatting
+//
+// Some tests are commented out since these types haven't been implemented in
+// chrono yet. After P1361 has been implemented these formatters should be all
+// enabled.
+template 
+void test_P1361() {
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled, CharT>();
+  //assert_formatter_is_enabled, CharT>();
+  //assert_formatter_is_enabled, CharT>();
+  //assert_formatter_is_enabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled, CharT>();
+
+  //assert_formatter_is_enabled();
+  //assert_formatter_is_enabled();
+
+  //assert_formatter_is_enabled();
+}
+
+// Tests for P1636 Formatters for library types
+//
+// The paper hasn't been voted in so currently all formatters are disabled.
+// TODO validate whether the test is correct after the paper has been accepted.
+template 
+void test_P1636() {
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled();
+#ifndef _LIBCPP_HAS_NO_FILESYSTEM_LIBRARY
+  assert_formatter_is_disabled();
+#endif
+  assert_formatter_is_disabled, CharT>();
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  assert_formatter_is_disabled, CharT>();
+#endif
+#ifndef _LIBCPP_HAS_NO_THREADS
+  assert_formatter_is_disabled();
+#endif
+  assert_formatter_is_disabled, CharT>();
+}
+
+// Tests for P2286 Formatting ranges
+//
+// The paper hasn't been voted in so currently all formatters are disabled.
+// TODO validate whether the test is correct after the paper has been accepted.
+template 
+void test_P2286() {
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+}
+
+class c {
+  void f();
+  void fc() const;
+  static void sf();
+};
+enum e { a };
+enum class ec { a };
+template 
+void test_disabled() {
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  assert_formatter_is_disabled();
+#endif
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+  assert_formatter_is_disabled();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+
+  assert_formatter_is_disabled, CharT>();
+  assert_formatter_is_disabled, CharT>();
+}
+
+template 
+void test() {
+  test_P0645();
+  test_P1361();
+  test_P1636();
+  test_P2286();
+  test_disabled();
+}
+
+void test() {
+  test();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test();
+#endif
+  test();
+  test();
+  test();
+
+  test();
+}

From ade6d0d8fa1d9e327e9a1975351aa6b4b5dbf800 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Thu, 16 Dec 2021 18:17:47 +0100
Subject: [PATCH 421/946] [libc++][format] Adds formatter handle.

This implements the handle according to P0645. P2418 changes the wording
in the Standard. That isn't implemented and requires changes in more
places. LWG3631 applies modifications to P2418, but is currently
unresolved.

Implements parts of:
* P0645 Text Formatting

Depends on D115989

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115991
---
 libcxx/include/__format/format_arg.h          |  40 +++++-
 libcxx/include/format                         |   2 +
 .../format.arg/visit_format_arg.pass.cpp      |   2 +
 .../format.arguments/format.args/get.pass.cpp |   2 +
 .../format.arg.store/class.pass.cpp           |   2 +
 .../format.arg.store/make_format_args.sh.cpp  |   2 +
 .../format.arg/operator_bool.pass.cpp         |   2 +
 .../format.args/ctor.pass.cpp                 |   2 +
 .../format.context/arg.pass.cpp               |   2 +
 .../format.context/ctor.pass.cpp              |   2 +
 .../format.context/locale.pass.cpp            |   2 +
 .../formatter.char.pass.cpp                   |   2 +
 .../formatter.handle.pass.cpp                 |  76 +++++++++++
 .../formatter.signed_integral.pass.cpp        |   2 +
 .../formatter.unsigned_integral.pass.cpp      |   2 +
 .../format/format.functions/format_tests.h    | 120 +++++++++++++++++-
 16 files changed, 256 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp

diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index 2153287a8a61c..da829d52fbfe4 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -14,7 +14,9 @@
 #include <__config>
 #include <__format/format_error.h>
 #include <__format/format_fwd.h>
+#include <__format/format_parse_context.h>
 #include <__functional_base>
+#include <__memory/addressof.h>
 #include <__variant/monostate.h>
 #include 
 #include 
@@ -56,7 +58,8 @@ enum class _LIBCPP_ENUM_VIS __arg_t : uint8_t {
   __long_double,
   __const_char_type_ptr,
   __string_view,
-  __ptr
+  __ptr,
+  __handle
 };
 } // namespace __format
 
@@ -104,6 +107,8 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
     return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__string_view);
   case __format::__arg_t::__ptr:
     return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__ptr);
+  case __format::__arg_t::__handle:
+    return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__handle);
   }
   _LIBCPP_UNREACHABLE();
 }
@@ -111,8 +116,7 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
 template 
 class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
 public:
-  // TODO FMT Define the handle class.
-  class handle;
+  class _LIBCPP_TEMPLATE_VIS handle;
 
   _LIBCPP_HIDE_FROM_ABI basic_format_arg() noexcept
       : __type_{__format::__arg_t::__none} {}
@@ -161,7 +165,7 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
     const char_type* __const_char_type_ptr;
     basic_string_view __string_view;
     const void* __ptr;
-    // TODO FMT Add the handle.
+    handle __handle;
   };
   __format::__arg_t __type_;
 
@@ -248,6 +252,34 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
   template 
   requires is_void_v<_Tp> _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp* __p) noexcept
       : __ptr(__p), __type_(__format::__arg_t::__ptr) {}
+
+  template 
+  _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(const _Tp& __v) noexcept
+      : __handle(__v), __type_(__format::__arg_t::__handle) {}
+};
+
+template 
+class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle {
+  friend class basic_format_arg<_Context>;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI
+  void format(basic_format_parse_context& __parse_ctx, _Context& __ctx) const {
+    __format_(__parse_ctx, __ctx, __ptr_);
+  }
+
+private:
+  const void* __ptr_;
+  void (*__format_)(basic_format_parse_context&, _Context&, const void*);
+
+  template 
+  _LIBCPP_HIDE_FROM_ABI explicit handle(const _Tp& __v) noexcept
+      : __ptr_(_VSTD::addressof(__v)),
+        __format_([](basic_format_parse_context& __parse_ctx, _Context& __ctx, const void* __ptr) {
+          typename _Context::template formatter_type<_Tp> __f;
+          __parse_ctx.advance_to(__f.parse(__parse_ctx));
+          __ctx.advance_to(__f.format(*static_cast(__ptr), __ctx));
+        }) {}
 };
 
 #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
diff --git a/libcxx/include/format b/libcxx/include/format
index 53de4a0e6ca06..c1f1be7d31b98 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -369,6 +369,8 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end,
       [&](auto __arg) {
         if constexpr (same_as)
           __throw_format_error("Argument index out of bounds");
+        else if constexpr (same_as::handle>)
+          __arg.format(__parse_ctx, __ctx);
         else {
           formatter __formatter;
           __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
index 76548c97d77ba..7a534d7282ef7 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // This test requires the dylib support introduced in D92214.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
index 6f8035bff7707..da02e6961012c 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // This test requires the dylib support introduced in D92214.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
index 5472944af6684..b2f0dae3edf83 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
index f3539b3c4ad71..72f1d6b4787b8 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
 // UNSUPPORTED: libcpp-has-no-wide-characters
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // Validate it works regardless of the signedness of `char`.
 // RUN: %{cxx} %{flags} %{compile_flags} -fsigned-char -fsyntax-only %s
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
index 50f6885ba8c67..31c66917a5ff5 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
index 2a73d951cc952..0232df433ab06 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
index e345878b478a1..aae23cb108794 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
index 2a4b3faee9685..41eb8d3797f80 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-localization
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
index 89f3a36d011f7..fe2a1938000a5 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-localization
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
index 65bca30220d26..3ca3297a151bd 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp
new file mode 100644
index 0000000000000..6d28b4f423fae
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp
@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// A user defined formatter using
+// template<class Context>
+// class basic_format_arg<Context>::handle
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_macros.h"
+
+enum class color { black, red, gold };
+const char* color_names[] = {"black", "red", "gold"};
+
+template <>
+struct std::formatter : std::formatter {
+  auto format(color c, format_context& ctx) {
+    return formatter::format(color_names[static_cast(c)], ctx);
+  }
+};
+
+void test(std::string expected, std::string_view fmt, color arg) {
+  auto parse_ctx = std::format_parse_context(fmt);
+  std::formatter formatter;
+  static_assert(std::semiregular);
+
+  auto it = formatter.parse(parse_ctx);
+  assert(it == fmt.end() - (!fmt.empty() && fmt.back() == '}'));
+
+  std::string result;
+  auto out = std::back_inserter(result);
+  using FormatCtxT = std::basic_format_context;
+
+  auto format_ctx = std::__format_context_create(out, std::make_format_args(arg));
+  formatter.format(arg, format_ctx);
+  assert(result == expected);
+}
+
+void test_termination_condition(std::string expected, std::string f, color arg) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::string_view fmt{f};
+  assert(fmt.back() == '}' && "Pre-condition failure");
+
+  test(expected, fmt, arg);
+  fmt.remove_suffix(1);
+  test(expected, fmt, arg);
+}
+
+int main(int, char**) {
+  test_termination_condition("black", "}", color::black);
+  test_termination_condition("red", "}", color::red);
+  test_termination_condition("gold", "}", color::gold);
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
index 8f2d5af2dab91..d25441799483e 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
index 22e330c9daa81..4ee1cfc9dc808 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // 
 
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 4b269bbd4e64c..8deed4da43635 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -10,11 +10,14 @@
 
 #include 
 
+#include 
+#include 
+#include 
+#include 
+
 #include "make_string.h"
 #include "test_macros.h"
 
-#include 
-
 // In this file the following template types are used:
 // TestFunction must be callable as check(expected-result, string-to-format, args-to-format...)
 // ExceptionTest must be callable as check_exception(expected-exception, string-to-format, args-to-format...)
@@ -40,6 +43,93 @@ struct context {
 template 
 using context_t = typename context::type;
 
+// A user-defined type used to test the handle formatter.
+enum class status : uint16_t { foo = 0xAAAA, bar = 0x5555, foobar = 0xAA55 };
+
+// The formatter for a user-defined type used to test the handle formatter.
+template 
+struct std::formatter {
+  int type = 0;
+
+  constexpr auto parse(auto& parse_ctx) -> decltype(parse_ctx.begin()) {
+    auto begin = parse_ctx.begin();
+    auto end = parse_ctx.end();
+    if (begin == end)
+      return begin;
+
+    switch (*begin) {
+    case CharT('x'):
+      break;
+    case CharT('X'):
+      type = 1;
+      break;
+    case CharT('s'):
+      type = 2;
+      break;
+    case CharT('}'):
+      return begin;
+    default:
+      throw_format_error("The format-spec type has a type not supported for a status argument");
+    }
+
+    ++begin;
+    if (begin != end && *begin != CharT('}'))
+      throw_format_error("The format-spec should consume the input or end with a '}'");
+
+    return begin;
+  }
+
+  auto format(status s, auto& ctx) -> decltype(ctx.out()) {
+    const char* names[] = {"foo", "bar", "foobar"};
+    char buffer[6];
+    const char* begin;
+    const char* end;
+    switch (type) {
+    case 0:
+      begin = buffer;
+      buffer[0] = '0';
+      buffer[1] = 'x';
+      end = std::to_chars(&buffer[2], std::end(buffer), static_cast(s), 16).ptr;
+      break;
+
+    case 1:
+      begin = buffer;
+      buffer[0] = '0';
+      buffer[1] = 'X';
+      end = std::to_chars(&buffer[2], std::end(buffer), static_cast(s), 16).ptr;
+      std::transform(static_cast(&buffer[2]), end, &buffer[2], [](char c) { return std::toupper(c); });
+      break;
+
+    case 2:
+      switch (s) {
+      case status::foo:
+        begin = names[0];
+        break;
+      case status::bar:
+        begin = names[1];
+        break;
+      case status::foobar:
+        begin = names[2];
+        break;
+      }
+      end = begin + strlen(begin);
+      break;
+    }
+
+    return std::copy(begin, end, ctx.out());
+  }
+
+private:
+  void throw_format_error(const char* s) {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+    throw std::format_error(s);
+#else
+    (void)s;
+    std::abort();
+#endif
+  }
+};
+
 template 
 std::vector> invalid_types(std::string valid) {
   std::vector> result;
@@ -2518,6 +2608,29 @@ void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
     check_exception("The format-spec type has a type not supported for a pointer argument", fmt, P(nullptr));
 }
 
+template 
+void format_test_handle(TestFunction check, ExceptionTest check_exception) {
+  // *** Valid permutations ***
+  check(STR("answer is '0xaaaa'"), STR("answer is '{}'"), status::foo);
+  check(STR("answer is '0xaaaa'"), STR("answer is '{:x}'"), status::foo);
+  check(STR("answer is '0XAAAA'"), STR("answer is '{:X}'"), status::foo);
+  check(STR("answer is 'foo'"), STR("answer is '{:s}'"), status::foo);
+
+  check(STR("answer is '0x5555'"), STR("answer is '{}'"), status::bar);
+  check(STR("answer is '0x5555'"), STR("answer is '{:x}'"), status::bar);
+  check(STR("answer is '0X5555'"), STR("answer is '{:X}'"), status::bar);
+  check(STR("answer is 'bar'"), STR("answer is '{:s}'"), status::bar);
+
+  check(STR("answer is '0xaa55'"), STR("answer is '{}'"), status::foobar);
+  check(STR("answer is '0xaa55'"), STR("answer is '{:x}'"), status::foobar);
+  check(STR("answer is '0XAA55'"), STR("answer is '{:X}'"), status::foobar);
+  check(STR("answer is 'foobar'"), STR("answer is '{:s}'"), status::foobar);
+
+  // *** type ***
+  for (const auto& fmt : invalid_types("xXs"))
+    check_exception("The format-spec type has a type not supported for a status argument", fmt, status::foo);
+}
+
 template 
 void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
   format_test_pointer(check, check_exception);
@@ -2658,6 +2771,9 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast(0x42));
   check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast(0x42));
   format_test_pointer(check, check_exception);
+
+  // *** Test handle formatter argument ***
+  format_test_handle(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS

From a922324590a13ae544491d21eb035a284a9e75e5 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot 
Date: Mon, 24 Jan 2022 17:15:33 +0000
Subject: [PATCH 422/946] [gn build] Port 787ccd345cbb

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 3e28727f082ed..0797908a50c31 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -240,6 +240,7 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter_char.h",
       "__format/formatter_integer.h",
       "__format/formatter_integral.h",
+      "__format/formatter_pointer.h",
       "__format/formatter_string.h",
       "__format/parser_std_format_spec.h",
       "__functional/binary_function.h",

From 5fa40fb293241affeac45c9ec4e129e2280f7510 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot 
Date: Mon, 24 Jan 2022 17:15:34 +0000
Subject: [PATCH 423/946] [gn build] Port db2944e34b16

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 0797908a50c31..e4fa480a6aa34 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -238,6 +238,7 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter.h",
       "__format/formatter_bool.h",
       "__format/formatter_char.h",
+      "__format/formatter_floating_point.h",
       "__format/formatter_integer.h",
       "__format/formatter_integral.h",
       "__format/formatter_pointer.h",

From ea17d29a6c834a34a698c87193a86eeab04922d2 Mon Sep 17 00:00:00 2001
From: Stephen Tozer 
Date: Tue, 18 Jan 2022 11:11:57 +0000
Subject: [PATCH 424/946] [llvm] Do not replace dead constant references in
 metadata with undef

This patch removes an incorrect behaviour in Constants.cpp, which would
replace dead constant references in metadata with an undef value. This
blanket replacement resulted in undef values being inserted into
metadata that would not accept them. The replacement was intended for
debug info metadata, but this is now instead handled in the RAUW
handler.

Differential Revision: https://reviews.llvm.org/D117300
---
 llvm/lib/IR/Constants.cpp                     |  9 +----
 .../Resolution/X86/Inputs/no-undef-type-md.ll | 13 +++++++
 .../LTO/Resolution/X86/no-undef-type-md.ll    | 37 +++++++++++++++++++
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
 create mode 100644 llvm/test/LTO/Resolution/X86/no-undef-type-md.ll

diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index e031f889caf69..c13990af360ec 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -739,15 +739,8 @@ static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) {
       ++I;
   }
 
-  if (RemoveDeadUsers) {
-    // If C is only used by metadata, it should not be preserved but should
-    // have its uses replaced.
-    if (C->isUsedByMetadata()) {
-      const_cast(C)->replaceAllUsesWith(
-          UndefValue::get(C->getType()));
-    }
+  if (RemoveDeadUsers)
     const_cast(C)->destroyConstant();
-  }
 
   return true;
 }
diff --git a/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
new file mode 100644
index 0000000000000..94166f39e2ae3
--- /dev/null
+++ b/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
@@ -0,0 +1,13 @@
+; ModuleID = 'test.cpp.o'
+source_filename = "test.cpp"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @a()
+
+!llvm.module.flags = !{!9, !39}
+
+!9 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!39 = !{i32 5, !"CG Profile", !40}
+!40 = !{!41}
+!41 = distinct !{null, i32 ()* bitcast (void ()* @a to i32 ()*), i64 2594092}
diff --git a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
new file mode 100644
index 0000000000000..afee5e656df17
--- /dev/null
+++ b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
@@ -0,0 +1,37 @@
+; RUN: opt <%s -o %t0.o -thinlto-bc -thinlto-split-lto-unit
+; RUN: llvm-as -o %t1.o %S/Inputs/no-undef-type-md.ll
+; RUN: llvm-lto2 run -o a.out \
+; RUN: %t0.o \
+; RUN: -r=%t0.o,a, \
+; RUN: -r=%t0.o,b,pl \
+; RUN: %t1.o \
+; RUN: -r=%t1.o,a,pl \
+; RUN: | FileCheck --allow-empty --check-prefix=ERROR %s
+; RUN llvm-nm a.out.0 a.out.1 -S | FileCheck %s
+
+; ERROR-NOT: expected a Function or null
+; ERROR-NOT: i32 (%0*, i32*)* undef
+
+; CHECK: a.out.0:
+; CHECK: a.out.1:
+
+; ModuleID = 'test.cpp.o'
+source_filename = "test.cpp"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @a() {
+entry:
+  ret i32 0
+}
+
+define i32 @b() {
+entry:
+  ret i32 0
+}
+
+!llvm.module.flags = !{!39}
+
+!39 = !{i32 5, !"CG Profile", !40}
+!40 = !{!41}
+!41 = !{i32 ()* @b, i32 ()* @a, i64 2594092}

From 11cea7e5ce4d3f6a0d2fac016d503f99c52cdc96 Mon Sep 17 00:00:00 2001
From: Sander de Smalen 
Date: Fri, 21 Jan 2022 11:28:16 +0000
Subject: [PATCH 425/946] [AArch64] NFC: Clarify and auto-generate some CodeGen
 tests.

* For ext-narrow-index.ll, move vscale_range attribute closer to the
  function definition, rather than through indirect # attribute. This
  makes the test a bit easier to read.
* auto-generated CHECK lines for sve-cmp-select.ll and
  named-vector-shuffles-sve.ll.
* re-generated CHECK lines for tests that were marked as auto-generated
  but whose CHECK lines were out of date.
---
 llvm/test/CodeGen/AArch64/concat-vector.ll    |   2 +-
 llvm/test/CodeGen/AArch64/ext-narrow-index.ll | 229 +++++++++---------
 .../AArch64/named-vector-shuffles-sve.ll      |  48 ++--
 llvm/test/CodeGen/AArch64/neon-stepvector.ll  |   1 -
 llvm/test/CodeGen/AArch64/shift_minsize.ll    |  66 +++++
 llvm/test/CodeGen/AArch64/sve-cmp-select.ll   |  37 +--
 .../AArch64/sve-fixed-length-sdiv-pow2.ll     |  17 +-
 .../CodeGen/AArch64/sve-punpklo-combine.ll    |   8 +-
 8 files changed, 240 insertions(+), 168 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 690fb716771af..1e5d2660a79eb 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -88,7 +88,7 @@ define <8 x i32> @concat8(<4 x i32>* %A, <4 x i32>* %B) {
 define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 ; CHECK-LABEL: concat9:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1    v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
    %v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> 
    ret <4 x half> %v4half
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index f7f143ff49e31..b296a79ce4f40 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
 
 ; Tests of shufflevector where the index operand is half the width of the vector
@@ -6,9 +7,9 @@
 ; i8 tests
 define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -16,10 +17,10 @@ entry:
 
 define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -27,10 +28,10 @@ entry:
 
 define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off8:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -38,9 +39,10 @@ entry:
 
 define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off15:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -48,9 +50,10 @@ entry:
 
 define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off22:
-; CHECK: ext v0.16b, v1.16b, v1.16b, #6
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v1.16b, v1.16b, #6
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -59,9 +62,9 @@ entry:
 ; i16 tests
 define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -69,10 +72,10 @@ entry:
 
 define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -80,9 +83,10 @@ entry:
 
 define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off7:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -90,9 +94,10 @@ entry:
 
 define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off8:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -101,9 +106,9 @@ entry:
 ; i32 tests
 define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -111,10 +116,10 @@ entry:
 
 define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -122,9 +127,10 @@ entry:
 
 define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off3:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -132,9 +138,10 @@ entry:
 
 define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off4:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -143,9 +150,9 @@ entry:
 ; i64 tests
 define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> 
   ret <1 x i64> %shuffle
@@ -153,10 +160,10 @@ entry:
 
 define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> 
   ret <1 x i64> %shuffle
@@ -164,9 +171,10 @@ entry:
 
 define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off2:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> 
   ret <1 x i64> %shuffle
@@ -175,9 +183,9 @@ entry:
 ; i8 tests with second operand zero
 define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -185,10 +193,10 @@ entry:
 
 define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -196,10 +204,10 @@ entry:
 
 define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off8:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -207,10 +215,11 @@ entry:
 
 define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off15:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #15
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -218,9 +227,9 @@ entry:
 
 define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off22:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> 
   ret <8 x i8> %shuffle
@@ -229,9 +238,9 @@ entry:
 ; i16 tests with second operand zero
 define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -239,10 +248,10 @@ entry:
 
 define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -250,10 +259,11 @@ entry:
 
 define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off7:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #14
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -261,9 +271,9 @@ entry:
 
 define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off8:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> 
   ret <4 x i16> %shuffle
@@ -272,9 +282,9 @@ entry:
 ; i32 tests with second operand zero
 define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -282,10 +292,10 @@ entry:
 
 define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -293,10 +303,11 @@ entry:
 
 define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off3:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #12
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -304,9 +315,9 @@ entry:
 
 define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off4:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> 
   ret <2 x i32> %shuffle
@@ -315,9 +326,9 @@ entry:
 ; i64 tests with second operand zero
 define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> 
   ret <1 x i64> %shuffle
@@ -325,10 +336,10 @@ entry:
 
 define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> 
   ret <1 x i64> %shuffle
@@ -336,9 +347,9 @@ entry:
 
 define <1 x i64> @i64_zero_off2(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off2:
-; CHECK: fmov d0, xzr
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, xzr
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> 
   ret <1 x i64> %shuffle
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index bce9e035da8c1..404811433ac20 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -24,7 +24,7 @@ define  @splice_nxv16i8_first_idx( %a,  %res
 }
 
-define  @splice_nxv16i8_last_idx( %a,  %b) #1 {
+define  @splice_nxv16i8_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
@@ -51,7 +51,7 @@ define  @splice_nxv4i32_first_idx( %a,  %res
 }
 
-define  @splice_nxv4i32_last_idx( %a,  %b) #1 {
+define  @splice_nxv4i32_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4i32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -69,7 +69,7 @@ define  @splice_nxv2i64_first_idx( %a,  %res
 }
 
-define  @splice_nxv2i64_last_idx( %a,  %b) #1 {
+define  @splice_nxv2i64_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2i64_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -109,7 +109,7 @@ define  @splice_nxv2f16_first_idx( %a,  %res
 }
 
-define  @splice_nxv2f16_last_idx( %a,  %b) #1 {
+define  @splice_nxv2f16_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -149,7 +149,7 @@ define  @splice_nxv4f16_first_idx( %a,  %res
 }
 
-define  @splice_nxv4f16_last_idx( %a,  %b) #1 {
+define  @splice_nxv4f16_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -167,7 +167,7 @@ define  @splice_nxv8f16_first_idx( %a,  %res
 }
 
-define  @splice_nxv8f16_last_idx( %a,  %b) #1 {
+define  @splice_nxv8f16_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv8f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
@@ -207,7 +207,7 @@ define  @splice_nxv2f32_first_idx( %a, <
   ret  %res
 }
 
-define  @splice_nxv2f32_last_idx( %a,  %b) #1 {
+define  @splice_nxv2f32_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -225,7 +225,7 @@ define  @splice_nxv4f32_first_idx( %a, <
   ret  %res
 }
 
-define  @splice_nxv4f32_last_idx( %a,  %b) #1 {
+define  @splice_nxv4f32_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4f32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -243,7 +243,7 @@ define  @splice_nxv2f64_first_idx( %a,
   ret  %res
 }
 
-define  @splice_nxv2f64_last_idx( %a,  %b) #1 {
+define  @splice_nxv2f64_last_idx( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f64_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -345,7 +345,7 @@ define  @splice_nxv8i32_idx( %a,  @splice_nxv16f32_16( %a,  %b) #2 {
+define  @splice_nxv16f32_16( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16f32_16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -392,7 +392,7 @@ define  @splice_nxv16i8( %a,  %res
 }
 
-define  @splice_nxv16i8_neg32( %a,  %b) #2 {
+define  @splice_nxv16i8_neg32( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl32
@@ -403,7 +403,7 @@ define  @splice_nxv16i8_neg32( %a,  %res
 }
 
-define  @splice_nxv16i8_neg64( %a,  %b) #3 {
+define  @splice_nxv16i8_neg64( %a,  %b) vscale_range(4,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl64
@@ -414,7 +414,7 @@ define  @splice_nxv16i8_neg64( %a,  %res
 }
 
-define  @splice_nxv16i8_neg128( %a,  %b) #4 {
+define  @splice_nxv16i8_neg128( %a,  %b) vscale_range(8,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl128
@@ -425,7 +425,7 @@ define  @splice_nxv16i8_neg128( %a,  %res
 }
 
-define  @splice_nxv16i8_neg256( %a,  %b) #1 {
+define  @splice_nxv16i8_neg256( %a,  %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg256:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl256
@@ -447,7 +447,7 @@ define  @splice_nxv16i8_1( %a,  %res
 }
 
-define  @splice_nxv16i8_neg17( %a,  %b) #2 {
+define  @splice_nxv16i8_neg17( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg17:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -492,7 +492,7 @@ define  @splice_nxv8i16_1( %a,  %res
 }
 
-define  @splice_nxv8i16_neg9( %a,  %b) #2 {
+define  @splice_nxv8i16_neg9( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv8i16_neg9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -537,7 +537,7 @@ define  @splice_nxv4i32_1( %a,  %res
 }
 
-define  @splice_nxv4i32_neg5( %a,  %b) #2 {
+define  @splice_nxv4i32_neg5( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv4i32_neg5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl5
@@ -570,7 +570,7 @@ define  @splice_nxv2i64_1( %a,  %res
 }
 
-define  @splice_nxv2i64_neg3( %a,  %b) #2 {
+define  @splice_nxv2i64_neg3( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv2i64_neg3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl3
@@ -603,7 +603,7 @@ define  @splice_nxv8f16_1( %a,  %res
 }
 
-define  @splice_nxv8f16_neg9( %a,  %b) #2 {
+define  @splice_nxv8f16_neg9( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv8f16_neg9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -648,7 +648,7 @@ define  @splice_nxv4f32_1( %a,  %res
 }
 
-define  @splice_nxv4f32_neg5( %a,  %b) #2 {
+define  @splice_nxv4f32_neg5( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv4f32_neg5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl5
@@ -681,7 +681,7 @@ define  @splice_nxv2f64_1( %a,  %res
 }
 
-define  @splice_nxv2f64_neg3( %a,  %b) #2 {
+define  @splice_nxv2f64_neg3( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv2f64_neg3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl3
@@ -797,7 +797,7 @@ define  @splice_nxv8i32( %a,  @splice_nxv16f32_neg17( %a,  %b) #2 {
+define  @splice_nxv16f32_neg17( %a,  %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16f32_neg17:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -848,7 +848,3 @@ declare  @llvm.experimental.vector.splice.nxv16f32( @llvm.experimental.vector.splice.nxv2f64(, , i32)
 
 attributes #0 = { nounwind "target-features"="+sve" }
-attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }
-attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }
-attributes #3 = { nounwind "target-features"="+sve" vscale_range(4,16) }
-attributes #4 = { nounwind "target-features"="+sve" vscale_range(8,16) }
diff --git a/llvm/test/CodeGen/AArch64/neon-stepvector.ll b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
index 05308bf5f6d76..7255574f42097 100644
--- a/llvm/test/CodeGen/AArch64/neon-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
 
 ; LEGAL INTEGER TYPES
diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
index 78d87ff77762c..cc29e3a5f04f5 100644
--- a/llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -16,6 +16,11 @@ define i64 @f0(i64 %val, i64 %amt) minsize optsize {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f0:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
+; CHECK-DARWIN-NEXT:    ret
   %res = shl i64 %val, %amt
   ret i64 %res
 }
@@ -26,6 +31,12 @@ define i32 @f1(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f1:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -37,6 +48,12 @@ define i32 @f2(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f2:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    asr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -48,6 +65,12 @@ define i32 @f3(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    lsr x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f3:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -62,6 +85,20 @@ define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext
 ; CHECK-NEXT:    bl __ashlti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: shl128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mvn w8, w2
+; CHECK-DARWIN-NEXT:    mov w9, w2
+; CHECK-DARWIN-NEXT:    lsr x10, x0, #1
+; CHECK-DARWIN-NEXT:    tst x9, #0x40
+; CHECK-DARWIN-NEXT:    lsr x8, x10, x8
+; CHECK-DARWIN-NEXT:    lsl x10, x1, x9
+; CHECK-DARWIN-NEXT:    orr x8, x10, x8
+; CHECK-DARWIN-NEXT:    lsl x10, x0, x9
+; CHECK-DARWIN-NEXT:    csel x1, x10, x8, ne
+; CHECK-DARWIN-NEXT:    csel x0, xzr, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
@@ -88,6 +125,21 @@ define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signex
 ; CHECK-NEXT:    bl __ashrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: ashr128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mov w8, w2
+; CHECK-DARWIN-NEXT:    mvn w9, w2
+; CHECK-DARWIN-NEXT:    lsl x10, x1, #1
+; CHECK-DARWIN-NEXT:    tst x8, #0x40
+; CHECK-DARWIN-NEXT:    lsr x11, x0, x8
+; CHECK-DARWIN-NEXT:    lsl x9, x10, x9
+; CHECK-DARWIN-NEXT:    asr x10, x1, x8
+; CHECK-DARWIN-NEXT:    orr x9, x9, x11
+; CHECK-DARWIN-NEXT:    asr x8, x1, #63
+; CHECK-DARWIN-NEXT:    csel x0, x10, x9, ne
+; CHECK-DARWIN-NEXT:    csel x1, x8, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
   %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
@@ -113,6 +165,20 @@ define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signex
 ; CHECK-NEXT:    bl __lshrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: lshr128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mov w8, w2
+; CHECK-DARWIN-NEXT:    mvn w9, w2
+; CHECK-DARWIN-NEXT:    lsl x10, x1, #1
+; CHECK-DARWIN-NEXT:    tst x8, #0x40
+; CHECK-DARWIN-NEXT:    lsr x11, x0, x8
+; CHECK-DARWIN-NEXT:    lsl x9, x10, x9
+; CHECK-DARWIN-NEXT:    orr x9, x9, x11
+; CHECK-DARWIN-NEXT:    lsr x10, x1, x8
+; CHECK-DARWIN-NEXT:    csel x0, x10, x9, ne
+; CHECK-DARWIN-NEXT:    csel x1, xzr, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
   %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
index b04e8d922c803..1a30005fa4674 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
@@ -1,36 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
 
 define  @vselect_cmp_ne( %a,  %b,  %c) {
-  ; CHECK-LABEL: vselect_cmp_ne
-  ; CHECK:       // %bb.0:
-	; CHECK-NEXT:    ptrue	p0.b
-	; CHECK-NEXT:    cmpne	p0.b, p0/z, z0.b, z1.b
-	; CHECK-NEXT:    sel	z0.b, p0, z1.b, z2.b
-	; CHECK-NEXT:    ret
+; CHECK-LABEL: vselect_cmp_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp ne  %a, %b
   %d = select  %cmp,  %b,  %c
   ret  %d
 }
 
 define  @vselect_cmp_sgt( %a,  %b,  %c) {
-  ; CHECK-LABEL: vselect_cmp_sgt
-  ; CHECK:       // %bb.0:
-  ; CHECK-NEXT: 	ptrue	p0.b
-  ; CHECK-NEXT: 	cmpgt	p0.b, p0/z, z0.b, z1.b
-  ; CHECK-NEXT: 	sel	z0.b, p0, z1.b, z2.b
-  ; CHECK-NEXT: 	ret
+; CHECK-LABEL: vselect_cmp_sgt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp sgt  %a, %b
   %d = select  %cmp,  %b,  %c
   ret  %d
 }
 
 define  @vselect_cmp_ugt( %a,  %b,  %c) {
-  ; CHECK-LABEL: vselect_cmp_ugt
-  ; CHECK:       // %bb.0:
-  ; CHECK-NEXT: 	ptrue	p0.b
-  ; CHECK-NEXT: 	cmphi	p0.b, p0/z, z0.b, z1.b
-  ; CHECK-NEXT: 	sel	z0.b, p0, z1.b, z2.b
-  ; CHECK-NEXT: 	ret
+; CHECK-LABEL: vselect_cmp_ugt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp ugt  %a, %b
   %d = select  %cmp,  %b,  %c
   ret  %d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 76163f37a2aca..e09a79fb838f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
 ; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK
 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
@@ -21,7 +20,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -33,7 +32,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -113,7 +112,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -125,7 +124,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -205,7 +204,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -217,7 +216,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -297,7 +296,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p0.d, vl1
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -310,7 +309,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
index 4e737424506d0..ddc2f5bd81284 100644
--- a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
@@ -177,11 +177,11 @@ define  @masked_load_sext_i8i64_parg(i8* %ap,  @masked_load_sext_i8i16_ptrue_all(i8* %ap,  %b) #0 {
 ; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl64
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl32
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ret
   %p0 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
@@ -197,12 +197,12 @@ define  @masked_load_sext_i8i16_ptrue_all(i8* %ap,  @masked_load_sext_i8i32_ptrue_all(i8* %ap,  %b) #0 {
 ; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl64
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ret
   %p0 = call  @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)

From 57eb5033cdffd4a4b35e2ba308876f50bc9cce62 Mon Sep 17 00:00:00 2001
From: Clint Caywood 
Date: Mon, 24 Jan 2022 09:40:38 -0800
Subject: [PATCH 426/946] [libc] Add bazel definition for hypot/hypotf.

Patch by Clint Caywood.

Differential Revision: https://reviews.llvm.org/D118053
---
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index e17f138e74ab9..b709565b14830 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -75,6 +75,7 @@ fputil_common_hdrs = [
     "src/__support/FPUtil/FEnvUtils.h",
     "src/__support/FPUtil/FPBits.h",
     "src/__support/FPUtil/FloatProperties.h",
+    "src/__support/FPUtil/Hypot.h",
     "src/__support/FPUtil/ManipulationFunctions.h",
     "src/__support/FPUtil/NearestIntegerOperations.h",
     "src/__support/FPUtil/NormalFloat.h",
@@ -371,6 +372,10 @@ libc_math_function(name = "frexpf")
 
 libc_math_function(name = "frexpl")
 
+libc_math_function(name = "hypot")
+
+libc_math_function(name = "hypotf")
+
 libc_math_function(name = "logb")
 
 libc_math_function(name = "logbf")

From 38e16e1cebb891ad47b85727bf46f7dac6d7da94 Mon Sep 17 00:00:00 2001
From: Hans Wennborg 
Date: Mon, 24 Jan 2022 19:01:38 +0100
Subject: [PATCH 427/946] Use -gdwarf-4 in
 compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c

otherwise the test fails after the recent DWARF 4 -> 5 default change,
see https://github.com/llvm/llvm-project/issues/53387
---
 compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index c78614f28b11e..9aa24d72376ca 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -1,7 +1,7 @@
 // REQUIRES: zlib
 
 // Value profiling is currently not supported in lightweight mode.
-// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: %clang_pgogen -o %t -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
 // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
 

From d27f02261442a15b0edb627023a8568735b2d110 Mon Sep 17 00:00:00 2001
From: Jeremy Morse 
Date: Mon, 24 Jan 2022 17:52:52 +0000
Subject: [PATCH 428/946] [NFC][DebugInfo] Strip out an undesired #if 0 block

As mentioned in discussion of D116821, it's better to just delete this
block than keep it hanging around.
---
 llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 3149729b92313..8f697611a82c0 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -123,14 +123,6 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 }
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
-  // Enable by default on x86_64, disable if explicitly turned off on cmdline.
-  // Disabled while https://reviews.llvm.org/D116821 is investigated.
-#if 0
-  if (T.getArch() == llvm::Triple::x86_64 &&
-      ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
-    return true;
-#endif
-
-  // Otherwise: enable if explicitly requested on command line.
+  // Enable if explicitly requested on command line.
   return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
 }

From 830df62a07031d84257a8a208798a6a2b4c0461a Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Mon, 24 Jan 2022 18:21:52 +0000
Subject: [PATCH 429/946] [ConstraintElimination] Add test from PR53123.

---
 .../ConstraintElimination/sub-nuw.ll          | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
index bcefc9930f3ca..4d3b259b0143b 100644
--- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
+++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
@@ -237,4 +237,38 @@ if.end:                                           ; preds = %entry
   ret void
 }
 
+define i16 @test_pr53123_sub_constraint_sign(i16 %v) {
+; CHECK-LABEL: @test_pr53123_sub_constraint_sign(
+; CHECK-NEXT:  bb.0:
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i16 32767, [[V:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[V]], [[SUB]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[BB_2:%.*]], label [[BB_1:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    [[ADD:%.*]] = shl nuw nsw i16 [[V]], 1
+; CHECK-NEXT:    [[SUB9:%.*]] = sub nuw nsw i16 32767, [[ADD]]
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp ugt i16 [[ADD]], [[SUB9]]
+; CHECK-NEXT:    br i1 false, label [[BB_3:%.*]], label [[BB_2]]
+; CHECK:       bb.2:
+; CHECK-NEXT:    ret i16 1
+; CHECK:       bb.3:
+; CHECK-NEXT:    ret i16 0
+;
+bb.0:
+  %sub = sub nuw nsw i16 32767, %v
+  %cmp1 = icmp ugt i16 %v, %sub
+  br i1 %cmp1, label %bb.2, label %bb.1
+
+bb.1:
+  %add = shl nuw nsw i16 %v, 1
+  %sub9 = sub nuw nsw i16 32767, %add
+  %cmp11 = icmp ugt i16 %add, %sub9
+  br i1 %cmp11, label %bb.3, label %bb.2
+
+bb.2:
+  ret i16 1
+
+bb.3:
+  ret i16 0
+}
+
 declare void @use(i1)

From 8a15caaae56182815839741de414a0ba60037a9a Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Mon, 24 Jan 2022 18:32:32 +0000
Subject: [PATCH 430/946] [ConstraintElimination] Fix sign of sub
 decomposition.

Update the decomposition code to make sure the right coefficient (-1) is
used for the second operand of the subtract.

Fixes PR53123.
---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp  | 2 +-
 llvm/test/Transforms/ConstraintElimination/sub-nuw.ll | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 7f2d5d7d99871..fcb88b54a094c 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -108,7 +108,7 @@ static SmallVector, 4> decompose(Value *V) {
   if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
     return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
   if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
-    return {{0, nullptr}, {1, Op0}, {1, Op1}};
+    return {{0, nullptr}, {1, Op0}, {-1, Op1}};
 
   return {{0, nullptr}, {1, V}};
 }
diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
index 4d3b259b0143b..25594db9905d0 100644
--- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
+++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
@@ -247,7 +247,7 @@ define i16 @test_pr53123_sub_constraint_sign(i16 %v) {
 ; CHECK-NEXT:    [[ADD:%.*]] = shl nuw nsw i16 [[V]], 1
 ; CHECK-NEXT:    [[SUB9:%.*]] = sub nuw nsw i16 32767, [[ADD]]
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp ugt i16 [[ADD]], [[SUB9]]
-; CHECK-NEXT:    br i1 false, label [[BB_3:%.*]], label [[BB_2]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[BB_3:%.*]], label [[BB_2]]
 ; CHECK:       bb.2:
 ; CHECK-NEXT:    ret i16 1
 ; CHECK:       bb.3:

From 0a3d946e7bb4cd3519370c879dac7cbb58d13a55 Mon Sep 17 00:00:00 2001
From: John Ericson 
Date: Sun, 23 Jan 2022 05:30:32 +0000
Subject: [PATCH 431/946] [libc][cmake] Make `add_tablegen` calls match others

In all the other `add_tablegen` calls, the project name is transformed so that it
can be a prefix of a CMake variable. I think it is better to do that here
too for consistency.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117979
---
 libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt | 2 +-
 libc/utils/tools/WrapperGen/CMakeLists.txt        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt b/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
index c90fde76dd58c..9e25c21c6b359 100644
--- a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
+++ b/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_tablegen(libc-prototype-testgen llvm-libc
+add_tablegen(libc-prototype-testgen LLVM_LIBC
   PrototypeTestGen.cpp
 )
 target_link_libraries(libc-prototype-testgen PRIVATE LibcTableGenUtil)
diff --git a/libc/utils/tools/WrapperGen/CMakeLists.txt b/libc/utils/tools/WrapperGen/CMakeLists.txt
index fe8ffcce94a58..5fd78591e9b3c 100644
--- a/libc/utils/tools/WrapperGen/CMakeLists.txt
+++ b/libc/utils/tools/WrapperGen/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_tablegen(libc-wrappergen llvm-libc
+add_tablegen(libc-wrappergen LLVM_LIBC
   Main.cpp
 )
 

From eadf7268d578396d4f2fc2a0f7eda8096c041007 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer 
Date: Sat, 15 Jan 2022 13:29:26 -0500
Subject: [PATCH 432/946] [libc++] Fix bugs in common_iterator; add test
 coverage.

Differential Revision: https://reviews.llvm.org/D117400
---
 libcxx/docs/Status/Cxx2bIssues.csv            |   4 +-
 libcxx/include/__iterator/common_iterator.h   |  67 ++++------
 .../constraints.compile.pass.cpp              |  28 ++++
 .../iterators.common/ctor.converting.pass.cpp |  48 +++++++
 .../iterators.common/ctor.default.pass.cpp    |  41 ++++++
 .../iterators.common/ctor.iter.pass.cpp       |  50 ++++++++
 .../iterators.common/ctor.pass.cpp            |  90 -------------
 .../iterators.common/ctor.sentinel.pass.cpp   |  63 +++++++++
 .../iterators.common/iter_move.pass.cpp       |  74 ++++++++---
 .../iterators.common/iter_swap.pass.cpp       | 121 +++++++++++++-----
 .../predef.iterators/iterators.common/types.h |  26 ----
 libcxx/test/support/test_iterators.h          |  32 -----
 12 files changed, 408 insertions(+), 236 deletions(-)
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index cbc12d27a5614..9baefabe05da8 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -126,7 +126,7 @@
 `3571 `__,"``flush_emit`` should set ``badbit`` if the ``emit`` call fails","October 2021","",""
 `3572 `__,"``copyable-box`` should be fully ``constexpr``","October 2021","","","|ranges|"
 `3573 `__,"Missing Throws element for ``basic_string_view(It begin, End end)``","October 2021","|Complete|","14.0"
-`3574 `__,"``common_iterator`` should be completely ``constexpr``-able","October 2021","","","|ranges|"
+`3574 `__,"``common_iterator`` should be completely ``constexpr``-able","October 2021","|Complete|","14.0","|ranges|"
 `3580 `__,"``iota_view``'s ``iterator``'s binary ``operator+`` should be improved","October 2021","","","|ranges|"
 `3581 `__,"The range constructor makes ``basic_string_view`` not trivially move constructible","October 2021","","","|ranges|"
 `3585 `__,"``variant`` converting assignment with immovable alternative","October 2021","",""
@@ -135,7 +135,7 @@
 `3591 `__,"``lazy_split_view::inner-iterator::base() &&`` invalidates outer iterators","October 2021","","","|ranges|"
 `3592 `__,"``lazy_split_view`` needs to check the simpleness of Pattern","October 2021","","","|ranges|"
 `3593 `__,"Several iterators' ``base() const &`` and ``lazy_split_view::outer-iterator::value_type::end()`` missing ``noexcept``","October 2021","","","|ranges|"
-`3595 `__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|"
+`3595 `__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","|Complete|","14.0","|ranges|"
 "","","","",""
 `3645 `__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0",""
 "","","","",""
diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h
index 9a142769e55a7..df4c7bd18e8d2 100644
--- a/libcxx/include/__iterator/common_iterator.h
+++ b/libcxx/include/__iterator/common_iterator.h
@@ -41,7 +41,7 @@ class common_iterator {
       : __value(_VSTD::move(__x)) {}
 
   public:
-    const iter_value_t<_Iter>* operator->() const {
+    constexpr const iter_value_t<_Iter>* operator->() const noexcept {
       return _VSTD::addressof(__value);
     }
   };
@@ -58,7 +58,7 @@ class common_iterator {
       constructible_from, iter_reference_t<_Iter>> &&
       move_constructible>;
 
-    const iter_value_t<_Iter>& operator*() const {
+    constexpr const iter_value_t<_Iter>& operator*() const noexcept {
       return __value;
     }
   };
@@ -75,7 +75,7 @@ class common_iterator {
     requires convertible_to && convertible_to
   constexpr common_iterator(const common_iterator<_I2, _S2>& __other)
     : __hold_([&]() -> variant<_Iter, _Sent> {
-      _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Constructed from valueless iterator.");
+      _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Attempted to construct from a valueless common_iterator");
       if (__other.__hold_.index() == 0)
         return variant<_Iter, _Sent>{in_place_index<0>, _VSTD::__unchecked_get<0>(__other.__hold_)};
       return variant<_Iter, _Sent>{in_place_index<1>, _VSTD::__unchecked_get<1>(__other.__hold_)};
@@ -85,7 +85,7 @@ class common_iterator {
     requires convertible_to && convertible_to &&
              assignable_from<_Iter&, const _I2&> && assignable_from<_Sent&, const _S2&>
   common_iterator& operator=(const common_iterator<_I2, _S2>& __other) {
-    _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Assigned from valueless iterator.");
+    _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Attempted to assign from a valueless common_iterator");
 
     auto __idx = __hold_.index();
     auto __other_idx = __other.__hold_.index();
@@ -105,18 +105,16 @@ class common_iterator {
     return *this;
   }
 
-  decltype(auto) operator*()
+  constexpr decltype(auto) operator*()
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     return *_VSTD::__unchecked_get<_Iter>(__hold_);
   }
 
-  decltype(auto) operator*() const
+  constexpr decltype(auto) operator*() const
     requires __dereferenceable
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     return *_VSTD::__unchecked_get<_Iter>(__hold_);
   }
 
@@ -127,9 +125,7 @@ class common_iterator {
      is_reference_v> ||
      constructible_from, iter_reference_t<_I2>>)
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
-
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     if constexpr (is_pointer_v<_Iter> || requires(const _Iter& __i) { __i.operator->(); })    {
       return _VSTD::__unchecked_get<_Iter>(__hold_);
     } else if constexpr (is_reference_v>) {
@@ -141,15 +137,12 @@ class common_iterator {
   }
 
   common_iterator& operator++() {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot increment sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to increment a non-dereferenceable common_iterator");
     ++_VSTD::__unchecked_get<_Iter>(__hold_); return *this;
   }
 
   decltype(auto) operator++(int) {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot increment sentinel. Common iterator not holding an iterator.");
-
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to increment a non-dereferenceable common_iterator");
     if constexpr (forward_iterator<_Iter>) {
       auto __tmp = *this;
       ++*this;
@@ -166,10 +159,9 @@ class common_iterator {
 
   template _S2>
     requires sentinel_for<_Sent, _I2>
-  friend bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot compare valueless iterators.)");
+  friend constexpr bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -185,10 +177,9 @@ class common_iterator {
 
   template _S2>
     requires sentinel_for<_Sent, _I2> && equality_comparable_with<_Iter, _I2>
-  friend bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot compare valueless iterators.)");
+  friend constexpr bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -207,10 +198,9 @@ class common_iterator {
 
   template _I2, sized_sentinel_for<_Iter> _S2>
     requires sized_sentinel_for<_Sent, _I2>
-  friend iter_difference_t<_I2> operator-(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot subtract valueless iterators.)");
+  friend constexpr iter_difference_t<_I2> operator-(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to subtract from a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to subtract a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -227,24 +217,21 @@ class common_iterator {
     return _VSTD::__unchecked_get<_Sent>(__x.__hold_) - _VSTD::__unchecked_get<_I2>(__y.__hold_);
   }
 
-  friend iter_rvalue_reference_t<_Iter> iter_move(const common_iterator& __i)
+  friend constexpr iter_rvalue_reference_t<_Iter> iter_move(const common_iterator& __i)
     noexcept(noexcept(ranges::iter_move(declval())))
       requires input_iterator<_Iter>
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__i.__hold_),
-                   "Cannot iter_move a sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__i.__hold_), "Attempted to iter_move a non-dereferenceable common_iterator");
     return ranges::iter_move( _VSTD::__unchecked_get<_Iter>(__i.__hold_));
   }
 
   template _I2, class _S2>
-  friend void iter_swap(const common_iterator& __x, const common_iterator<_I2, _S2>& __y)
+  friend constexpr void iter_swap(const common_iterator& __x, const common_iterator<_I2, _S2>& __y)
       noexcept(noexcept(ranges::iter_swap(declval(), declval())))
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__x.__hold_),
-                   "Cannot swap __y with a sentinel. Common iterator (__x) not holding an iterator.");
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__y.__hold_),
-                   "Cannot swap __x with a sentinel. Common iterator (__y) not holding an iterator.");
-    return ranges::iter_swap( _VSTD::__unchecked_get<_Iter>(__x.__hold_),  _VSTD::__unchecked_get<_Iter>(__y.__hold_));
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__x.__hold_), "Attempted to iter_swap a non-dereferenceable common_iterator");
+    _LIBCPP_ASSERT(holds_alternative<_I2>(__y.__hold_), "Attempted to iter_swap a non-dereferenceable common_iterator");
+    return ranges::iter_swap(_VSTD::__unchecked_get<_Iter>(__x.__hold_), _VSTD::__unchecked_get<_I2>(__y.__hold_));
   }
 };
 
@@ -271,7 +258,7 @@ struct __arrow_type_or_void {
 template
   requires __common_iter_has_ptr_op<_Iter, _Sent>
 struct __arrow_type_or_void<_Iter, _Sent> {
-    using type = decltype(declval>().operator->());
+    using type = decltype(declval&>().operator->());
 };
 
 template
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
new file mode 100644
index 0000000000000..3eb99e1428673
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// template S>
+//   requires (!same_as && copyable)
+
+#include 
+
+#include "test_iterators.h"
+
+template
+concept ValidCommonIterator = requires {
+  typename std::common_iterator;
+};
+
+static_assert( ValidCommonIterator);
+static_assert(!ValidCommonIterator); // !input_or_output_iterator
+static_assert(!ValidCommonIterator); // !sentinel_for
+static_assert(!ValidCommonIterator); // !same_as
+static_assert(!ValidCommonIterator, sentinel_wrapper>>); // !copyable
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
new file mode 100644
index 0000000000000..b28120537b5a4
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// template
+//   requires convertible_to && convertible_to
+//     constexpr common_iterator(const common_iterator& x);
+
+#include 
+#include 
+
+#include "test_macros.h"
+
+constexpr bool test()
+{
+  struct Base {};
+  struct Derived : Base {};
+
+  using BaseIt = std::common_iterator;
+  using DerivedIt = std::common_iterator;
+  static_assert(std::is_convertible_v); // Derived* to Base*
+  static_assert(!std::is_constructible_v); // Base* to Derived*
+
+  Derived a[10] = {};
+  DerivedIt it = DerivedIt(a); // the iterator type
+  BaseIt jt = BaseIt(it);
+  assert(jt == BaseIt(a));
+
+  it = DerivedIt((const Derived*)a); // the sentinel type
+  jt = BaseIt(it);
+  assert(jt == BaseIt(a));
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
new file mode 100644
index 0000000000000..199ceb66893e6
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator() requires default_initializable = default;
+
+#include 
+#include 
+
+#include "test_iterators.h"
+
+constexpr bool test()
+{
+  {
+    using It = cpp17_input_iterator;
+    using CommonIt = std::common_iterator>;
+    static_assert(!std::is_default_constructible_v); // premise
+    static_assert(!std::is_default_constructible_v); // conclusion
+  }
+  {
+    // The base iterator is value-initialized.
+    std::common_iterator> c;
+    assert(c == nullptr);
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
new file mode 100644
index 0000000000000..fd47612eca389
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator(I i);
+
+#include 
+#include 
+
+#include "test_iterators.h"
+
+template
+constexpr bool test() {
+  using CommonIt = std::common_iterator>;
+  int a[] = {1,2,3};
+  It it = It(a);
+  CommonIt lv = CommonIt(it);
+  assert(&*lv == a);
+  CommonIt rv = CommonIt(std::move(it));
+  assert(&*rv == a);
+
+  return true;
+}
+
+int main(int, char**) {
+  test>();
+  test>();
+  test>();
+  test>();
+  test>();
+  test();
+  test();
+
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test());
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
deleted file mode 100644
index c329e8b8a81a7..0000000000000
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// constexpr common_iterator() requires default_initializable = default;
-// constexpr common_iterator(I i);
-// constexpr common_iterator(S s);
-// template
-//   requires convertible_to && convertible_to
-//     constexpr common_iterator(const common_iterator& x);
-
-#include 
-#include 
-
-#include "test_macros.h"
-#include "types.h"
-
-template
-concept ValidCommonIterator = requires {
-  typename std::common_iterator;
-};
-
-template
-concept ConvCtorEnabled = requires(std::common_iterator> ci) {
-  std::common_iterator>(ci);
-};
-
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
-
-  static_assert( std::is_default_constructible_v>>);
-  static_assert(!std::is_default_constructible_v, sentinel_type>>);
-
-  // Not copyable:
-  static_assert(!ValidCommonIterator, sentinel_type>);
-  // Same iter and sent:
-  static_assert(!ValidCommonIterator, cpp17_input_iterator>);
-
-  {
-    auto iter1 = cpp17_input_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonSent1 = std::common_iterator>(sentinel_type{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-  {
-    auto iter1 = forward_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonSent1 = std::common_iterator>(sentinel_type{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-  {
-    auto iter1 = random_access_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonSent1 = std::common_iterator>(sentinel_type{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-
-  // Conversion constructor:
-  {
-    convertible_iterator conv{buffer};
-    auto commonIter1 = std::common_iterator, sentinel_type>(conv);
-    auto commonIter2 = std::common_iterator, sentinel_type>(commonIter1);
-    assert(*commonIter2 == 1);
-
-    static_assert( ConvCtorEnabled, convertible_iterator>);
-    static_assert(!ConvCtorEnabled, random_access_iterator>);
-  }
-}
-
-int main(int, char**) {
-  test();
-
-  return 0;
-}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp
new file mode 100644
index 0000000000000..c6c0f301ead99
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator(S s);
+
+#include 
+#include 
+#include 
+
+#include "test_iterators.h"
+
+template
+constexpr bool test() {
+  using Sent = sentinel_wrapper;
+  using CommonIt = std::common_iterator;
+  int a[] = {1,2,3};
+  It it = It(a);
+  Sent sent = Sent(It(a+1));
+
+  CommonIt lv = CommonIt(sent);
+  assert(lv == CommonIt(sent));
+  assert(lv != CommonIt(it));
+  if (!std::is_constant_evaluated()) {
+    assert(lv == std::next(CommonIt(it)));
+  }
+
+  CommonIt rv = CommonIt(std::move(sent));
+  assert(rv == CommonIt(sent));
+  assert(rv != CommonIt(it));
+  if (!std::is_constant_evaluated()) {
+    assert(rv == std::next(CommonIt(it)));
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  test>();
+  test>();
+  test>();
+  test>();
+  test>();
+  test();
+  test();
+
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test>());
+  static_assert(test());
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
index 0bb4d455c34d3..4c8ae2dae729d 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
@@ -15,35 +15,79 @@
 
 #include 
 #include 
+#include 
 
+#include "test_iterators.h"
 #include "test_macros.h"
-#include "types.h"
 
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+struct IterMovingIt {
+  using value_type = int;
+  using difference_type = int;
+  explicit IterMovingIt() = default;
+  IterMovingIt(const IterMovingIt&); // copyable, but this test shouldn't make copies
+  IterMovingIt(IterMovingIt&&) = default;
+  IterMovingIt& operator=(const IterMovingIt&);
+  int& operator*() const;
+  constexpr IterMovingIt& operator++() { return *this; }
+  IterMovingIt operator++(int);
+  friend constexpr int iter_move(const IterMovingIt&) {
+    return 42;
+  }
+  bool operator==(std::default_sentinel_t) const;
+};
+static_assert(std::input_iterator);
 
+constexpr bool test() {
   {
-    auto iter1 = cpp17_input_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = int*;
+    using CommonIt = std::common_iterator>;
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    ASSERT_NOEXCEPT(iter_move(it));
+    ASSERT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), int&&);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), int&&);
+    assert(iter_move(it) == 1);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 2);
+    }
   }
   {
-    auto iter1 = forward_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = const int*;
+    using CommonIt = std::common_iterator>;
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    ASSERT_NOEXCEPT(iter_move(it));
+    ASSERT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), const int&&);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), const int&&);
+    assert(iter_move(it) == 1);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 2);
+    }
   }
   {
-    auto iter1 = random_access_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = IterMovingIt;
+    using CommonIt = std::common_iterator;
+    CommonIt it = CommonIt(It());
+    ASSERT_NOT_NOEXCEPT(iter_move(it));
+    ASSERT_NOT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), int);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), int);
+    assert(iter_move(it) == 42);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 42);
+    }
   }
+  return true;
 }
 
 int main(int, char**) {
   test();
+  static_assert(test());
 
   return 0;
 }
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
index f649d334b9cb6..3b69a6d7bd4b5 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
@@ -10,55 +10,114 @@
 // UNSUPPORTED: libcpp-no-concepts
 
 // template I2, class S2>
-//   friend void iter_swap(const common_iterator& x, const common_iterator& y)
+//   friend constexpr void iter_swap(const common_iterator& x, const common_iterator& y)
 //     noexcept(noexcept(ranges::iter_swap(declval(), declval())));
 
 #include 
 #include 
+#include 
 
+#include "test_iterators.h"
 #include "test_macros.h"
-#include "types.h"
 
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+template
+struct IterSwappingIt {
+  using value_type = int;
+  using difference_type = int;
+  constexpr explicit IterSwappingIt(int *swaps) : swaps_(swaps) {}
+  IterSwappingIt(const IterSwappingIt&); // copyable, but this test shouldn't make copies
+  IterSwappingIt(IterSwappingIt&&) = default;
+  IterSwappingIt& operator=(const IterSwappingIt&);
+  int& operator*() const;
+  constexpr IterSwappingIt& operator++() { return *this; }
+  IterSwappingIt operator++(int);
 
+  template
+  friend constexpr int iter_swap(const IterSwappingIt& lhs, const IterSwappingIt& rhs) {
+    *lhs.swaps_ += 10;
+    *rhs.swaps_ += 1;
+    return 42; // should be accepted but ignored
+  }
+
+  bool operator==(std::default_sentinel_t) const;
+
+  int *swaps_ = nullptr;
+};
+static_assert(std::input_iterator>);
+static_assert(std::indirectly_swappable, IterSwappingIt<0>>);
+static_assert(std::indirectly_swappable, IterSwappingIt<1>>);
+
+constexpr bool test() {
   {
-    auto iter1 = cpp17_input_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonIter2 = std::common_iterator>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = int*;
+    using CommonIt = std::common_iterator>;
+    static_assert(std::indirectly_swappable);
+
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    CommonIt jt = CommonIt(It(a+1));
+    ASSERT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt);
+    assert(a[0] == 2);
+    assert(a[1] == 1);
   }
   {
-    auto iter1 = forward_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonIter2 = std::common_iterator>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = const int*;
+    using CommonIt = std::common_iterator>;
+    static_assert(!std::indirectly_swappable);
   }
   {
-    auto iter1 = random_access_iterator(buffer);
-    auto commonIter1 = std::common_iterator>(iter1);
-    auto commonIter2 = std::common_iterator>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = IterSwappingIt<0>;
+    using CommonIt = std::common_iterator;
+    static_assert(std::indirectly_swappable);
+
+    int iswaps = 100;
+    int jswaps = 100;
+    CommonIt it = CommonIt(It(&iswaps));
+    CommonIt jt = CommonIt(It(&jswaps));
+    ASSERT_NOT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt); // lvalue iterators
+    assert(iswaps == 110);
+    assert(jswaps == 101);
+    iter_swap(CommonIt(It(&iswaps)), CommonIt(It(&jswaps))); // rvalue iterators
+    assert(iswaps == 120);
+    assert(jswaps == 102);
+    std::ranges::iter_swap(it, jt);
+    assert(iswaps == 130);
+    assert(jswaps == 103);
+  }
+  {
+    using It = IterSwappingIt<0>;
+    using Jt = IterSwappingIt<1>;
+    static_assert(std::indirectly_swappable);
+    using CommonIt = std::common_iterator;
+    using CommonJt = std::common_iterator;
+    static_assert(std::indirectly_swappable);
+
+    int iswaps = 100;
+    int jswaps = 100;
+    CommonIt it = CommonIt(It(&iswaps));
+    CommonJt jt = CommonJt(Jt(&jswaps));
+    ASSERT_NOT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt); // lvalue iterators
+    assert(iswaps == 110);
+    assert(jswaps == 101);
+    iter_swap(CommonIt(It(&iswaps)), CommonJt(Jt(&jswaps))); // rvalue iterators
+    assert(iswaps == 120);
+    assert(jswaps == 102);
+    std::ranges::iter_swap(it, jt);
+    assert(iswaps == 130);
+    assert(jswaps == 103);
   }
+  return true;
 }
 
 int main(int, char**) {
   test();
+  static_assert(test());
 
   return 0;
 }
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
index d5068b3cf013f..03b94f63f631e 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
@@ -157,32 +157,6 @@ class comparable_iterator
     }
 };
 
-template 
-class convertible_iterator
-{
-    It it_;
-
-public:
-    typedef          std::input_iterator_tag                   iterator_category;
-    typedef typename std::iterator_traits::value_type      value_type;
-    typedef typename std::iterator_traits::difference_type difference_type;
-    typedef It                                                 pointer;
-    typedef typename std::iterator_traits::reference       reference;
-
-    constexpr It base() const {return it_;}
-
-    convertible_iterator() = default;
-    explicit constexpr convertible_iterator(It it) : it_(it) {}
-
-    constexpr reference operator*() const {return *it_;}
-
-    constexpr convertible_iterator& operator++() {++it_; return *this;}
-    constexpr convertible_iterator operator++(int)
-        {convertible_iterator tmp(*this); ++(*this); return tmp;}
-
-    operator forward_iterator() const { return forward_iterator(it_); }
-};
-
 template 
 class non_const_deref_iterator
 {
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 5e084d3222c68..60313a5889a52 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -117,38 +117,6 @@ class forward_iterator
     void operator,(T const &) = delete;
 };
 
-template 
-class non_default_constructible_iterator
-{
-    It it_;
-
-    template  friend class non_default_constructible_iterator;
-public:
-    typedef          std::input_iterator_tag                   iterator_category;
-    typedef typename std::iterator_traits::value_type      value_type;
-    typedef typename std::iterator_traits::difference_type difference_type;
-    typedef It                                                 pointer;
-    typedef typename std::iterator_traits::reference       reference;
-
-    non_default_constructible_iterator() = delete;
-
-    TEST_CONSTEXPR explicit non_default_constructible_iterator(It it) : it_(it) {}
-    template 
-        TEST_CONSTEXPR non_default_constructible_iterator(const non_default_constructible_iterator& u) : it_(u.it_) {}
-
-    TEST_CONSTEXPR reference operator*() const {return *it_;}
-    TEST_CONSTEXPR pointer operator->() const {return it_;}
-
-    TEST_CONSTEXPR_CXX14 non_default_constructible_iterator& operator++() {++it_; return *this;}
-    TEST_CONSTEXPR_CXX14 non_default_constructible_iterator operator++(int) {return non_default_constructible_iterator(it_++);}
-
-    friend TEST_CONSTEXPR bool operator==(const non_default_constructible_iterator& x, const non_default_constructible_iterator& y) {return x.it_ == y.it_;}
-    friend TEST_CONSTEXPR bool operator!=(const non_default_constructible_iterator& x, const non_default_constructible_iterator& y) {return x.it_ != y.it_;}
-
-    template 
-    void operator,(T const &) = delete;
-};
-
 template 
 class bidirectional_iterator
 {

From 4684857abfd7cadde9693eed8cfd21446047c579 Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Sat, 18 Dec 2021 15:03:26 +0100
Subject: [PATCH 433/946] [libc++][format] Finish P0645 Text Formatting.

This adjusts the version macro and sets it as completed. All parts of the paper
have been implemented, except for the parts replaced by later papers and
LWG-issues.

Adjusted the synopsis to match the synopsis in the Standard. Not yet
implemented parts of P2216 and P2418 still use the P0645 wording.

Completes:
- P0645 Text Formatting

Depends on D115991

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115999
---
 libcxx/docs/ReleaseNotes.rst        |  11 +-
 libcxx/docs/Status/Cxx20.rst        |   1 +
 libcxx/docs/Status/Cxx20Papers.csv  |   2 +-
 libcxx/docs/Status/FormatIssues.csv |   2 +-
 libcxx/include/format               | 158 ++--------------------------
 5 files changed, 18 insertions(+), 156 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index e1be3a6c114c1..b35c8a3329515 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -38,10 +38,13 @@ What's New in Libc++ 14.0.0?
 New Features
 ------------
 
-- There's initial support for the C++20 header ``<format>``. The implementation
-  is incomplete. Some functions are known to be inefficient; both in memory
-  usage and performance. The implementation is considered experimental and isn't
-  considered ABI stable.
+- There's support for the C++20 header ``<format>``. Some parts are still
+  missing, most notably the compile-time format string validation. Some
+  functions are known to be inefficient, both in memory usage and performance.
+  The implementation isn't API- or ABI-stable and therefore considered
+  experimental. (Some not-yet-implemented papers require an API-break.)
+  Vendors can still disable this header by turning the CMake option
+  `LIBCXX_ENABLE_INCOMPLETE_FEATURES` off.
 
 - There's a new CMake option ``LIBCXX_ENABLE_UNICODE`` to disable Unicode
   support in the ``<format>`` header. This only affects the estimation of the
diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst
index d59fd8a8b35f7..43744077638ff 100644
--- a/libcxx/docs/Status/Cxx20.rst
+++ b/libcxx/docs/Status/Cxx20.rst
@@ -41,6 +41,7 @@ Paper Status
 .. note::
 
    .. [#note-P0600] P0600: The missing bits in P0600 are in |sect|\ [mem.res.class] and |sect|\ [mem.poly.allocator.class].
+   .. [#note-P0645] P0645: The paper is implemented but still marked as an incomplete feature. Not yet implemented LWG-issues will cause API and ABI breakage.
    .. [#note-P0966] P0966: It was previously erroneously marked as complete in version 8.0. See `bug 45368 `__.
    .. [#note-P0619] P0619: Only sections D.8, D.9, D.10 and D.13 are implemented. Sections D.4, D.7, D.11, D.12, and D.14 remain undone.
    .. [#note-P0883] P0883: shared_ptr and floating-point changes weren't applied as they themselves aren't implemented yet.
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index 5a974a798f7bd..9e4e7284109be 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -103,7 +103,7 @@
 "`P0466R5 `__","LWG","Layout-compatibility and Pointer-interconvertibility Traits","Cologne","",""
 "`P0553R4 `__","LWG","Bit operations","Cologne","|Complete|","9.0"
 "`P0631R8 `__","LWG","Math Constants","Cologne","|Complete|","11.0"
-"`P0645R10 `__","LWG","Text Formatting","Cologne","|In Progress|",""
+"`P0645R10 `__","LWG","Text Formatting","Cologne","|Complete| [#note-P0645]_","14.0"
 "`P0660R10 `__","LWG","Stop Token and Joining Thread, Rev 10","Cologne","",""
 "`P0784R7 `__","CWG","More constexpr containers","Cologne","|Complete|","12.0"
 "`P0980R1 `__","LWG","Making std::string constexpr","Cologne","",""
diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv
index 0984712fdb22a..b979b0de3a18d 100644
--- a/libcxx/docs/Status/FormatIssues.csv
+++ b/libcxx/docs/Status/FormatIssues.csv
@@ -1,5 +1,5 @@
 Number,Name,Assignee,Patch,Status,First released version
-`P0645 `_,"Text Formatting",Mark de Wever,,|Partial|,
+`P0645 `_,"Text Formatting",Mark de Wever,,|Complete|,Clang 14
 `P1652 `_,"Printf corner cases in std::format",Mark de Wever,"`D103433 `__, `D114001 `__",|Review|,
 `P1892 `_,"Extended locale-specific presentation specifiers for std::format",Mark de Wever,`D103368 `__,|Complete|,Clang 14
 `P1868 `_,"width: clarifying units of width and precision in std::format (Implements the unicode support.)",Mark de Wever,"`D103413 `__ `D103425 `__ `D103670 `__",|Complete|,Clang 14
diff --git a/libcxx/include/format b/libcxx/include/format
index c1f1be7d31b98..4cf146ead17a2 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -14,43 +14,15 @@
 
 namespace std {
   // [format.context], class template basic_format_context
-  template
-  class basic_format_context {
-    basic_format_args args_;      // exposition only
-    Out out_;                                           // exposition only
-
-  public:
-    using iterator = Out;
-    using char_type = charT;
-    template using formatter_type = formatter;
-
-    basic_format_arg arg(size_t id) const;
-    std::locale locale();
-
-    iterator out();
-    void advance_to(iterator it);
-  };
+  template class basic_format_context;
   using format_context = basic_format_context;
   using wformat_context = basic_format_context;
 
   // [format.args], class template basic_format_args
-  template
-  class basic_format_args {
-    size_t size_;                               // exposition only
-    const basic_format_arg* data_;     // exposition only
-
-  public:
-    basic_format_args() noexcept;
-
-    template
-      basic_format_args(const format-arg-store& store) noexcept;
-
-    basic_format_arg get(size_t i) const noexcept;
-  };
+  template class basic_format_args;
   using format_args = basic_format_args;
   using wformat_args = basic_format_args;
 
-
   // [format.functions], formatting functions
   template
     string format(string_view fmt, const Args&... args);
@@ -90,8 +62,7 @@ namespace std {
     Out out;
     iter_difference_t size;
   };
-
- template
+  template
     format_to_n_result format_to_n(Out out, iter_difference_t n,
                                         string_view fmt, const Args&... args);
   template
@@ -116,99 +87,22 @@ namespace std {
     size_t formatted_size(const locale& loc, wstring_view fmt, const Args&... args);
 
   // [format.formatter], formatter
-  template<> struct formatter;
-  template<> struct formatter;
-  template<> struct formatter;
-
-  template<> struct formatter;
-  template<> struct formatter;
-  template struct formatter;
-  template
-    struct formatter, charT>;
-  template
-    struct formatter, charT>;
+  template struct formatter;
 
   // [format.parse.ctx], class template basic_format_parse_context
-  template
-  class basic_format_parse_context {
-  public:
-    using char_type = charT;
-    using const_iterator = typename basic_string_view::const_iterator;
-    using iterator = const_iterator;
-
-  private:
-    iterator begin_;                                    // exposition only
-    iterator end_;                                      // exposition only
-    enum indexing { unknown, manual, automatic };       // exposition only
-    indexing indexing_;                                 // exposition only
-    size_t next_arg_id_;                                // exposition only
-    size_t num_args_;                                   // exposition only
-
-  public:
-    constexpr explicit basic_format_parse_context(basic_string_view fmt,
-                                                  size_t num_args = 0) noexcept;
-    basic_format_parse_context(const basic_format_parse_context&) = delete;
-    basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
-
-    constexpr const_iterator begin() const noexcept;
-    constexpr const_iterator end() const noexcept;
-    constexpr void advance_to(const_iterator it);
-
-    constexpr size_t next_arg_id();
-    constexpr void check_arg_id(size_t id);
-  };
+  template class basic_format_parse_context;
   using format_parse_context = basic_format_parse_context;
   using wformat_parse_context = basic_format_parse_context;
 
   // [format.arguments], arguments
   // [format.arg], class template basic_format_arg
-  template
-  class basic_format_arg {
-  public:
-    class handle;
-
-  private:
-    using char_type = typename Context::char_type;                              // exposition only
-
-    variant,
-            const void*, handle> value;                                         // exposition only
-
-    template explicit basic_format_arg(const T& v) noexcept;           // exposition only
-    explicit basic_format_arg(float n) noexcept;                                // exposition only
-    explicit basic_format_arg(double n) noexcept;                               // exposition only
-    explicit basic_format_arg(long double n) noexcept;                          // exposition only
-    explicit basic_format_arg(const char_type* s);                              // exposition only
-
-    template
-      explicit basic_format_arg(
-        basic_string_view s) noexcept;                       // exposition only
-
-    template
-      explicit basic_format_arg(
-        const basic_string& s) noexcept;          // exposition only
-
-    explicit basic_format_arg(nullptr_t) noexcept;                              // exposition only
-
-    template
-      explicit basic_format_arg(const T* p) noexcept;                           // exposition only
-
-  public:
-    basic_format_arg() noexcept;
-
-    explicit operator bool() const noexcept;
-  };
+  template class basic_format_arg;
 
   template
     see below visit_format_arg(Visitor&& vis, basic_format_arg arg);
 
   // [format.arg.store], class template format-arg-store
-  template
-  struct format-arg-store {      // exposition only
-    array, sizeof...(Args)> args;
-  };
+  template struct format-arg-store;      // exposition only
 
   template
     format-arg-store
@@ -218,43 +112,7 @@ namespace std {
       make_wformat_args(const Args&... args);
 
   // [format.error], class format_error
-  class format_error : public runtime_error {
-  public:
-    explicit format_error(const string& what_arg);
-    explicit format_error(const char* what_arg);
-  };
-
-  // [format.parse.ctx], class template basic_format_parse_context
-  template
-  class basic_format_parse_context {
-  public:
-    using char_type = charT;
-    using const_iterator = typename basic_string_view::const_iterator;
-    using iterator = const_iterator;
-
-  private:
-    iterator begin_;                                    // exposition only
-    iterator end_;                                      // exposition only
-    enum indexing { unknown, manual, automatic };       // exposition only
-    indexing indexing_;                                 // exposition only
-    size_t next_arg_id_;                                // exposition only
-    size_t num_args_;                                   // exposition only
-
-  public:
-    constexpr explicit basic_format_parse_context(basic_string_view fmt,
-                                                  size_t num_args = 0) noexcept;
-    basic_format_parse_context(const basic_format_parse_context&) = delete;
-    basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
-
-    constexpr const_iterator begin() const noexcept;
-    constexpr const_iterator end() const noexcept;
-    constexpr void advance_to(const_iterator it);
-
-    constexpr size_t next_arg_id();
-    constexpr void check_arg_id(size_t id);
-  };
-  using format_parse_context = basic_format_parse_context;
-  using wformat_parse_context = basic_format_parse_context;
+  class format_error;
 }
 
 */

From b1af01fe6aa7cb733461b2d475add77b947b79fb Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 24 Jan 2022 11:18:02 -0800
Subject: [PATCH 434/946] [NFC][MLGO] Simplify conditional compilation

Most of the code that's shared between 'release' and 'development'
modes doesn't depend on anything special.
---
 llvm/lib/Analysis/MLInlineAdvisor.cpp       |  9 +--------
 llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 22 +++++++++------------
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 203e0b025e6c6..0480c1cd28428 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -36,11 +36,7 @@
 
 using namespace llvm;
 
-#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL
-#define LLVM_HAVE_TF_AOT
-#endif
-
-#if defined(LLVM_HAVE_TF_AOT)
+#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
 
@@ -55,8 +51,6 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
 
 #define DEBUG_TYPE "inline-ml"
 
-#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
-
 static cl::opt SizeIncreaseThreshold(
     "ml-advisor-size-increase-threshold", cl::Hidden,
     cl::desc("Maximum factor by which expected native size may increase before "
@@ -417,4 +411,3 @@ void MLInlineAdvice::recordUnattemptedInliningImpl() {
     return R;
   });
 }
-#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 848f63da288de..a74c57690640c 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -42,11 +42,9 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "ml-regalloc"
-#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
-#define LLVM_HAVE_TF_AOT
-#endif
+
 // Generated header in release (AOT) mode
-#if defined LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 #include "RegallocEvictModel.h"
 #endif
 
@@ -104,7 +102,6 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
 // ===================================
 // Common ML Advisor declarations
 // ===================================
-#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
 namespace {
 // This is the maximum number of interfererring ranges. That's the number of
 // distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
@@ -193,7 +190,9 @@ static const std::vector PerLiveRangeShape{1, NumberOfInterferences};
 // of the output tensor.
 // The contract with the model is that the output will be guaranteed to be to a
 // mask == 1 position.
-const char *const DecisionName = "index_to_evict";
+// Using a macro here to avoid 'not used' warnings (and keep cond compilation to
+// a minimum)
+#define DecisionName "index_to_evict"
 
 // Named features index.
 enum FeatureIDs {
@@ -296,13 +295,12 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {
 // ===================================
 // Release (AOT) - specifics
 // ===================================
-#ifdef LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 const std::array FeatureNames{
 #define _GETNAME(_, NAME, __, ___) #NAME,
     RA_EVICT_FEATURES_LIST(_GETNAME)
 #undef _GETNAME
 };
-
 class ReleaseModeEvictionAdvisorAnalysis final
     : public RegAllocEvictionAdvisorAnalysis {
 public:
@@ -331,7 +329,7 @@ class ReleaseModeEvictionAdvisorAnalysis final
   }
   std::unique_ptr> Runner;
 };
-#endif // LLVM_HAVE_TF_AOT
+#endif
 
 // ===================================
 // Development mode-specifics
@@ -852,13 +850,11 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
 }
 #endif // #ifdef LLVM_HAVE_TF_API
 
-// Release mode specific implementations
-#if defined LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
   return new ReleaseModeEvictionAdvisorAnalysis();
 }
-#endif // defined(LLVM_HAVE_TF_AOT)
-#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
+#endif
 
 // In all cases except development mode, we don't need scoring.
 #if !defined(LLVM_HAVE_TF_API)

From c27f8fb96882ee8c684d03068836cf610d4f0640 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 21 Jan 2022 09:55:34 -0800
Subject: [PATCH 435/946] [AMDGPU] Remove cndmask from readsExecAsData

Differential Revision: https://reviews.llvm.org/D117909
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp |   5 -
 llvm/test/CodeGen/AMDGPU/licm-valu.mir | 144 -------------------------
 2 files changed, 149 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3ac04ee717dea..f89f109d0d3a2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -138,11 +138,6 @@ static bool readsExecAsData(const MachineInstr &MI) {
   default:
     break;
   case AMDGPU::V_READFIRSTLANE_B32:
-  case AMDGPU::V_CNDMASK_B64_PSEUDO:
-  case AMDGPU::V_CNDMASK_B32_dpp:
-  case AMDGPU::V_CNDMASK_B32_e32:
-  case AMDGPU::V_CNDMASK_B32_e64:
-  case AMDGPU::V_CNDMASK_B32_sdwa:
     return true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
index 45a050cffde9c..0bf2c7c2bc3ba 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -99,147 +99,3 @@ body:             |
   bb.2:
     S_ENDPGM 0
 ...
----
-name: no_hoist_cndmask_e64
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_e64
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF1]], implicit $exec
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %0, %1, implicit $exec
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_e32
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_e32
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 [[DEF]], [[DEF]], implicit undef $vcc, implicit $exec
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_e32 %0, %0, implicit undef $vcc, implicit $exec
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_dpp
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_dpp
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp [[DEF]], 0, [[DEF]], 0, [[DEF]], 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_dpp %0:vgpr_32, 0, %0:vgpr_32, 0, %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_sdwa
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_sdwa
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[DEF]], 0, [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, %0:vgpr_32, 0, %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...

From 4858fe04a1571e78ff97b778c0fb6a46855c3d6a Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:44:48 +0100
Subject: [PATCH 436/946] [lldb/Plugins] Add ScriptedProcess::GetThreadsInfo
 interface

This patch adds a new method to the Scripted Process interface to
retrieve a dictionary of Scripted Threads. It uses the thread ID as a key
and the Scripted Thread instance as the value.

This dictionary will be used to create Scripted Threads in lldb and
perform calls to the python scripted thread object.

rdar://87427126

Differential Revision: https://reviews.llvm.org/D117068

Signed-off-by: Med Ismail Bennani 
---
 .../python/scripted_process/scripted_process.py       | 11 +++++++++++
 .../lldb/Interpreter/ScriptedProcessInterface.h       |  2 ++
 .../Python/ScriptedProcessPythonInterface.cpp         | 11 +++++++++++
 .../Python/ScriptedProcessPythonInterface.h           |  2 ++
 4 files changed, 26 insertions(+)

diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py
index 16dcc72748532..ec751b495fdb6 100644
--- a/lldb/examples/python/scripted_process/scripted_process.py
+++ b/lldb/examples/python/scripted_process/scripted_process.py
@@ -19,6 +19,7 @@ class ScriptedProcess:
     memory_regions = None
     stack_memory_dump = None
     loaded_images = None
+    threads = {}
 
     @abstractmethod
     def __init__(self, target, args):
@@ -51,6 +52,16 @@ def get_memory_region_containing_address(self, addr):
         """
         pass
 
+    def get_threads_info(self):
+        """ Get the dictionary describing the process' Scripted Threads.
+
+        Returns:
+            Dict: The dictionary of threads, with the thread ID as the key and
+            a Scripted Thread instance as the value.
+            The dictionary can be empty.
+        """
+        return self.threads
+
     @abstractmethod
     def get_thread_with_id(self, tid):
         """ Get the scripted process thread with a specific ID.
diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index 26fd956f96bbc..efdea6df2d417 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -41,6 +41,8 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
     return {};
   }
 
+  virtual StructuredData::DictionarySP GetThreadsInfo() { return nullptr; }
+
   virtual StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) {
     return nullptr;
   }
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index da8ff42213552..447bceebb00b4 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -92,6 +92,17 @@ ScriptedProcessPythonInterface::GetMemoryRegionContainingAddress(
   return mem_region;
 }
 
+StructuredData::DictionarySP ScriptedProcessPythonInterface::GetThreadsInfo() {
+  Status error;
+  StructuredData::DictionarySP dict =
+      Dispatch("get_threads_info", error);
+
+  if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error))
+    return {};
+
+  return dict;
+}
+
 StructuredData::DictionarySP
 ScriptedProcessPythonInterface::GetThreadWithID(lldb::tid_t tid) {
   Status error;
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index 421bdd59887ce..ac4e768b2d31b 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -39,6 +39,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   GetMemoryRegionContainingAddress(lldb::addr_t address,
                                    Status &error) override;
 
+  StructuredData::DictionarySP GetThreadsInfo() override;
+
   StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) override;
 
   StructuredData::DictionarySP GetRegistersForThread(lldb::tid_t tid) override;

From 1b86344fa80bd11853e0347ea33dc6cb5a460c4f Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:45:27 +0100
Subject: [PATCH 437/946] [lldb/Plugins] Move ScriptedThreadInterface to
 ScriptedThread

Since we can have multiple Scripted Threads per Scripted Process, having
only a single ScriptedThreadInterface (with a single object instance)
will cause the method calls to be done on the wrong object.

Instead, this patch creates a separate ScriptedThreadInterface for each
new lldb_private::ScriptedThread to make sure we interact with the right
instance.

rdar://87427911

Differential Revision: https://reviews.llvm.org/D117070

Signed-off-by: Med Ismail Bennani 
---
 lldb/include/lldb/Interpreter/ScriptedProcessInterface.h  | 4 +---
 lldb/source/Plugins/Process/scripted/ScriptedThread.cpp   | 6 ++++--
 lldb/source/Plugins/Process/scripted/ScriptedThread.h     | 1 +
 .../Python/ScriptedProcessPythonInterface.cpp             | 8 ++------
 .../Python/ScriptedProcessPythonInterface.h               | 2 +-
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index efdea6df2d417..d62767417f339 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -68,11 +68,9 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
 
 protected:
   friend class ScriptedThread;
-  virtual lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() {
+  virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() {
     return nullptr;
   }
-
-  lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
 };
 
 class ScriptedThreadInterface : virtual public ScriptedInterface {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 959b8c5818852..4185e1b67587b 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -29,7 +29,9 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
 }
 
 ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
-    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process) {
+    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
+      m_scripted_thread_interface_sp(
+          m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
   if (!process.IsValid()) {
     error.SetErrorString("Invalid scripted process");
     return;
@@ -190,7 +192,7 @@ void ScriptedThread::RefreshStateAfterStop() {
 }
 
 lldb::ScriptedThreadInterfaceSP ScriptedThread::GetInterface() const {
-  return m_scripted_process.GetInterface().GetScriptedThreadInterface();
+  return m_scripted_thread_interface_sp;
 }
 
 std::shared_ptr ScriptedThread::GetDynamicRegisterInfo() {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index cdcd543702a48..54b095777ab73 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -59,6 +59,7 @@ class ScriptedThread : public lldb_private::Thread {
   std::shared_ptr GetDynamicRegisterInfo();
 
   const ScriptedProcess &m_scripted_process;
+  lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
   std::shared_ptr m_register_info_sp = nullptr;
   lldb_private::StructuredData::ObjectSP m_script_object_sp = nullptr;
 };
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index 447bceebb00b4..29516c4c4501e 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -165,12 +165,8 @@ ScriptedProcessPythonInterface::GetScriptedThreadPluginName() {
 }
 
 lldb::ScriptedThreadInterfaceSP
-ScriptedProcessPythonInterface::GetScriptedThreadInterface() {
-  if (!m_scripted_thread_interface_sp)
-    m_scripted_thread_interface_sp =
-        std::make_shared(m_interpreter);
-
-  return m_scripted_thread_interface_sp;
+ScriptedProcessPythonInterface::CreateScriptedThreadInterface() {
+  return std::make_shared(m_interpreter);
 }
 
 #endif
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index ac4e768b2d31b..83507a93bb973 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -57,7 +57,7 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   llvm::Optional GetScriptedThreadPluginName() override;
 
 private:
-  lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() override;
+  lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override;
 };
 } // namespace lldb_private
 

From d3e0f7e1503b1bca8baa6483d3b5c452a91f60a6 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:45:57 +0100
Subject: [PATCH 438/946] [lldb/Plugins] Add support of multiple
 ScriptedThreads in a ScriptedProcess

This patch adds support of multiple Scripted Threads in a ScriptedProcess.

This is done by fetching the Scripted Threads info dictionary at every
ScriptedProcess::DoUpdateThreadList call and iterating over each element to
create a new ScriptedThread using the object instance, if it was not
already available.

This patch also adds the ability to pass a pointer of a script interpreter
object instance to initialize a ScriptedInterface instead of having to call
the script object initializer in the ScriptedInterface constructor.

This is used to instantiate the ScriptedThreadInterface from the
ScriptedThread constructor, to be able to perform calls on that script
interpreter object instance.

Finally, the patch also updates the scripted process test to check for
multiple threads.

rdar://84507704

Differential Revision: https://reviews.llvm.org/D117071

Signed-off-by: Med Ismail Bennani 
---
 .../scripted_process/scripted_process.py      |  9 +--
 .../lldb/Interpreter/ScriptedInterface.h      |  3 +-
 .../Interpreter/ScriptedProcessInterface.h    |  6 +-
 .../Process/scripted/ScriptedProcess.cpp      | 61 ++++++++++++++-----
 .../Process/scripted/ScriptedThread.cpp       | 24 +++++---
 .../Plugins/Process/scripted/ScriptedThread.h |  3 +-
 .../Python/ScriptedProcessPythonInterface.cpp |  5 +-
 .../Python/ScriptedProcessPythonInterface.h   |  3 +-
 .../Python/ScriptedThreadPythonInterface.cpp  | 15 +++--
 .../Python/ScriptedThreadPythonInterface.h    |  3 +-
 .../functionalities/scripted_process/Makefile |  4 +-
 .../scripted_process/TestScriptedProcess.py   | 15 ++---
 .../invalid_scripted_process.py               |  3 +-
 .../functionalities/scripted_process/main.c   |  8 ---
 .../functionalities/scripted_process/main.cpp | 34 +++++++++++
 .../stack_core_scripted_process.py            | 53 ++++++++++++----
 16 files changed, 176 insertions(+), 73 deletions(-)
 delete mode 100644 lldb/test/API/functionalities/scripted_process/main.c
 create mode 100644 lldb/test/API/functionalities/scripted_process/main.cpp

diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py
index ec751b495fdb6..83ec3513cfcd7 100644
--- a/lldb/examples/python/scripted_process/scripted_process.py
+++ b/lldb/examples/python/scripted_process/scripted_process.py
@@ -70,7 +70,7 @@ def get_thread_with_id(self, tid):
             tid (int): Thread ID to look for in the scripted process.
 
         Returns:
-            Dict: The thread represented as a dictionary, withr the
+            Dict: The thread represented as a dictionary, with the
                 tid thread ID. None if tid doesn't match any of the scripted
                 process threads.
         """
@@ -212,11 +212,12 @@ def __init__(self, process, args):
         self.target = None
         self.process = None
         self.args = None
-        if isinstance(process, lldb.SBProcess) and process.IsValid():
-            self.process = process
-            self.target = process.GetTarget()
+        if isinstance(process, ScriptedProcess):
+            self.target = process.target
+            self.process = self.target.GetProcess()
 
         self.id = None
+        self.idx = None
         self.name = None
         self.queue = None
         self.state = None
diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 427fa3f4f793a..27cf9f036e5fd 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -27,7 +27,8 @@ class ScriptedInterface {
 
   virtual StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) = 0;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) = 0;
 
   template 
   Ret ErrorWithMessage(llvm::StringRef caller_name, llvm::StringRef error_msg,
diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index d62767417f339..0712b3bf4a3ee 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -23,7 +23,8 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
 public:
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override {
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override {
     return nullptr;
   }
 
@@ -77,7 +78,8 @@ class ScriptedThreadInterface : virtual public ScriptedInterface {
 public:
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override {
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override {
     return nullptr;
   }
 
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index cb21a3e7e65f3..f01e599ad5585 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -164,9 +164,6 @@ Status ScriptedProcess::DoLaunch(Module *exe_module,
 
   SetPrivateState(eStateStopped);
 
-  UpdateThreadListIfNeeded();
-  GetThreadList();
-
   return {};
 }
 
@@ -304,19 +301,55 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
             .str(),
         error);
 
-  lldb::ThreadSP thread_sp;
-  thread_sp = std::make_shared(*this, error);
-
-  if (!thread_sp || error.Fail())
-    return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
-                                                 error.AsCString(), error);
+  StructuredData::DictionarySP thread_info_sp = GetInterface().GetThreadsInfo();
 
-  RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
-  if (!reg_ctx_sp)
+  if (!thread_info_sp)
     return GetInterface().ErrorWithMessage(
-        LLVM_PRETTY_FUNCTION, "Invalid Register Context", error);
-
-  new_thread_list.AddThread(thread_sp);
+        LLVM_PRETTY_FUNCTION,
+        "Couldn't fetch thread list from Scripted Process.", error);
+
+  auto create_scripted_thread =
+      [this, &old_thread_list, &error,
+       &new_thread_list](ConstString key, StructuredData::Object *val) -> bool {
+    if (!val)
+      return GetInterface().ErrorWithMessage(
+          LLVM_PRETTY_FUNCTION, "Invalid thread info object", error);
+
+    lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
+    if (!llvm::to_integer(key.AsCString(), tid))
+      return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
+                                                   "Invalid thread id", error);
+
+    if (ThreadSP thread_sp =
+            old_thread_list.FindThreadByID(tid, false /*=can_update*/)) {
+      // If the thread was already in the old_thread_list,
+      // just add it back to the new_thread_list.
+      new_thread_list.AddThread(thread_sp);
+      return true;
+    }
+
+    lldb::ThreadSP thread_sp =
+        std::make_shared(*this, error, val->GetAsGeneric());
+
+    if (!thread_sp || error.Fail())
+      return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
+                                                   error.AsCString(), error);
+
+    RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
+    if (!reg_ctx_sp)
+      return GetInterface().ErrorWithMessage(
+          LLVM_PRETTY_FUNCTION,
+          llvm::Twine("Invalid Register Context for thread " +
+                      llvm::Twine(key.AsCString()))
+              .str(),
+          error);
+
+    new_thread_list.AddThread(thread_sp);
+
+    return true;
+  };
+
+  thread_info_sp->ForEach(create_scripted_thread);
 
   return new_thread_list.GetSize(false) > 0;
 }
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 4185e1b67587b..1b9841c2048ea 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -28,7 +28,8 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
   lldbassert(GetInterface() && "Invalid Scripted Thread Interface.");
 }
 
-ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
+ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error,
+                               StructuredData::Generic *script_object)
     : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
       m_scripted_thread_interface_sp(
           m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
@@ -54,18 +55,23 @@ ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
 
   ExecutionContext exe_ctx(process);
 
-  StructuredData::GenericSP object_sp =
-      scripted_thread_interface->CreatePluginObject(
-          class_name->c_str(), exe_ctx,
-          process.m_scripted_process_info.GetArgsSP());
-  if (!object_sp || !object_sp->IsValid()) {
-    error.SetErrorString("Failed to create valid script object");
+  m_script_object_sp = scripted_thread_interface->CreatePluginObject(
+      class_name->c_str(), exe_ctx, process.m_scripted_process_info.GetArgsSP(),
+      script_object);
+
+  if (!m_script_object_sp) {
+    error.SetErrorString("Failed to create script object");
     return;
   }
 
-  m_script_object_sp = object_sp;
+  if (!m_script_object_sp->IsValid()) {
+    m_script_object_sp = nullptr;
+    error.SetErrorString("Created script object is invalid");
+    return;
+  }
 
-  SetID(scripted_thread_interface->GetThreadID());
+  lldb::tid_t tid = scripted_thread_interface->GetThreadID();
+  SetID(tid);
 }
 
 ScriptedThread::~ScriptedThread() { DestroyThread(); }
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index 54b095777ab73..d3cd26c57826d 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -26,7 +26,8 @@ namespace lldb_private {
 
 class ScriptedThread : public lldb_private::Thread {
 public:
-  ScriptedThread(ScriptedProcess &process, Status &error);
+  ScriptedThread(ScriptedProcess &process, Status &error,
+                 StructuredData::Generic *script_object = nullptr);
 
   ~ScriptedThread() override;
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index 29516c4c4501e..e39f8be73e49a 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -32,7 +32,7 @@ ScriptedProcessPythonInterface::ScriptedProcessPythonInterface(
 
 StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject(
     llvm::StringRef class_name, ExecutionContext &exe_ctx,
-    StructuredData::DictionarySP args_sp) {
+    StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
   if (class_name.empty())
     return {};
 
@@ -47,9 +47,6 @@ StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject(
       class_name.str().c_str(), m_interpreter.GetDictionaryName(), target_sp,
       args_impl, error_string);
 
-  if (!ret_val)
-    return {};
-
   m_object_instance_sp =
       StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val)));
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index 83507a93bb973..e34a181849eb4 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -25,7 +25,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   StructuredData::GenericSP
   CreatePluginObject(const llvm::StringRef class_name,
                      ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override;
 
   Status Launch() override;
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
index fb55d44aca840..511a42fe2c26a 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
@@ -31,8 +31,7 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface(
 
 StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
     const llvm::StringRef class_name, ExecutionContext &exe_ctx,
-    StructuredData::DictionarySP args_sp) {
-
+    StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
   if (class_name.empty())
     return {};
 
@@ -43,9 +42,15 @@ StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
   Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
                  Locker::FreeLock);
 
-  PythonObject ret_val = LLDBSwigPythonCreateScriptedThread(
-      class_name.str().c_str(), m_interpreter.GetDictionaryName(), process_sp,
-      args_impl, error_string);
+  PythonObject ret_val;
+
+  if (!script_obj)
+    ret_val = LLDBSwigPythonCreateScriptedThread(
+        class_name.str().c_str(), m_interpreter.GetDictionaryName(), process_sp,
+        args_impl, error_string);
+  else
+    ret_val = PythonObject(PyRefType::Borrowed,
+                           static_cast(script_obj->GetValue()));
 
   if (!ret_val)
     return {};
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
index 4222cdfa8fcfe..59bb182ae3f3d 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
@@ -24,7 +24,8 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface,
 
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override;
 
   lldb::tid_t GetThreadID() override;
 
diff --git a/lldb/test/API/functionalities/scripted_process/Makefile b/lldb/test/API/functionalities/scripted_process/Makefile
index 692ba17322859..785b17c7fd698 100644
--- a/lldb/test/API/functionalities/scripted_process/Makefile
+++ b/lldb/test/API/functionalities/scripted_process/Makefile
@@ -1,4 +1,4 @@
-C_SOURCES := main.c
-
+CXX_SOURCES := main.cpp
+ENABLE_THREADS := YES
 include Makefile.rules
 
diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 2a5eff3122145..be55771c14fb1 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -130,7 +130,8 @@ def cleanup():
 
     def create_stack_skinny_corefile(self, file):
         self.build()
-        target, process, thread, _ = lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.c"))
+        target, process, thread, _ = lldbutil.run_to_source_breakpoint(self, "// break here",
+                                                                       lldb.SBFileSpec("main.cpp"))
         self.assertTrue(process.IsValid(), "Process is invalid.")
         # FIXME: Use SBAPI to save the process corefile.
         self.runCmd("process save-core -s stack  " + file)
@@ -186,14 +187,14 @@ def cleanup():
         self.assertTrue(process, PROCESS_IS_VALID)
         self.assertEqual(process.GetProcessID(), 42)
 
-        self.assertEqual(process.GetNumThreads(), 1)
+        self.assertEqual(process.GetNumThreads(), 3)
         thread = process.GetSelectedThread()
         self.assertTrue(thread, "Invalid thread.")
-        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-1")
+        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-0")
 
-        self.assertEqual(thread.GetNumFrames(), 3)
+        self.assertEqual(thread.GetNumFrames(), 2)
         frame = thread.GetSelectedFrame()
         self.assertTrue(frame, "Invalid frame.")
-        self.assertEqual(frame.GetFunctionName(), "bar")
-        self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
-        self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
+        # self.assertEqual(frame.GetFunctionName(), "bar")
+        # self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
+        # self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
diff --git a/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py b/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
index 7dfc55bc30c5b..0a2977c69d457 100644
--- a/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
@@ -9,6 +9,7 @@
 class InvalidScriptedProcess(ScriptedProcess):
     def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
         super().__init__(target, args)
+        self.threads[0] = InvalidScriptedThread(self, None)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         return None
@@ -81,4 +82,4 @@ def __lldb_init_module(debugger, dict):
                                      InvalidScriptedProcess.__name__))
     else:
         print("Name of the class that will manage the scripted process: '%s.%s'"
-                % (__name__, InvalidScriptedProcess.__name__))
\ No newline at end of file
+                % (__name__, InvalidScriptedProcess.__name__))
diff --git a/lldb/test/API/functionalities/scripted_process/main.c b/lldb/test/API/functionalities/scripted_process/main.c
deleted file mode 100644
index 67d3732441da2..0000000000000
--- a/lldb/test/API/functionalities/scripted_process/main.c
+++ /dev/null
@@ -1,8 +0,0 @@
-int bar(int i) {
-  int j = i * i;
-  return j; // break here
-}
-
-int foo(int i) { return bar(i); }
-
-int main() { return foo(42); }
diff --git a/lldb/test/API/functionalities/scripted_process/main.cpp b/lldb/test/API/functionalities/scripted_process/main.cpp
new file mode 100644
index 0000000000000..26dc123558921
--- /dev/null
+++ b/lldb/test/API/functionalities/scripted_process/main.cpp
@@ -0,0 +1,34 @@
+#include 
+#include 
+#include 
+
+int bar(int i) {
+  int j = i * i;
+  return j; // break here
+}
+
+int foo(int i) { return bar(i); }
+
+void call_and_wait(int &n) {
+  std::cout << "waiting for computation!" << std::endl;
+  while (n != 42 * 42)
+    ;
+  std::cout << "finished computation!" << std::endl;
+}
+
+void compute_pow(int &n) { n = foo(n); }
+
+int main() {
+  int n = 42;
+  std::mutex mutex;
+  std::unique_lock lock(mutex);
+
+  std::thread thread_1(call_and_wait, std::ref(n));
+  std::thread thread_2(compute_pow, std::ref(n));
+  lock.unlock();
+
+  thread_1.join();
+  thread_2.join();
+
+  return 0;
+}
diff --git a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
index da7c69ee7b993..ac455fe3d2717 100644
--- a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
@@ -1,4 +1,4 @@
-import os,struct,signal
+import os,json,struct,signal
 
 from typing import Any, Dict
 
@@ -21,6 +21,14 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
                 idx = int(self.backing_target_idx.GetStringValue(100))
             self.corefile_target = target.GetDebugger().GetTargetAtIndex(idx)
             self.corefile_process = self.corefile_target.GetProcess()
+            for corefile_thread in self.corefile_process:
+                structured_data = lldb.SBStructuredData()
+                structured_data.SetFromJSON(json.dumps({
+                    "backing_target_idx" : idx,
+                    "thread_idx" : corefile_thread.GetIndexID()
+                }))
+
+                self.threads[corefile_thread.GetThreadID()] = StackCoreScriptedThread(self, structured_data)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         mem_region = lldb.SBMemoryRegionInfo()
@@ -70,23 +78,43 @@ def get_scripted_thread_plugin(self):
 class StackCoreScriptedThread(ScriptedThread):
     def __init__(self, process, args):
         super().__init__(process, args)
-        self.backing_target_idx = args.GetValueForKey("backing_target_idx")
+        backing_target_idx = args.GetValueForKey("backing_target_idx")
+        thread_idx = args.GetValueForKey("thread_idx")
+
+        def extract_value_from_structured_data(data, default_val):
+            if data and data.IsValid():
+                if data.GetType() == lldb.eStructuredDataTypeInteger:
+                    return data.GetIntegerValue(default_val)
+                if data.GetType() == lldb.eStructuredDataTypeString:
+                    return int(data.GetStringValue(100))
+            return None
+
+        #TODO: Change to Walrus operator (:=) with oneline if assignment
+        # Requires python 3.8
+        val = extract_value_from_structured_data(thread_idx, 0)
+        if val is not None:
+            self.idx = val
 
         self.corefile_target = None
         self.corefile_process = None
-        if (self.backing_target_idx and self.backing_target_idx.IsValid()):
-            if self.backing_target_idx.GetType() == lldb.eStructuredDataTypeInteger:
-                idx = self.backing_target_idx.GetIntegerValue(42)
-            if self.backing_target_idx.GetType() == lldb.eStructuredDataTypeString:
-                idx = int(self.backing_target_idx.GetStringValue(100))
-            self.corefile_target = self.target.GetDebugger().GetTargetAtIndex(idx)
+        self.corefile_thread = None
+
+        #TODO: Change to Walrus operator (:=) with oneline if assignment
+        # Requires python 3.8
+        val = extract_value_from_structured_data(backing_target_idx, 42)
+        if val is not None:
+            self.corefile_target = self.target.GetDebugger().GetTargetAtIndex(val)
             self.corefile_process = self.corefile_target.GetProcess()
+            self.corefile_thread = self.corefile_process.GetThreadByIndexID(self.idx)
+
+        if self.corefile_thread:
+            self.id = self.corefile_thread.GetThreadID()
 
     def get_thread_id(self) -> int:
-        return 0x19
+        return self.id
 
     def get_name(self) -> str:
-        return StackCoreScriptedThread.__name__ + ".thread-1"
+        return StackCoreScriptedThread.__name__ + ".thread-" + str(self.id)
 
     def get_stop_reason(self) -> Dict[str, Any]:
         return { "type": lldb.eStopReasonSignal, "data": {
@@ -109,10 +137,9 @@ def __init__(idx, cfa, pc, symbol_ctx):
         return self.frame_zero[0:0]
 
     def get_register_context(self) -> str:
-        thread = self.corefile_process.GetSelectedThread()
-        if not thread or thread.GetNumFrames() == 0:
+        if not self.corefile_thread or self.corefile_thread.GetNumFrames() == 0:
             return None
-        frame = thread.GetFrameAtIndex(0)
+        frame = self.corefile_thread.GetFrameAtIndex(0)
 
         GPRs = None
         registerSet = frame.registers # Returns an SBValueList.

From cfa55bfe781474a30467b1bbf2e7874985171196 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:46:17 +0100
Subject: [PATCH 439/946] [lldb/Plugins] Enrich ScriptedThreads Stop Reasons
 with Exceptions

This patch adds Exceptions to the list of supported stop reasons for
Scripted Threads.

The main motivation for this is that breakpoints are triggered as a
special exception class on ARM platforms, so adding it as a stop reason
allows the ScriptedProcess to select the ScriptedThread that stopped at
a breakpoint (or crashed :p).

rdar://87430376

Differential Revision: https://reviews.llvm.org/D117074

Signed-off-by: Med Ismail Bennani 
---
 .../Process/scripted/ScriptedProcess.cpp      |  1 -
 .../Process/scripted/ScriptedThread.cpp       | 19 +++++++++++++++++--
 .../scripted_process/TestScriptedProcess.py   | 12 ++++++------
 .../stack_core_scripted_process.py            | 18 +++++++++++++++---
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index f01e599ad5585..e28658e33cdad 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -357,7 +357,6 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
 void ScriptedProcess::RefreshStateAfterStop() {
   // Let all threads recover from stopping and do any clean up based on the
   // previous thread state (if any).
-  m_thread_list.RefreshStateAfterStop();
 }
 
 bool ScriptedProcess::GetProcessInfo(ProcessInstanceInfo &info) {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 1b9841c2048ea..14f4f99cf9c4a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -145,6 +145,11 @@ bool ScriptedThread::CalculateStopInfo() {
   StructuredData::DictionarySP dict_sp = GetInterface()->GetStopReason();
 
   Status error;
+  if (!dict_sp)
+    return GetInterface()->ErrorWithMessage(
+        LLVM_PRETTY_FUNCTION, "Failed to get scripted thread stop info.", error,
+        LIBLLDB_LOG_THREAD);
+
   lldb::StopInfoSP stop_info_sp;
   lldb::StopReason stop_reason_type;
 
@@ -158,12 +163,12 @@ bool ScriptedThread::CalculateStopInfo() {
   if (!dict_sp->GetValueForKeyAsDictionary("data", data_dict))
     return GetInterface()->ErrorWithMessage(
         LLVM_PRETTY_FUNCTION,
-        "Couldn't find value for key 'type' in stop reason dictionary.", error,
+        "Couldn't find value for key 'data' in stop reason dictionary.", error,
         LIBLLDB_LOG_THREAD);
 
   switch (stop_reason_type) {
   case lldb::eStopReasonNone:
-    break;
+    return true;
   case lldb::eStopReasonBreakpoint: {
     lldb::break_id_t break_id;
     data_dict->GetValueForKeyAsInteger("break_id", break_id,
@@ -180,6 +185,13 @@ bool ScriptedThread::CalculateStopInfo() {
     stop_info_sp =
         StopInfo::CreateStopReasonWithSignal(*this, signal, description.data());
   } break;
+  case lldb::eStopReasonException: {
+    llvm::StringRef description;
+    data_dict->GetValueForKeyAsString("desc", description);
+
+    stop_info_sp =
+        StopInfo::CreateStopReasonWithException(*this, description.data());
+  } break;
   default:
     return GetInterface()->ErrorWithMessage(
         LLVM_PRETTY_FUNCTION,
@@ -189,6 +201,9 @@ bool ScriptedThread::CalculateStopInfo() {
         error, LIBLLDB_LOG_THREAD);
   }
 
+  if (!stop_info_sp)
+    return false;
+
   SetStopInfo(stop_info_sp);
   return true;
 }
diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index be55771c14fb1..4831d48a0b5a9 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -188,13 +188,13 @@ def cleanup():
         self.assertEqual(process.GetProcessID(), 42)
 
         self.assertEqual(process.GetNumThreads(), 3)
-        thread = process.GetSelectedThread()
+        thread = process.GetThreadAtIndex(2)
         self.assertTrue(thread, "Invalid thread.")
-        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-0")
+        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-2")
 
-        self.assertEqual(thread.GetNumFrames(), 2)
+        self.assertEqual(thread.GetNumFrames(), 6)
         frame = thread.GetSelectedFrame()
         self.assertTrue(frame, "Invalid frame.")
-        # self.assertEqual(frame.GetFunctionName(), "bar")
-        # self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
-        # self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
+        self.assertIn("bar", frame.GetFunctionName())
+        self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
+        self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
diff --git a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
index ac455fe3d2717..1fabcf464e7df 100644
--- a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
@@ -117,9 +117,21 @@ def get_name(self) -> str:
         return StackCoreScriptedThread.__name__ + ".thread-" + str(self.id)
 
     def get_stop_reason(self) -> Dict[str, Any]:
-        return { "type": lldb.eStopReasonSignal, "data": {
-            "signal": signal.SIGINT
-        } }
+        stop_reason = { "type": lldb.eStopReasonInvalid, "data": {  }}
+
+        if self.corefile_thread and self.corefile_thread.IsValid:
+            stop_reason["type"] = self.corefile_thread.GetStopReason()
+
+            if self.corefile_thread.GetStopReasonDataCount() > 0:
+                if stop_reason["type"] == lldb.eStopReasonBreakpoint:
+                    stop_reason["data"]["break_id"] = self.corefile_thread.GetStopReasonDataAtIndex(0)
+                    stop_reason["data"]["break_loc_id"] = self.corefile_thread.GetStopReasonDataAtIndex(1)
+                elif stop_reason["type"] == lldb.eStopReasonSignal:
+                    stop_reason["data"]["signal"] = signal.SIGINT
+                elif stop_reason["type"] == lldb.eStopReasonException:
+                    stop_reason["data"]["desc"] = self.corefile_thread.GetStopDescription(100)
+
+        return stop_reason
 
     def get_stackframes(self):
         class ScriptedStackFrame:

From 45148bfe8aece6ca319dcc32351e20bba26b2ea7 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:52:24 +0100
Subject: [PATCH 440/946] [lldb/Plugins] Fix ScriptedThread IndexID reporting

When listing all the Scripted Threads of a ScriptedProcess, we can see that all
have the thread index set to 1. This is caused by the lldb_private::Thread
constructor, which sets the m_index_id member using the provided thread id `tid`.

Because the call to the super constructor is done before instantiating
the `ScriptedThreadInterface`, lldb can't fetch the thread id from the
script instance, so it uses `LLDB_INVALID_THREAD_ID` instead.

To mitigate this, this patch takes advantage of the `ScriptedThread::Create`
fallible constructor idiom to defer calling the `ScriptedThread` constructor
(and the `Thread` super constructor with it), until we can fetch a valid
thread id `tid` from the `ScriptedThreadInterface`.

rdar://87432065

Differential Revision: https://reviews.llvm.org/D117076

Signed-off-by: Med Ismail Bennani 
---
 lldb/include/lldb/Target/Thread.h             |  2 +-
 .../Process/scripted/ScriptedProcess.cpp      | 12 +--
 .../Process/scripted/ScriptedThread.cpp       | 80 ++++++++++---------
 .../Plugins/Process/scripted/ScriptedThread.h | 12 ++-
 .../Python/ScriptedThreadPythonInterface.cpp  |  2 +-
 5 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h
index 91feed310eb97..587b29eb4c661 100644
--- a/lldb/include/lldb/Target/Thread.h
+++ b/lldb/include/lldb/Target/Thread.h
@@ -1244,7 +1244,7 @@ class Thread : public std::enable_shared_from_this,
                                          // the stop info was checked against
                                          // the stop info override
   const uint32_t m_index_id; ///< A unique 1 based index assigned to each thread
-                             ///for easy UI/command line access.
+                             /// for easy UI/command line access.
   lldb::RegisterContextSP m_reg_context_sp; ///< The register context for this
                                             ///thread's current register state.
   lldb::StateType m_state;                  ///< The state of our process.
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index e28658e33cdad..e71a0fdf8de92 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -328,12 +328,14 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
       return true;
     }
 
-    lldb::ThreadSP thread_sp =
-        std::make_shared(*this, error, val->GetAsGeneric());
+    auto thread_or_error = ScriptedThread::Create(*this, val->GetAsGeneric());
 
-    if (!thread_sp || error.Fail())
-      return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
-                                                   error.AsCString(), error);
+    if (!thread_or_error)
+      return GetInterface().ErrorWithMessage(
+          LLVM_PRETTY_FUNCTION, toString(thread_or_error.takeError()), error);
+
+    ThreadSP thread_sp = thread_or_error.get();
+    lldbassert(thread_sp && "Couldn't initialize scripted thread.");
 
     RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
     if (!reg_ctx_sp)
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 14f4f99cf9c4a..b6cbb62fd6e6a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -28,52 +28,60 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
   lldbassert(GetInterface() && "Invalid Scripted Thread Interface.");
 }
 
-ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error,
-                               StructuredData::Generic *script_object)
-    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
-      m_scripted_thread_interface_sp(
-          m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
-  if (!process.IsValid()) {
-    error.SetErrorString("Invalid scripted process");
-    return;
-  }
+llvm::Expected>
+ScriptedThread::Create(ScriptedProcess &process,
+                       StructuredData::Generic *script_object) {
+  if (!process.IsValid())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Invalid scripted process.");
 
   process.CheckInterpreterAndScriptObject();
 
-  auto scripted_thread_interface = GetInterface();
-  if (!scripted_thread_interface) {
-    error.SetErrorString("Failed to get scripted thread interface.");
-    return;
-  }
-
-  llvm::Optional class_name =
-      process.GetInterface().GetScriptedThreadPluginName();
-  if (!class_name || class_name->empty()) {
-    error.SetErrorString("Failed to get scripted thread class name.");
-    return;
+  auto scripted_thread_interface =
+      process.GetInterface().CreateScriptedThreadInterface();
+  if (!scripted_thread_interface)
+    return llvm::createStringError(
+        llvm::inconvertibleErrorCode(),
+        "Failed to create scripted thread interface.");
+
+  std::string thread_class_name; // Owning copy; `class_name` dies below.
+  if (!script_object) {
+    llvm::Optional class_name =
+        process.GetInterface().GetScriptedThreadPluginName();
+    if (!class_name || class_name->empty())
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          "Failed to get scripted thread class name.");
+    thread_class_name = *class_name;
   }
 
   ExecutionContext exe_ctx(process);
-
-  m_script_object_sp = scripted_thread_interface->CreatePluginObject(
-      class_name->c_str(), exe_ctx, process.m_scripted_process_info.GetArgsSP(),
-      script_object);
-
-  if (!m_script_object_sp) {
-    error.SetErrorString("Failed to create script object");
-    return;
-  }
-
-  if (!m_script_object_sp->IsValid()) {
-    m_script_object_sp = nullptr;
-    error.SetErrorString("Created script object is invalid");
-    return;
-  }
+  StructuredData::GenericSP owned_script_object_sp =
+      scripted_thread_interface->CreatePluginObject(
+          thread_class_name, exe_ctx,
+          process.m_scripted_process_info.GetArgsSP(), script_object);
+
+  if (!owned_script_object_sp)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Failed to create script object.");
+  if (!owned_script_object_sp->IsValid())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Created script object is invalid.");
 
   lldb::tid_t tid = scripted_thread_interface->GetThreadID();
-  SetID(tid);
+
+  return std::make_shared(process, scripted_thread_interface,
+                                          tid, owned_script_object_sp);
 }
 
+ScriptedThread::ScriptedThread(ScriptedProcess &process,
+                               ScriptedThreadInterfaceSP interface_sp,
+                               lldb::tid_t tid,
+                               StructuredData::GenericSP script_object_sp)
+    : Thread(process, tid), m_scripted_process(process),
+      m_scripted_thread_interface_sp(interface_sp),
+      m_script_object_sp(script_object_sp) {}
+
 ScriptedThread::~ScriptedThread() { DestroyThread(); }
 
 const char *ScriptedThread::GetName() {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index d3cd26c57826d..8d8a7c2a3df90 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -25,12 +25,18 @@ class ScriptedProcess;
 namespace lldb_private {
 
 class ScriptedThread : public lldb_private::Thread {
+
 public:
-  ScriptedThread(ScriptedProcess &process, Status &error,
-                 StructuredData::Generic *script_object = nullptr);
+  ScriptedThread(ScriptedProcess &process,
+                 lldb::ScriptedThreadInterfaceSP interface_sp, lldb::tid_t tid,
+                 StructuredData::GenericSP script_object_sp = nullptr);
 
   ~ScriptedThread() override;
 
+  static llvm::Expected>
+  Create(ScriptedProcess &process,
+         StructuredData::Generic *script_object = nullptr);
+
   lldb::RegisterContextSP GetRegisterContext() override;
 
   lldb::RegisterContextSP
@@ -61,8 +67,8 @@ class ScriptedThread : public lldb_private::Thread {
 
   const ScriptedProcess &m_scripted_process;
   lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
+  lldb_private::StructuredData::GenericSP m_script_object_sp = nullptr;
   std::shared_ptr m_register_info_sp = nullptr;
-  lldb_private::StructuredData::ObjectSP m_script_object_sp = nullptr;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
index 511a42fe2c26a..d471b2c5f7e3d 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
@@ -32,7 +32,7 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface(
 StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
     const llvm::StringRef class_name, ExecutionContext &exe_ctx,
     StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
-  if (class_name.empty())
+  if (class_name.empty() && !script_obj)
     return {};
 
   ProcessSP process_sp = exe_ctx.GetProcessSP();

From 91bb116190cdc598863a7a3fda57e431dd832449 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Tue, 18 Jan 2022 12:56:42 +0100
Subject: [PATCH 441/946] [lldb/Interpreter] Make
 `ScriptedInterface::ErrorWithMessage` static (NFC)

This patch makes the `ScriptedInterface::ErrorWithMessage` method `static`,
so it can be called without first obtaining a `ScriptedInterface` instance.

The patch also updates its various call sites to reflect this change.

Differential Revision: https://reviews.llvm.org/D117374

Signed-off-by: Med Ismail Bennani 
---
 .../lldb/Interpreter/ScriptedInterface.h      |  6 +++---
 .../Process/scripted/ScriptedProcess.cpp      | 20 +++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 27cf9f036e5fd..9eb11832003e6 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -31,9 +31,9 @@ class ScriptedInterface {
                      StructuredData::Generic *script_obj = nullptr) = 0;
 
   template 
-  Ret ErrorWithMessage(llvm::StringRef caller_name, llvm::StringRef error_msg,
-                       Status &error,
-                       uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
+  static Ret ErrorWithMessage(llvm::StringRef caller_name,
+                              llvm::StringRef error_msg, Status &error,
+                              uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
     LLDB_LOGF(GetLogIfAllCategoriesSet(log_caterogy), "%s ERROR = %s",
               caller_name.data(), error_msg.data());
     error.SetErrorString(llvm::Twine(caller_name + llvm::Twine(" ERROR = ") +
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index e71a0fdf8de92..5eb7cb0e6a5c8 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -222,8 +222,8 @@ bool ScriptedProcess::IsAlive() {
 size_t ScriptedProcess::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                                      Status &error) {
   if (!m_interpreter)
-    return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
-                                                   "No interpreter.", error);
+    return ScriptedInterface::ErrorWithMessage(
+        LLVM_PRETTY_FUNCTION, "No interpreter.", error);
 
   lldb::DataExtractorSP data_extractor_sp =
       GetInterface().ReadMemoryAtAddress(addr, size, error);
@@ -235,7 +235,7 @@ size_t ScriptedProcess::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
       0, data_extractor_sp->GetByteSize(), buf, size, GetByteOrder());
 
   if (!bytes_copied || bytes_copied == LLDB_INVALID_OFFSET)
-    return GetInterface().ErrorWithMessage(
+    return ScriptedInterface::ErrorWithMessage(
         LLVM_PRETTY_FUNCTION, "Failed to copy read memory to buffer.", error);
 
   return size;
@@ -293,7 +293,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
   ScriptLanguage language = m_interpreter->GetLanguage();
 
   if (language != eScriptLanguagePython)
-    return GetInterface().ErrorWithMessage(
+    return ScriptedInterface::ErrorWithMessage(
         LLVM_PRETTY_FUNCTION,
         llvm::Twine("ScriptInterpreter language (" +
                     llvm::Twine(m_interpreter->LanguageToString(language)) +
@@ -304,7 +304,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
   StructuredData::DictionarySP thread_info_sp = GetInterface().GetThreadsInfo();
 
   if (!thread_info_sp)
-    return GetInterface().ErrorWithMessage(
+    return ScriptedInterface::ErrorWithMessage(
         LLVM_PRETTY_FUNCTION,
         "Couldn't fetch thread list from Scripted Process.", error);
 
@@ -312,13 +312,13 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
       [this, &old_thread_list, &error,
        &new_thread_list](ConstString key, StructuredData::Object *val) -> bool {
     if (!val)
-      return GetInterface().ErrorWithMessage(
+      return ScriptedInterface::ErrorWithMessage(
           LLVM_PRETTY_FUNCTION, "Invalid thread info object", error);
 
     lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
     if (!llvm::to_integer(key.AsCString(), tid))
-      return GetInterface().ErrorWithMessage(LLVM_PRETTY_FUNCTION,
-                                                   "Invalid thread id", error);
+      return ScriptedInterface::ErrorWithMessage(
+          LLVM_PRETTY_FUNCTION, "Invalid thread id", error);
 
     if (ThreadSP thread_sp =
             old_thread_list.FindThreadByID(tid, false /*=can_update*/)) {
@@ -331,7 +331,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
     auto thread_or_error = ScriptedThread::Create(*this, val->GetAsGeneric());
 
     if (!thread_or_error)
-      return GetInterface().ErrorWithMessage(
+      return ScriptedInterface::ErrorWithMessage(
           LLVM_PRETTY_FUNCTION, toString(thread_or_error.takeError()), error);
 
     ThreadSP thread_sp = thread_or_error.get();
@@ -339,7 +339,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
 
     RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
     if (!reg_ctx_sp)
-      return GetInterface().ErrorWithMessage(
+      return ScriptedInterface::ErrorWithMessage(
           LLVM_PRETTY_FUNCTION,
           llvm::Twine("Invalid Register Context for thread " +
                       llvm::Twine(key.AsCString()))

From bb1fe369774adb86a6bdeec3f56684b6a01c7ff3 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Wed, 19 Jan 2022 12:51:04 -0800
Subject: [PATCH 442/946] [AMDGPU] Make v8i16/v8f16 legal

Differential Revision: https://reviews.llvm.org/D117721
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   7 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 156 ++-
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  40 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |   8 +-
 .../test/Analysis/CostModel/AMDGPU/add-sub.ll |   4 +-
 .../Analysis/CostModel/AMDGPU/arith-ssat.ll   |  32 +-
 .../Analysis/CostModel/AMDGPU/arith-usat.ll   |  32 +-
 llvm/test/Analysis/CostModel/AMDGPU/cast.ll   |  76 +-
 llvm/test/Analysis/CostModel/AMDGPU/fadd.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll   |   8 +-
 llvm/test/Analysis/CostModel/AMDGPU/fma.ll    |  92 +-
 llvm/test/Analysis/CostModel/AMDGPU/fmul.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll |  12 +-
 llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll |  12 +-
 llvm/test/Analysis/CostModel/AMDGPU/fsub.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/mul.ll    |  20 +-
 .../GlobalISel/inst-select-load-constant.mir  |  24 +-
 .../GlobalISel/inst-select-load-flat.mir      |  24 +-
 .../GlobalISel/inst-select-load-global.mir    |  30 +-
 .../GlobalISel/inst-select-load-local-128.mir |  64 +-
 .../GlobalISel/inst-select-store-flat.mir     |  24 +-
 .../GlobalISel/inst-select-store-global.mir   |  30 +-
 llvm/test/CodeGen/AMDGPU/add.v2i16.ll         |   5 +-
 .../CodeGen/AMDGPU/coalesce-vgpr-alignment.ll |   1 -
 .../CodeGen/AMDGPU/extract-subvector-16bit.ll | 437 +++++++++
 .../CodeGen/AMDGPU/extract_vector_elt-f16.ll  |  13 +
 .../CodeGen/AMDGPU/extract_vector_elt-i16.ll  |  49 +
 llvm/test/CodeGen/AMDGPU/function-returns.ll  |   4 +-
 llvm/test/CodeGen/AMDGPU/idot8s.ll            | 586 ++++++------
 llvm/test/CodeGen/AMDGPU/idot8u.ll            | 896 +++++++++---------
 .../CodeGen/AMDGPU/inlineasm-illegal-type.ll  |   6 +-
 .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 339 +++++++
 llvm/test/CodeGen/AMDGPU/kernel-args.ll       |  11 +-
 llvm/test/CodeGen/AMDGPU/load-constant-i16.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   |   6 +-
 llvm/test/CodeGen/AMDGPU/sub.v2i16.ll         |  10 +-
 36 files changed, 2065 insertions(+), 1013 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 63e8c85b84137..b9d0655feef72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -360,6 +360,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
@@ -1408,6 +1410,11 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
       Start != 1)
     return Op;
 
+  if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) ||
+       (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) &&
+      (Start == 0 || Start == 4))
+    return Op;
+
   DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
                             VT.getVectorNumElements());
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4008c7e36e4f4..ec610160b2278 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -134,6 +134,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
     addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
     addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
+    addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
+    addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -269,7 +271,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                   MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
                   MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
                   MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
-                  MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32 }) {
+                  MVT::v8i16, MVT::v8f16, MVT::v16i64, MVT::v16f64,
+                  MVT::v32i32, MVT::v32f32 }) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
@@ -611,7 +614,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     if (STI.hasMadF16())
       setOperationAction(ISD::FMAD, MVT::f16, Legal);
 
-    for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
+    for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
+                   MVT::v8f16}) {
       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
         switch (Op) {
         case ISD::LOAD:
@@ -673,6 +677,21 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v4f16, Promote);
     AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
 
+    setOperationAction(ISD::LOAD, MVT::v8i16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32);
+    setOperationAction(ISD::LOAD, MVT::v8f16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32);
+
+    setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32);
+    setOperationAction(ISD::STORE, MVT::v4f16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
+
+    setOperationAction(ISD::STORE, MVT::v8i16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32);
+    setOperationAction(ISD::STORE, MVT::v8f16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
+
     setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
@@ -682,6 +701,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Expand);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Expand);
 
+    setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Expand);
+
     if (!Subtarget->hasVOP3PInsts()) {
       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
@@ -699,9 +722,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::v8f16, Custom);
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::v8f16, Custom);
 
     setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
     setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
+    setOperationAction(ISD::FMINNUM, MVT::v8f16, Expand);
+    setOperationAction(ISD::FMAXNUM, MVT::v8f16, Expand);
+
+    for (MVT Vec16 : { MVT::v8i16, MVT::v8f16 }) {
+      setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand);
+    }
   }
 
   if (Subtarget->hasVOP3PInsts()) {
@@ -735,34 +769,42 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
 
-    setOperationAction(ISD::SHL, MVT::v4i16, Custom);
-    setOperationAction(ISD::SRA, MVT::v4i16, Custom);
-    setOperationAction(ISD::SRL, MVT::v4i16, Custom);
-    setOperationAction(ISD::ADD, MVT::v4i16, Custom);
-    setOperationAction(ISD::SUB, MVT::v4i16, Custom);
-    setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+    for (MVT VT : { MVT::v4i16, MVT::v8i16 }) {
+      // Split vector operations.
+      setOperationAction(ISD::SHL, VT, Custom);
+      setOperationAction(ISD::SRA, VT, Custom);
+      setOperationAction(ISD::SRL, VT, Custom);
+      setOperationAction(ISD::ADD, VT, Custom);
+      setOperationAction(ISD::SUB, VT, Custom);
+      setOperationAction(ISD::MUL, VT, Custom);
 
-    setOperationAction(ISD::SMIN, MVT::v4i16, Custom);
-    setOperationAction(ISD::SMAX, MVT::v4i16, Custom);
-    setOperationAction(ISD::UMIN, MVT::v4i16, Custom);
-    setOperationAction(ISD::UMAX, MVT::v4i16, Custom);
+      setOperationAction(ISD::SMIN, VT, Custom);
+      setOperationAction(ISD::SMAX, VT, Custom);
+      setOperationAction(ISD::UMIN, VT, Custom);
+      setOperationAction(ISD::UMAX, VT, Custom);
 
-    setOperationAction(ISD::UADDSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::SADDSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::USUBSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::SSUBSAT, MVT::v4i16, Custom);
+      setOperationAction(ISD::UADDSAT, VT, Custom);
+      setOperationAction(ISD::SADDSAT, VT, Custom);
+      setOperationAction(ISD::USUBSAT, VT, Custom);
+      setOperationAction(ISD::SSUBSAT, VT, Custom);
+    }
 
-    setOperationAction(ISD::FADD, MVT::v4f16, Custom);
-    setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
-    setOperationAction(ISD::FMA, MVT::v4f16, Custom);
+    for (MVT VT : { MVT::v4f16, MVT::v8f16 }) {
+      // Split vector operations.
+      setOperationAction(ISD::FADD, VT, Custom);
+      setOperationAction(ISD::FMUL, VT, Custom);
+      setOperationAction(ISD::FMA, VT, Custom);
+      setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+    }
 
     setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
     setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
 
     setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
     setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
-    setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
 
     setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
     setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
@@ -799,7 +841,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FABS, MVT::v2f16, Custom);
   }
 
-  for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) {
+  for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
+                  MVT::v8i16, MVT::v8f16 }) {
     setOperationAction(ISD::SELECT, VT, Custom);
   }
 
@@ -4610,7 +4653,8 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
   assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
-         VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+         VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8f32 ||
+         VT == MVT::v16f32 || VT == MVT::v32f32);
 
   SDValue Lo0, Hi0;
   std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4631,21 +4675,26 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
                                               SelectionDAG &DAG) const {
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
-  assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
-         VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+  assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
+         VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v8f32 ||
+         VT == MVT::v16f32 || VT == MVT::v32f32);
 
   SDValue Lo0, Hi0;
-  std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
+  SDValue Op0 = Op.getOperand(0);
+  std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
+                         ? DAG.SplitVectorOperand(Op.getNode(), 0)
+                         : std::make_pair(Op0, Op0);
   SDValue Lo1, Hi1;
   std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
   SDValue Lo2, Hi2;
   std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
 
   SDLoc SL(Op);
+  auto ResVT = DAG.GetSplitDestVTs(VT);
 
-  SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2,
+  SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
                              Op->getFlags());
-  SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2,
+  SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
                              Op->getFlags());
 
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
@@ -5307,7 +5356,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
   if (IsIEEEMode)
     return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
 
-  if (VT == MVT::v4f16)
+  if (VT == MVT::v4f16 || VT == MVT::v8f16)
     return splitBinaryVectorOp(Op, DAG);
   return Op;
 }
@@ -5709,7 +5758,6 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   EVT VecVT = Vec.getValueType();
   unsigned VecSize = VecVT.getSizeInBits();
   EVT EltVT = VecVT.getVectorElementType();
-  assert(VecSize <= 64);
 
   DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
 
@@ -5720,6 +5768,28 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
     return Combined;
 
+  if (VecSize == 128) {
+    SDValue Lo, Hi;
+    EVT LoVT, HiVT;
+    SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+    Lo =
+        DAG.getBitcast(LoVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+                                         V2, DAG.getConstant(0, SL, MVT::i32)));
+    Hi =
+        DAG.getBitcast(HiVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+                                         V2, DAG.getConstant(1, SL, MVT::i32)));
+    EVT IdxVT = Idx.getValueType();
+    unsigned NElem = VecVT.getVectorNumElements();
+    assert(isPowerOf2_32(NElem));
+    SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
+    SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
+    SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Half, NewIdx);
+  }
+
+  assert(VecSize <= 64);
+
   unsigned EltSize = EltVT.getSizeInBits();
   assert(isPowerOf2_32(EltSize));
 
@@ -5802,20 +5872,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
   SDLoc SL(Op);
   EVT VT = Op.getValueType();
 
-  if (VT == MVT::v4i16 || VT == MVT::v4f16) {
-    EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), 2);
+  if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+      VT == MVT::v8i16 || VT == MVT::v8f16) {
+    EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+                                  VT.getVectorNumElements() / 2);
+    MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
 
     // Turn into pair of packed build_vectors.
     // TODO: Special case for constants that can be materialized with s_mov_b64.
-    SDValue Lo = DAG.getBuildVector(HalfVT, SL,
-                                    { Op.getOperand(0), Op.getOperand(1) });
-    SDValue Hi = DAG.getBuildVector(HalfVT, SL,
-                                    { Op.getOperand(2), Op.getOperand(3) });
+    SmallVector LoOps, HiOps;
+    for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
+      LoOps.push_back(Op.getOperand(I));
+      HiOps.push_back(Op.getOperand(I + E));
+    }
+    SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
+    SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
 
-    SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Lo);
-    SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Hi);
+    SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
+    SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
 
-    SDValue Blend = DAG.getBuildVector(MVT::v2i32, SL, { CastLo, CastHi });
+    SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
+                                       { CastLo, CastHi });
     return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
   }
 
@@ -8427,6 +8504,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
+  if (VT.getSizeInBits() == 128)
+    return splitTernaryVectorOp(Op, DAG);
+
   assert(VT.getSizeInBits() == 64);
 
   SDLoc DL(Op);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b6f19bcc7f132..cba9a77864aaf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1192,6 +1192,26 @@ def : Pat <
   (v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
 >;
 
+def : Pat <
+  (extract_subvector v8i16:$vec, (i32 0)),
+  (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+  (extract_subvector v8i16:$vec, (i32 4)),
+  (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub2_sub3))
+>;
+
+def : Pat <
+  (extract_subvector v8f16:$vec, (i32 0)),
+  (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+  (extract_subvector v8f16:$vec, (i32 4)),
+  (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
+>;
+
 foreach Index = 0-31 in {
   def Extract_Element_v32i32_#Index : Extract_Element <
     i32, v32i32, Index, !cast(sub#Index)
@@ -1287,6 +1307,26 @@ def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
 
 // 160-bit bitcast
 def : BitConvert ;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 340e2b48e5cd9..eb9452f4b85ee 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -617,7 +617,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
   let HasSGPR = 1;
 }
 
-def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
   (add PRIVATE_RSRC_REG)> {
   let isAllocatable = 0;
   let CopyCost = -1;
@@ -784,7 +784,7 @@ multiclass SRegClass;
-defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
 defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
 defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
 defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
@@ -824,7 +824,7 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
                                 (add VGPR_64)>;
 defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64], (add VGPR_128)>;
+defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
 defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
 
 defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
@@ -846,7 +846,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
                         (add AGPR_64)>;
 defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
-defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
+defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
 defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
 defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
 defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 2bd77e252ed01..998cdecabb72e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -76,7 +76,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
@@ -98,7 +98,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
index 54636e70502ba..2b0c5ef65f2e5 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
@@ -55,10 +55,10 @@ define i32 @add(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -113,10 +113,10 @@ define i32 @add(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -235,10 +235,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -293,10 +293,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
index 3fe7583041e9a..d347d37cf02d1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
@@ -55,10 +55,10 @@ define i32 @add(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -113,10 +113,10 @@ define i32 @add(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -235,10 +235,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -293,10 +293,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
index 308dd4c40a100..e6f6b23430a6d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
@@ -299,33 +299,19 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
-; FAST-LABEL: 'sitofp8'
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; SLOW-LABEL: 'sitofp8'
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; FAST-SIZE-LABEL: 'sitofp8'
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-LABEL: 'sitofp8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; SLOW-SIZE-LABEL: 'sitofp8'
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-LABEL: 'sitofp8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %A1 = sitofp <8 x i1> %a to <8 x float>
   %B1 = sitofp <8 x i8> %b to <8 x float>
@@ -391,33 +377,19 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
-; FAST-LABEL: 'uitofp8'
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; SLOW-LABEL: 'uitofp8'
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; FAST-SIZE-LABEL: 'uitofp8'
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-LABEL: 'uitofp8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; SLOW-SIZE-LABEL: 'uitofp8'
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-LABEL: 'uitofp8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %A1 = uitofp <8 x i1> %a to <8 x float>
   %B1 = uitofp <8 x i8> %b to <8 x float>
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index 12760920733d1..6961e4d6a4090 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fadd_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index fe5b57be5d3bf..298557b53c7e4 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -165,7 +165,7 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -185,7 +185,7 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
@@ -216,7 +216,7 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -236,7 +236,7 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
index cec33c25c6157..e6d635fd5dc0f 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64,NOPACKEDF32 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,FASTF64,PACKEDF32 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64,NOPACKEDF32 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64 %s
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE,NOPACKEDF32-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,FASTF64-SIZE,PACKEDF32-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE,NOPACKEDF32-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,FASTF64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE %s
 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s
 ; END.
 
 define amdgpu_kernel void @fma_f32() #0 {
-; NOPACKEDF32-LABEL: 'fma_f32'
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; PACKEDF32-LABEL: 'fma_f32'
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; SLOWF64-LABEL: 'fma_f32'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF64-LABEL: 'fma_f32'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f32'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
@@ -40,25 +40,25 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; NOPACKEDF32-SIZE-LABEL: 'fma_f32'
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; PACKEDF32-SIZE-LABEL: 'fma_f32'
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; SLOWF64-SIZE-LABEL: 'fma_f32'
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FASTF64-SIZE-LABEL: 'fma_f32'
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fma_f32'
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
@@ -145,7 +145,7 @@ define amdgpu_kernel void @fma_f16() #0 {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) #2
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f16'
@@ -165,7 +165,7 @@ define amdgpu_kernel void @fma_f16() #0 {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) #2
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fma_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index a5906b070d58e..758d4aa874400 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fmul_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
index 3c24d6cdcd286..0c2cccf7083ba 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
@@ -59,7 +59,7 @@ define i32 @fptosi_double_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptosi_double_i16'
@@ -73,7 +73,7 @@ define i32 @fptosi_double_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptosi_double_i16'
@@ -181,8 +181,8 @@ define i32 @fptosi_float_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptosi_float_i16'
@@ -197,8 +197,8 @@ define i32 @fptosi_float_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptosi_float_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
index e2f75a9f302cf..97c463345fba0 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
@@ -59,7 +59,7 @@ define i32 @fptoui_double_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptoui_double_i16'
@@ -73,7 +73,7 @@ define i32 @fptoui_double_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptoui_double_i16'
@@ -181,8 +181,8 @@ define i32 @fptoui_float_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptoui_float_i16'
@@ -197,8 +197,8 @@ define i32 @fptoui_float_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptoui_float_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index 38fec3cfe5349..a49d4e35c1ce3 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 8fddcbefaf764..99a2de65180d7 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -79,7 +79,7 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v17i16 = mul <17 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'mul_i16'
@@ -99,7 +99,7 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = mul <16 x i16> undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17i16 = mul <17 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = mul i16 undef, undef
@@ -144,7 +144,7 @@ define i32 @mul_constpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -182,7 +182,7 @@ define i32 @mul_constpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -244,7 +244,7 @@ define i32 @mul_uniformconstpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -282,7 +282,7 @@ define i32 @mul_uniformconstpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -344,7 +344,7 @@ define i32 @mul_constnegpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -382,7 +382,7 @@ define i32 @mul_constnegpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -444,7 +444,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, 
@@ -482,7 +482,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, 
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, 
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, 
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 9acb09cb48ff9..2b120b6fd3b67 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -715,27 +715,27 @@ body: |
     ; GFX6-LABEL: name: load_constant_v8s16
     ; GFX6: liveins: $sgpr0_sgpr1
     ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX7-LABEL: name: load_constant_v8s16
     ; GFX7: liveins: $sgpr0_sgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX8-LABEL: name: load_constant_v8s16
     ; GFX8: liveins: $sgpr0_sgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX10-LABEL: name: load_constant_v8s16
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     %0:sgpr(p4) = COPY $sgpr0_sgpr1
     %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4)
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index f4e1d172dda7e..cce37182ae7b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -741,27 +741,27 @@ body: |
     ; GFX7-LABEL: name: load_flat_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX8-LABEL: name: load_flat_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX9-LABEL: name: load_flat_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX10-LABEL: name: load_flat_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x  s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
index d2bd5e6fcc7d1..df681f95d4e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -873,33 +873,33 @@ body: |
     ; GFX7-LABEL: name: load_global_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX7-FLAT-LABEL: name: load_global_v8s16
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1
     ; GFX7-FLAT-NEXT: {{  $}}
-    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX8-LABEL: name: load_global_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX9-LABEL: name: load_global_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
     ; GFX10-LABEL: name: load_global_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
index 361f1e80d25e3..1083d43381939 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 
@@ -27,6 +27,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -57,6 +63,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -87,6 +99,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 400
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -123,6 +141,14 @@ body: |
     ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 4000
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -155,6 +181,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v2s64
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -185,6 +217,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX10-LABEL: name: load_local_v2p1
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -215,6 +253,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX10-LABEL: name: load_local_s128
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -235,16 +279,22 @@ body: |
     ; GFX7-LABEL: name: load_local_v8s16
     ; GFX7: liveins: $vgpr0
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
     ; GFX9-LABEL: name: load_local_v8s16
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v8s16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index 35b7f1a684c43..a626a7af89c5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -503,27 +503,27 @@ body: |
     ; GFX7-LABEL: name: store_flat_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX8-LABEL: name: store_flat_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX9-LABEL: name: store_flat_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX10-LABEL: name: store_flat_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
index 8e883bfcc6669..8568c0e879f15 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -549,33 +549,33 @@ body: |
     ; GFX7-LABEL: name: store_global_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX7-FLAT-LABEL: name: store_global_v8s16
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-FLAT-NEXT: {{  $}}
-    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX8-LABEL: name: store_global_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX9-LABEL: name: store_global_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1)
     ; GFX10-LABEL: name: store_global_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index 56e318c96b2f9..3de1aa1dd656d 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -200,12 +200,13 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
 ; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64:
+; GFX9PLUS: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff
 ; GFX9PLUS: global_load_dword [[A:v[0-9]+]]
 ; GFX9PLUS: global_load_dword [[B:v[0-9]+]]
 
 ; GFX9PLUS: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
-; GFX9PLUS-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
-; GFX9PLUS-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
+; GFX9PLUS-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], [[MASK]], [[ADD]]
+; GFX9PLUS-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9PLUS: buffer_store_dwordx4
 
 ; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
index b3f581280b644..04d6b0d85b957 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@@ -23,7 +23,6 @@ bb:
 ; GCN-LABEL: {{^}}test_vector_creation:
 ; GCN:     global_load_dwordx2 v[{{[0-9]*[02468]}}:{{[0-9]+}}],
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]*}}[[LO:[02468]]], v{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]*}}[[HI:[13579]]], v{{[0-9]+}}
 ; GCN:     global_store_dwordx4 v[{{[0-9]*[02468]:[0-9]*[13579]}}], v[{{[0-9]*[02468]:[0-9]*[13579]}}]
 define amdgpu_kernel void @test_vector_creation() {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
new file mode 100644
index 0000000000000..c686598225a25
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -0,0 +1,437 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+
+define <4 x i16> @extract_4xi16(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xi16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB0_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v3, v4, v3
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB0_3
+; SI-NEXT:    s_branch .LBB0_4
+; SI-NEXT:  .LBB0_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB0_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SI-NEXT:    v_or_b32_e32 v2, v4, v0
+; SI-NEXT:    v_or_b32_e32 v3, v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; SI-NEXT:  .LBB0_4: ; %exit
+; SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
+; SI-NEXT:    v_bfe_i32 v1, v4, 0, 16
+; SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SI-NEXT:    s_mov_b32 s4, 0xffff
+; SI-NEXT:    v_mov_b32_e32 v3, 0x8000
+; SI-NEXT:    v_mov_b32_e32 v4, 0xffff0000
+; SI-NEXT:    v_bfrev_b32_e32 v5, 1
+; SI-NEXT:    v_mov_b32_e32 v6, 0xffff8000
+; SI-NEXT:    v_mov_b32_e32 v7, s4
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, -1, v6, vcc
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xi16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB0_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB0_3
+; GFX9-NEXT:    s_branch .LBB0_4
+; GFX9-NEXT:  .LBB0_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB0_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB0_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0]
+; GFX9-NEXT:    s_movk_i32 s4, 0x8000
+; GFX9-NEXT:    v_or_b32_sdwa v1, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v3, s4, v0
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_or_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v3
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
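+  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>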
+  ret <4 x i16> %r2
+}
+
+define <4 x i16> @extract_4xi16_2(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xi16_2:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB1_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v3, v4, v3
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB1_3
+; SI-NEXT:    s_branch .LBB1_4
+; SI-NEXT:  .LBB1_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr5
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB1_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SI-NEXT:    v_or_b32_e32 v2, v4, v0
+; SI-NEXT:    v_or_b32_e32 v3, v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; SI-NEXT:  .LBB1_4: ; %exit
+; SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
+; SI-NEXT:    v_bfe_i32 v1, v5, 0, 16
+; SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SI-NEXT:    v_bfe_i32 v3, v4, 0, 16
+; SI-NEXT:    v_mov_b32_e32 v4, 0xffff
+; SI-NEXT:    v_mov_b32_e32 v5, 0x8000
+; SI-NEXT:    v_mov_b32_e32 v6, 0xffff0000
+; SI-NEXT:    v_bfrev_b32_e32 v7, 1
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v3
+; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v7, vcc
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xi16_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB1_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB1_3
+; GFX9-NEXT:    s_branch .LBB1_4
+; GFX9-NEXT:  .LBB1_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB1_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB1_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,1]
+; GFX9-NEXT:    s_movk_i32 s4, 0x8000
+; GFX9-NEXT:    v_or_b32_sdwa v1, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v2, s4, v0
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_or_b32_sdwa v3, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
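+  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>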
+  ret <4 x i16> %r2
+}
+
+define <4 x half> @extract_4xf16(<8 x half> addrspace(1) * %p0, <8 x half> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xf16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB2_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v4, v4, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v4
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB2_3
+; SI-NEXT:    s_branch .LBB2_4
+; SI-NEXT:  .LBB2_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB2_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; SI-NEXT:    v_or_b32_e32 v0, v4, v0
+; SI-NEXT:    v_or_b32_e32 v1, v2, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v0
+; SI-NEXT:  .LBB2_4: ; %exit
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v1, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3fa00000
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f200000
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; SI-NEXT:    v_mov_b32_e32 v3, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB2_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB2_3
+; GFX9-NEXT:    s_branch .LBB2_4
+; GFX9-NEXT:  .LBB2_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB2_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB2_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
+; GFX9-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3800
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3900
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x3d00
+; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v4, vcc
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v4, v3, vcc
+; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
+; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v2, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX9-NEXT:    v_pack_b32_f16 v1, v5, v6
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x half>, <8 x half> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x half>, <8 x half> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x half> [ %t, %T ], [ %f, %F ]
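+  %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
+  %r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>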
+  ret <4 x half> %r2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
index 090a1e2674556..6fad3653e475e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
@@ -166,6 +166,19 @@ define amdgpu_kernel void @reduce_load_vector_v8f16_extract_23(<16 x half> addrs
   ret void
 }
 
+; GCN-LABEL: {{^}}v_extractelement_v8f16_dynamic_sgpr:
+; GCN-COUNT-7: v_cndmask_b32_e32
+define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(half addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %n) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x half> %vec, i32 %n
+  store half %vec.extract, half addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index 3f7be164a0b6d..133c7e3b07875 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -166,6 +166,55 @@ define amdgpu_kernel void @reduce_load_vector_v8i16_extract_23(<16 x i16> addrsp
   ret void
 }
 
+; GCN-LABEL: {{^}}v_extractelement_v8i16_2:
+; SI: buffer_load_dword [[RES:v[0-9]+]], v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 addr64 offset:4
+; SI: buffer_store_short [[RES]]
+; VI: flat_load_dword [[RES:v[0-9]+]]
+; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
+; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:4
+; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
+define amdgpu_kernel void @v_extractelement_v8i16_2(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 2
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_extractelement_v8i16_6:
+; SI: buffer_load_dword [[RES:v[0-9]+]], v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 addr64 offset:12
+; SI: buffer_store_short [[RES]]
+; VI: flat_load_dword [[RES:v[0-9]+]]
+; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
+; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:12
+; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
+define amdgpu_kernel void @v_extractelement_v8i16_6(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 6
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_extractelement_v8i16_dynamic_sgpr:
+; GCN-COUNT-7: v_cndmask_b32_e32
+define amdgpu_kernel void @v_extractelement_v8i16_dynamic_sgpr(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %n) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 %n
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index 22402597a192f..ff0171095b842 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -388,9 +388,7 @@ define <4 x half> @v4f16_func_void() #0 {
 ; FIXME: Mixing buffer and global
 ; FIXME: Should not scalarize
 ; GCN-LABEL: {{^}}v5i16_func_void:
-; GFX9: buffer_load_dwordx2 v[0:1]
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_short_d16 v2
+; GFX9: buffer_load_dwordx4 v[0:3]
 ; GFX9-NEXT: s_waitcnt
 ; GFX9-NEXT: s_setpc_b64
 define <5 x i16> @v5i16_func_void() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll
index d3153d12641c0..8563d321c83ae 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll
@@ -2289,58 +2289,58 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 4, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 8, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 12, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 20, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 28, v3
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v16, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
-; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 4, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 8, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 12, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 20, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v17, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 28, v3
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v7, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 20, v3
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v9, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 12, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 8, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 4, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 12, v3
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v18, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    s_waitcnt vmcnt(1)
+; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 20, v2
 ; GFX8-NEXT:    v_lshlrev_b16_sdwa v5, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 12, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 8, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 4, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 12, v6
+; GFX8-NEXT:    v_lshlrev_b16_e32 v12, 12, v12
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v3, 12, v3
-; GFX8-NEXT:    v_lshlrev_b16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v18, 12, v18
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v7, 12, v7
-; GFX8-NEXT:    v_lshlrev_b16_e32 v12, 12, v12
-; GFX8-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
-; GFX8-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v17, 12, v17
+; GFX8-NEXT:    v_ashrrev_i16_e32 v12, 12, v12
+; GFX8-NEXT:    v_ashrrev_i16_e32 v18, 12, v18
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
-; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 12, v8
-; GFX8-NEXT:    v_lshlrev_b16_e32 v13, 12, v13
-; GFX8-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
-; GFX8-NEXT:    v_ashrrev_i16_e32 v12, 12, v12
-; GFX8-NEXT:    v_mad_u16 v2, v6, v11, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
-; GFX8-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
-; GFX8-NEXT:    v_mad_u16 v2, v7, v12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 12, v9
-; GFX8-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
-; GFX8-NEXT:    v_lshlrev_b16_e32 v14, 12, v14
-; GFX8-NEXT:    v_ashrrev_i16_e32 v5, 12, v5
-; GFX8-NEXT:    v_mad_u16 v2, v8, v13, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v9, 12, v9
-; GFX8-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
-; GFX8-NEXT:    v_mad_u16 v2, v17, v5, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v10, 12, v10
+; GFX8-NEXT:    v_lshlrev_b16_e32 v16, 12, v16
+; GFX8-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX8-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX8-NEXT:    v_mad_u16 v2, v12, v18, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v16, 12, v16
+; GFX8-NEXT:    v_mad_u16 v2, v11, v17, v2
+; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 12, v8
+; GFX8-NEXT:    v_ashrrev_i16_e32 v9, 12, v9
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v15, 12, v15
-; GFX8-NEXT:    v_ashrrev_i16_e32 v18, 12, v18
-; GFX8-NEXT:    v_mad_u16 v2, v9, v14, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX8-NEXT:    v_ashrrev_i16_e32 v5, 12, v5
+; GFX8-NEXT:    v_mad_u16 v2, v10, v16, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
-; GFX8-NEXT:    v_mad_u16 v2, v16, v18, v2
-; GFX8-NEXT:    v_mad_u16 v2, v10, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v5, v2
+; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 12, v6
+; GFX8-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX8-NEXT:    v_lshlrev_b16_e32 v13, 12, v13
+; GFX8-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX8-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
 ; GFX8-NEXT:    flat_store_short v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2354,7 +2354,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v4, 12
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7]
@@ -2362,68 +2362,70 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX9-NEXT:    v_lshlrev_b16_e32 v6, 12, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v10, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v18, v2, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
-; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
-; GFX9-NEXT:    v_and_b32_e32 v6, v4, v6
-; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
-; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
-; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
-; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
-; GFX9-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
-; GFX9-NEXT:    v_and_b32_e32 v4, v4, v13
-; GFX9-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX9-NEXT:    v_lshl_or_b32 v8, v14, 16, v15
-; GFX9-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX9-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-NEXT:    v_lshlrev_b16_e32 v13, 12, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v17, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-NEXT:    v_lshlrev_b16_e32 v4, 12, v5
+; GFX9-NEXT:    v_ashrrev_i16_e32 v5, 12, v6
+; GFX9-NEXT:    v_lshlrev_b16_e32 v6, 12, v7
+; GFX9-NEXT:    v_lshlrev_b16_e32 v7, 12, v8
+; GFX9-NEXT:    v_lshlrev_b16_e32 v8, 12, v9
+; GFX9-NEXT:    v_ashrrev_i16_e32 v9, 12, v10
+; GFX9-NEXT:    v_lshlrev_b16_e32 v10, 12, v11
+; GFX9-NEXT:    v_lshlrev_b16_e32 v11, 12, v12
+; GFX9-NEXT:    v_ashrrev_i16_e32 v12, 12, v13
+; GFX9-NEXT:    v_ashrrev_i16_e32 v4, 12, v4
+; GFX9-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX9-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX9-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX9-NEXT:    v_lshlrev_b16_e32 v13, 12, v14
+; GFX9-NEXT:    v_lshlrev_b16_e32 v14, 12, v15
+; GFX9-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
+; GFX9-NEXT:    v_lshl_or_b32 v7, v11, 16, v12
+; GFX9-NEXT:    v_lshl_or_b32 v4, v4, 16, v5
+; GFX9-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
+; GFX9-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX9-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v7
+; GFX9-NEXT:    v_lshlrev_b16_e32 v15, 12, v16
+; GFX9-NEXT:    v_ashrrev_i16_e32 v16, 12, v17
+; GFX9-NEXT:    v_lshlrev_b16_e32 v17, 12, v18
+; GFX9-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX9-NEXT:    v_lshl_or_b32 v9, v13, 16, v14
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_mul_lo_u16 v6, v9, v6
+; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-NEXT:    v_ashrrev_i16_e32 v1, 12, v1
+; GFX9-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
+; GFX9-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX9-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
+; GFX9-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v6, v9
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v17, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v15, 16, v16
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v8, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v5, v4
-; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v8
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2437,7 +2439,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 12
 ; GFX9-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-DL-NEXT:    global_load_dword v2, v0, s[6:7]
@@ -2445,68 +2447,70 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v6, 12, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v10, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v18, v2, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
-; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
-; GFX9-DL-NEXT:    v_and_b32_e32 v6, v4, v6
-; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
-; GFX9-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
-; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v13
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v14, 16, v15
-; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1]
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v13, 12, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v17, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v4, 12, v5
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v5, 12, v6
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v6, 12, v7
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v7, 12, v8
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v8, 12, v9
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v9, 12, v10
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v10, 12, v11
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v11, 12, v12
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v12, 12, v13
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v4, 12, v4
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v13, 12, v14
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v14, 12, v15
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v11, 16, v12
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v4, 16, v5
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v7
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v15, 12, v16
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v16, 12, v17
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v17, 12, v18
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v13, 16, v14
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v6, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v1, 12, v1
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v6, v9
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v17, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v15, 16, v16
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v8, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v5, v4
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v8
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2529,71 +2533,85 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-XNACK-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-DL-XNACK-NEXT:    global_load_ushort v3, v0, s[0:1]
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v6, 12, v1
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, 15, v2
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v16, v2, 4, 4
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v14, v2, 24, 4
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v13, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v5, 12, v5
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v6
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v12, 12, v12
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v13, 12, v13
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v8, 12, v8
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v15, 12, v15
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v5, 12, v5
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v12, 12, v12
 ; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v17, v2, 16, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v18, v2, 12, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v2, v2, 8, 4
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v11, v16, 16, v13
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v7, 12, v7
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v14, 12, v14
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v8, 12, v8
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v15, 12, v15
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v12, v12, 16, v13
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v10, 12, v10
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v17, 12, v17
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v7, 12, v7
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v14
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, v4, v15
 ; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v5, v5, v12
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v9, 12, v9
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v16, 12, v16
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v10, 12, v10
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v14, 12, v17
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v6, v6, 16, v13
 ; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v17
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v8
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v8, v15, 16, v10
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v3
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v3, v4, v6
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v4, v4, v14
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v2, v9, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v10
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v3, v5, 16, v3
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v2, 12, v3 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1]
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v1, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v9, 12, v9
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v12, 12, v16
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v2, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v5, v4, v14
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v11, 12, v11
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v18, 12, v18
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v1, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v2, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v5, v12, 16, v5
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v7, v9, 16, v10
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v6
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v11, 12, v11
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v18
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v4, v7, v5
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v6
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1]
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v2, v6, 16, v2
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
 ; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v4
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v2, v2, v3
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v5
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v2
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v2, v1
 ; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-XNACK-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-DL-XNACK-NEXT:    s_endpgm
@@ -2617,71 +2635,85 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NOXNACK-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-DL-NOXNACK-NEXT:    global_load_ushort v3, v2, s[0:1]
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v6, 12, v1
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, 15, v0
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v16, v0, 4, 4
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v12, 28, v0
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v14, v0, 24, 4
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v12, 4, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v13, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v15, 8, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v5, 12, v5
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v12, 12, v12
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v13, 12, v13
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v14, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v8, 12, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v15, 12, v15
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v5, 12, v5
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v12, 12, v12
 ; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v15, v0, 20, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v17, v0, 16, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v18, v0, 12, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v0, v0, 8, 4
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v11, v16, 16, v13
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v7, 12, v7
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v14, 12, v14
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v8, 12, v8
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v15, 12, v15
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v12, v12, 16, v13
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v16, 20, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v10, 12, v10
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v17, 12, v17
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v7, 12, v7
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v14
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, v4, v15
 ; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v0, v4, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v5, v5, v12
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v18, 28, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v9, 12, v9
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v16, 12, v16
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v10, 12, v10
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v14, 12, v17
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v6, v6, 16, v13
 ; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v0, v18, 16, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v17
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v0, 12, v0 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v8
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v8, v15, 16, v10
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v0, 12, v0 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v1, v3
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v3, v4, v6
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v4, v4, v14
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v0, v9, v0
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v1, v10
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v3, v5, 16, v3
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v1, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v3 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1]
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v1, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v9, 12, v9
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v12, 12, v16
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v0, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v5, v4, v14
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v11, 12, v11
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v18, 12, v18
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v1, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v0, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v5, v12, 16, v5
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v7, v9, 16, v10
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v6
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v11, 12, v11
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v18
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v4, v7, v5
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v6
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1]
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v0, v6, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
 ; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v4
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v5
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v1
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v3, v5
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v1, v0
 ; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v3
 ; GFX10-DL-NOXNACK-NEXT:    global_store_short v2, v0, s[0:1]
 ; GFX10-DL-NOXNACK-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll
index f26a6ebeaf8cc..0b99cb5cdc813 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8u.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll
@@ -2187,32 +2187,32 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_and_b32_e32 v5, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v8, v3, 12, 4
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 20, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 28, v3
+; GFX8-NEXT:    v_bfe_u32 v6, v3, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v9, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_and_b32_e32 v12, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
+; GFX8-NEXT:    v_bfe_u32 v13, v2, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v16, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_mad_u16 v4, v5, v12, v4
-; GFX8-NEXT:    v_mad_u16 v4, v6, v13, v4
-; GFX8-NEXT:    v_bfe_u32 v15, v2, 12, 4
-; GFX8-NEXT:    v_mad_u16 v4, v7, v14, v4
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 16, 4
-; GFX8-NEXT:    v_mad_u16 v4, v8, v15, v4
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 20, 4
-; GFX8-NEXT:    v_mad_u16 v4, v9, v16, v4
-; GFX8-NEXT:    v_bfe_u32 v11, v3, 24, 4
-; GFX8-NEXT:    v_bfe_u32 v18, v2, 24, 4
-; GFX8-NEXT:    v_mad_u16 v4, v10, v17, v4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 28, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX8-NEXT:    v_mad_u16 v4, v11, v18, v4
 ; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
+; GFX8-NEXT:    v_mad_u16 v2, v11, v18, v2
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v16, v2
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
+; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
 ; GFX8-NEXT:    flat_store_short v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2234,52 +2234,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 8, 4
-; GFX9-NEXT:    v_and_b32_e32 v11, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v1, v1, 24, 4
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_bfe_u32 v12, v2, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX9-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX9-NEXT:    v_and_b32_e32 v5, v4, v5
-; GFX9-NEXT:    v_and_b32_e32 v14, v4, v14
-; GFX9-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX9-NEXT:    v_and_b32_e32 v16, v4, v16
-; GFX9-NEXT:    v_and_b32_e32 v9, v4, v9
-; GFX9-NEXT:    v_and_b32_e32 v18, v4, v18
-; GFX9-NEXT:    v_and_b32_e32 v4, v4, v11
-; GFX9-NEXT:    v_lshl_or_b32 v2, v2, 16, v18
-; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
-; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-NEXT:    v_lshl_or_b32 v11, v13, 16, v12
-; GFX9-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
-; GFX9-NEXT:    v_lshl_or_b32 v6, v15, 16, v14
-; GFX9-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
-; GFX9-NEXT:    v_lshl_or_b32 v8, v17, 16, v16
-; GFX9-NEXT:    v_lshl_or_b32 v9, v10, 16, v9
+; GFX9-NEXT:    v_bfe_u32 v12, v2, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v5, v11
-; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v6
-; GFX9-NEXT:    v_pk_mul_lo_u16 v6, v9, v8
+; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2301,52 +2301,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v11, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v6, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX9-DL-NEXT:    v_and_b32_e32 v5, v4, v5
-; GFX9-DL-NEXT:    v_and_b32_e32 v14, v4, v14
-; GFX9-DL-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX9-DL-NEXT:    v_and_b32_e32 v16, v4, v16
-; GFX9-DL-NEXT:    v_and_b32_e32 v9, v4, v9
-; GFX9-DL-NEXT:    v_and_b32_e32 v18, v4, v18
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v11
-; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v18
-; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v11, v13, 16, v12
-; GFX9-DL-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
-; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v15, 16, v14
-; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
-; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v17, 16, v16
-; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v10, 16, v9
+; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v5, v11
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v6
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v6, v9, v8
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2372,52 +2372,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, 15, v1
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX10-DL-NEXT:    v_and_b32_e32 v6, 15, v2
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
 ; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
 ; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 16, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX10-DL-NEXT:    v_lshl_or_b32 v7, v9, 16, v7
+; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v9
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v7
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v10, 16, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
 ; GFX10-DL-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 16, 4
-; GFX10-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v12
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v5, v5, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 16, 4
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v10, v10, 16, v13
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v6, v3
-; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 20, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX10-DL-NEXT:    v_and_b32_e32 v11, v4, v11
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v9, v9, v10
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v12
-; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 24, 4
-; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
-; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v1, 16, v11
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 16, v9
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v2, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v8, v8, v10
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v9
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v2, 24, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v11, 16, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v8
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v10
-; GFX10-DL-NEXT:    v_and_b32_e32 v4, v4, v5
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v6
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v7
-; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v9
-; GFX10-DL-NEXT:    v_lshl_or_b32 v4, v8, 16, v4
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v3, v1
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v2, v4, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v5
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_and_b32_e32 v8, v4, v9
+; GFX10-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v4, v6, v5
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v10
+; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v8
+; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v1, v2, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
@@ -2575,52 +2575,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_bfe_u32 v5, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 20, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 28, v3
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 12, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 28, v3
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v12, v3, 16, 4
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_bfe_u32 v12, v2, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 20, 4
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
-; GFX8-NEXT:    v_and_b32_e32 v11, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v3, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX8-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX8-NEXT:    v_mul_lo_u16_e32 v19, v5, v12
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_e32 v13, v7, v14
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_e32 v9, v9, v16
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v15, v3, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v3, v19, v6
-; GFX8-NEXT:    v_or_b32_e32 v6, v13, v8
-; GFX8-NEXT:    v_or_b32_e32 v8, v9, v10
-; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 16, v6
-; GFX8-NEXT:    v_mul_lo_u16_e32 v11, v11, v18
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v8
-; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v9, v11, v15
-; GFX8-NEXT:    v_or_b32_e32 v10, v15, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 8, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 28, v2
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v19, v2, 16, 4
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v11, v11, v18 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_mul_lo_u16_e32 v18, v10, v17
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_bfe_u32 v5, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v6, 15, v3
+; GFX8-NEXT:    v_bfe_u32 v3, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v12, v19
+; GFX8-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v9, v18, v9
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v5, v5, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v3, v2, v11
+; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
+; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 16, v9
+; GFX8-NEXT:    v_mul_lo_u16_e32 v6, v6, v13
+; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v6, v6, v5
+; GFX8-NEXT:    v_or_b32_e32 v5, v5, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 8, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 24, v[2:3]
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_add_u16_e32 v3, v9, v4
-; GFX8-NEXT:    v_add_u16_e32 v3, v3, v10
-; GFX8-NEXT:    v_add_u16_e32 v3, v3, v8
+; GFX8-NEXT:    v_add_u16_e32 v3, v6, v4
+; GFX8-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX8-NEXT:    v_add_u16_e32 v3, v3, v7
 ; GFX8-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
-; GFX8-NEXT:    v_add_u16_e32 v2, v2, v11
-; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
-; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
-; GFX8-NEXT:    v_add_u16_e32 v2, v2, v6
+; GFX8-NEXT:    v_mad_u16 v2, v12, v19, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, v2, v8
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, v2, v9
 ; GFX8-NEXT:    flat_store_byte v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2641,52 +2641,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ubyte v4, v3, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 20, 4
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_bfe_u32 v12, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-NEXT:    v_and_b32_e32 v10, 15, v1
-; GFX9-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-NEXT:    v_bfe_u32 v11, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-NEXT:    v_and_b32_e32 v17, 15, v2
-; GFX9-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v0, v0, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v12, v6, v13
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v18, v5, v11
-; GFX9-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v10, v10, v17
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v2, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_or_b32_e32 v7, v12, v7
-; GFX9-NEXT:    v_or_b32_e32 v1, v18, v0
-; GFX9-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX9-NEXT:    v_or_b32_e32 v9, v10, v2
-; GFX9-NEXT:    v_lshlrev_b32_e32 v10, 16, v7
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX9-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 24, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v6, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v11, v1, 16, 4
+; GFX9-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v12, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v13, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 16, 4
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_mul_lo_u16_e32 v17, v9, v16
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_mul_lo_u16_e32 v18, v11, v2
+; GFX9-NEXT:    v_mul_lo_u16_e32 v7, v7, v14
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v8, v17, v8
+; GFX9-NEXT:    v_mul_lo_u16_e32 v5, v5, v12
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v12, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v1, v18, v10
+; GFX9-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX9-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v5, v5, v12
+; GFX9-NEXT:    v_or_b32_e32 v7, v12, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 8, v1
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v1, v9, v4
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v2
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v8
+; GFX9-NEXT:    v_add_u16_e32 v1, v5, v4
+; GFX9-NEXT:    v_add_u16_e32 v1, v1, v7
+; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
 ; GFX9-NEXT:    v_add_u16_e32 v0, v1, v0
-; GFX9-NEXT:    v_mad_legacy_u16 v0, v5, v11, v0
+; GFX9-NEXT:    v_mad_legacy_u16 v0, v11, v2, v0
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v10
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
-; GFX9-NEXT:    v_mad_legacy_u16 v0, v6, v13, v0
-; GFX9-NEXT:    v_add_u16_e32 v0, v0, v7
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; GFX9-NEXT:    v_mad_legacy_u16 v0, v9, v16, v0
+; GFX9-NEXT:    v_add_u16_e32 v0, v0, v8
 ; GFX9-NEXT:    global_store_byte v3, v0, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2707,52 +2707,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ubyte v4, v3, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 20, 4
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v10, 15, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v11, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v17, 15, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v12, v6, v13
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v18, v5, v11
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v10, v10, v17
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v2, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_or_b32_e32 v7, v12, v7
-; GFX9-DL-NEXT:    v_or_b32_e32 v1, v18, v0
-; GFX9-DL-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX9-DL-NEXT:    v_or_b32_e32 v9, v10, v2
-; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v10, 16, v7
-; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX9-DL-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-DL-NEXT:    v_or_b32_e32 v2, v2, v0
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 24, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v11, v1, 16, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v12, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 16, 4
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v17, v9, v16
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v18, v11, v2
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v7, v7, v14
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v8, v17, v8
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v12, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v1, v18, v10
+; GFX9-DL-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX9-DL-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_or_b32_e32 v7, v12, v0
 ; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v10, 8, v1
 ; GFX9-DL-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v9, v4
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v2
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v8
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v5, v4
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v7
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
 ; GFX9-DL-NEXT:    v_add_u16_e32 v0, v1, v0
-; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v5, v11, v0
+; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v11, v2, v0
 ; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v10
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
-; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v6, v13, v0
-; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v7
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v9, v16, v0
+; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v8
 ; GFX9-DL-NEXT:    global_store_byte v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2774,55 +2774,55 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX10-DL-NEXT:    global_load_ubyte v3, v4, s[0:1]
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 12, 4
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v7, v1, 8, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v9, v9, v10
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v11, 15, v1
-; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v15, v2, 4, 4
-; GFX10-DL-NEXT:    v_mul_lo_u16 v8, v8, v13
-; GFX10-DL-NEXT:    v_lshlrev_b16 v9, 8, v9
-; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v7, v14
-; GFX10-DL-NEXT:    v_bfe_u32 v12, v2, 16, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v1, v1, v15
-; GFX10-DL-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX10-DL-NEXT:    v_mul_lo_u16 v9, v0, v10
-; GFX10-DL-NEXT:    v_mul_lo_u16 v10, v6, v13
-; GFX10-DL-NEXT:    v_lshlrev_b16 v7, 8, v7
+; GFX10-DL-NEXT:    v_mul_lo_u16 v6, v6, v10
+; GFX10-DL-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v7, v13
+; GFX10-DL-NEXT:    v_lshlrev_b16 v6, 8, v6
+; GFX10-DL-NEXT:    v_and_b32_e32 v10, 15, v2
+; GFX10-DL-NEXT:    v_bfe_u32 v15, v2, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 20, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v16, v2, 16, 4
+; GFX10-DL-NEXT:    v_mul_lo_u16 v2, v8, v14
+; GFX10-DL-NEXT:    v_mul_lo_u16 v0, v0, v1
+; GFX10-DL-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX10-DL-NEXT:    v_mul_lo_u16 v1, v11, v13
+; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v9, v15
+; GFX10-DL-NEXT:    v_lshlrev_b16 v2, 8, v2
+; GFX10-DL-NEXT:    v_lshlrev_b16 v8, 8, v0
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX10-DL-NEXT:    v_mul_lo_u16 v5, v5, v10
+; GFX10-DL-NEXT:    v_mul_lo_u16 v10, v12, v16
 ; GFX10-DL-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX10-DL-NEXT:    v_mul_lo_u16 v2, v11, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v11, v5, v12
-; GFX10-DL-NEXT:    v_lshlrev_b16 v9, 8, v9
-; GFX10-DL-NEXT:    v_or_b32_e32 v7, v10, v7
-; GFX10-DL-NEXT:    v_or_b32_sdwa v10, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX10-DL-NEXT:    v_or_b32_e32 v2, v11, v9
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v9, 16, v7
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; GFX10-DL-NEXT:    v_or_b32_e32 v7, v7, v2
+; GFX10-DL-NEXT:    v_or_b32_sdwa v2, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_or_b32_e32 v5, v5, v8
+; GFX10-DL-NEXT:    v_or_b32_e32 v1, v10, v1
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v8, 16, v7
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v1, v3
-; GFX10-DL-NEXT:    v_or_b32_sdwa v1, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:    v_add_nc_u16 v9, v3, v10
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_add_nc_u16 v5, v3, v2
 ; GFX10-DL-NEXT:    v_lshrrev_b64 v[2:3], 24, v[0:1]
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v0, v9, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v0, v5, v6
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v2
-; GFX10-DL-NEXT:    v_mad_u16 v0, v5, v12, v0
+; GFX10-DL-NEXT:    v_mad_u16 v0, v12, v16, v0
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v1
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 8, v7
-; GFX10-DL-NEXT:    v_mad_u16 v0, v6, v13, v0
+; GFX10-DL-NEXT:    v_mad_u16 v0, v9, v15, v0
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v1
 ; GFX10-DL-NEXT:    global_store_byte v4, v0, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
@@ -2941,40 +2941,32 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_and_b32_e32 v5, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v8, v3, 12, 4
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 20, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 28, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 24, v3
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v9, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_and_b32_e32 v12, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 4, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 8, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v16, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_add_u16_e32 v4, v5, v4
-; GFX8-NEXT:    v_bfe_u32 v15, v2, 12, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v6
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 16, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v7
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 20, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v8
-; GFX8-NEXT:    v_bfe_u32 v11, v3, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 28, v3
-; GFX8-NEXT:    v_bfe_u32 v18, v2, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX8-NEXT:    v_mul_u32_u24_e32 v10, v10, v17
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v9
-; GFX8-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
-; GFX8-NEXT:    v_mul_u32_u24_e32 v3, v11, v18
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v10
-; GFX8-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX8-NEXT:    v_add_u16_e32 v2, v3, v2
+; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
+; GFX8-NEXT:    v_mad_u16 v2, v11, v18, v2
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v16, v2
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
+; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    flat_store_byte v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
@@ -2989,47 +2981,60 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_and_b32_e32 v4, 15, v1
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_and_b32_e32 v11, 15, v2
 ; GFX9-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    v_bfe_u32 v12, v2, 4, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v4, v4, v11
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
+; GFX9-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
 ; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v6
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v7
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX9-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v8
-; GFX9-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX9-NEXT:    v_mul_u32_u24_e32 v2, v10, v17
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v9
-; GFX9-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX9-NEXT:    v_add_u16_e32 v1, v2, v1
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
+; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_and_b32_e32 v1, 15, v1
 ; GFX9-NEXT:    global_store_byte v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
@@ -3044,106 +3049,135 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX9-DL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-DL-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, 15, v1
-; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_and_b32_e32 v11, 15, v2
 ; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 4, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v11
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
 ; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v6
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v7
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v8
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v2, v10, v17
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v9
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v2, v1
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
+; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    v_and_b32_e32 v1, 15, v1
 ; GFX9-DL-NEXT:    global_store_byte v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
 ; GFX10-DL-LABEL: udot8_acc4_vecMul:
 ; GFX10-DL:       ; %bb.0: ; %entry
+; GFX10-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX10-DL-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
 ; GFX10-DL-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
 ; GFX10-DL-NEXT:    s_mov_b32 s10, -1
 ; GFX10-DL-NEXT:    s_mov_b32 s11, 0x31c16000
 ; GFX10-DL-NEXT:    s_add_u32 s8, s8, s3
-; GFX10-DL-NEXT:    s_clause 0x1
-; GFX10-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX10-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX10-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-DL-NEXT:    s_clause 0x1
 ; GFX10-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX10-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX10-DL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-DL-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX10-DL-NEXT:    global_load_ubyte v3, v0, s[0:1]
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NEXT:    v_and_b32_e32 v4, 15, v1
+; GFX10-DL-NEXT:    v_and_b32_e32 v7, 15, v1
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-NEXT:    v_and_b32_e32 v5, 15, v2
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 8, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 8, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v7
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, 15, v2
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v9
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v7
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v10, 16, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v5, v5, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v10, v10, 16, v13
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v4, v3
-; GFX10-DL-NEXT:    v_bfe_u32 v4, v1, 12, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v8
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 16, 4
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 16, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v7
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 20, 4
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v5
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 20, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v8
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 24, 4
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v2, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v8, v8, v10
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v9
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v2, 24, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v11, 16, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v8
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
+; GFX10-DL-NEXT:    v_and_b32_e32 v8, v4, v9
+; GFX10-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v4, v6, v5
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v10
+; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v8
+; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v4
-; GFX10-DL-NEXT:    v_bfe_u32 v4, v1, 24, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v7
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v6
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v8
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v5
-; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v4
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v1, v2, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-NEXT:    v_and_b32_e32 v1, 15, v1
-; GFX10-DL-NEXT:    global_store_byte v0, v1, s[2:3]
+; GFX10-DL-NEXT:    global_store_byte v0, v1, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
                                              <8 x i4> addrspace(1)* %src2,
                                              i4 addrspace(1)* nocapture %dst) {
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
index cda3f4871381a..8db5c5754e208 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
@@ -20,9 +20,9 @@ define amdgpu_kernel void @v_input_output_i8() {
 
 ; GCN: error: couldn't allocate output register for constraint 's'
 ; GCN: error: couldn't allocate input reg for constraint 's'
-define amdgpu_kernel void @s_input_output_v8f16() {
-  %v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
-  tail call void asm sideeffect "; use $0", "s"(<8 x half> %v)
+define amdgpu_kernel void @s_input_output_v16f16() {
+  %v = tail call <16 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(<16 x half> %v)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index f18d77b190e2b..a4eb3a409c47f 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -1745,6 +1745,345 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa
   ret void
 }
 
+define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val) {
+; GFX9-LABEL: v_insertelement_v8f16_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
+; GFX9-NEXT:    s_add_u32 s0, s0, s7
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshl_or_b32 v1, v5, 16, v1
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8f16_3:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_add_u32 s0, s0, s7
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s11
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, s4
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
+; VI-NEXT:    v_mov_b32_e32 v5, s9
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    v_bfi_b32 v3, s4, v3, v3
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8f16_3:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s4, s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    s_lshl_b32 s0, s4, 16
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; CI-NEXT:    v_or_b32_e32 v1, s0, v1
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to half
+  %vecins = insertelement <8 x half> %vec, half %val.cvt, i32 3
+  store <8 x half> %vecins, <8 x half> addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @v_insertelement_v8i16_6(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %val) {
+; GFX9-LABEL: v_insertelement_v8i16_6:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
+; GFX9-NEXT:    s_add_u32 s0, s0, s7
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_bfi_b32 v3, v6, v5, v3
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8i16_6:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_add_u32 s0, s0, s7
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s11
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, s4
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_mov_b32_e32 v5, s9
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    v_bfi_b32 v1, s4, v1, v1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_bfi_b32 v3, s4, v6, v3
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8i16_6:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s4, s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    s_mov_b32 s0, 0xffff
+; CI-NEXT:    v_mov_b32_e32 v6, s4
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_bfi_b32 v3, s0, v6, v3
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to i16
+  %vecins = insertelement <8 x i16> %vec, i16 %val.cvt, i32 6
+  store <8 x i16> %vecins, <8 x i16> addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @v_insertelement_v8f16_dynamic(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val, i32 %n) {
+; GFX9-LABEL: v_insertelement_v8f16_dynamic:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 7
+; GFX9-NEXT:    v_mov_b32_e32 v6, s6
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 6
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 5
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 4
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; GFX9-NEXT:    v_and_b32_e32 v3, v5, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 2
+; GFX9-NEXT:    v_lshl_or_b32 v3, v7, 16, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v5, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 0
+; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 16, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v10, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_and_b32_e32 v1, v5, v1
+; GFX9-NEXT:    v_and_b32_e32 v0, v5, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v0, v8, 16, v0
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8f16_dynamic:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
+; VI-NEXT:    s_cmp_eq_u32 s5, 6
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    v_mov_b32_e32 v6, s4
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 7
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 4
+; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 5
+; VI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; VI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 2
+; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; VI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 3
+; VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; VI-NEXT:    v_or_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 0
+; VI-NEXT:    v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 1
+; VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc
+; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
+; VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8f16_dynamic:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    v_cvt_f32_f16_e32 v6, s4
+; CI-NEXT:    s_cmp_eq_u32 s5, 7
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 6
+; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 5
+; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 4
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; CI-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; CI-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; CI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; CI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; CI-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; CI-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[0:1]
+; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 3
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; CI-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 2
+; CI-NEXT:    v_cndmask_b32_e32 v9, v9, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 1
+; CI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 0
+; CI-NEXT:    v_cndmask_b32_e64 v8, v8, v6, s[2:3]
+; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; CI-NEXT:    v_cndmask_b32_e32 v10, v10, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; CI-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; CI-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; CI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
+; CI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; CI-NEXT:    v_lshlrev_b32_e32 v8, 16, v9
+; CI-NEXT:    v_or_b32_e32 v3, v3, v6
+; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v10
+; CI-NEXT:    v_or_b32_e32 v2, v2, v7
+; CI-NEXT:    v_or_b32_e32 v1, v1, v8
+; CI-NEXT:    v_or_b32_e32 v0, v0, v6
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to half
+  %vecins = insertelement <8 x half> %vec, half %val.cvt, i32 %n
+  store <8 x half> %vecins, <8 x half> addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index ec5445f96e046..3105f4b11b91e 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1875,16 +1875,15 @@ define amdgpu_kernel void @v5i16_arg(<5 x i16> addrspace(1)* nocapture %out, <5
 ;
 ; GFX9-LABEL: v5i16_arg:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x18
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s6
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    global_store_short v2, v3, s[2:3] offset:8
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    global_store_short v2, v3, s[6:7] offset:8
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; EG-LABEL: v5i16_arg:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 5ebd97802f6b6..23150da4e53bc 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -4727,10 +4727,10 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s2, 0xffff
-; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
-; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
+; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
+; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
+; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
+; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
 ; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GCN-NOHSA-VI-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 5e99dc0e60c22..29c8d6a8b21d0 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -5581,14 +5581,14 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
-; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
 ; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
 ; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GCN-NOHSA-VI-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 4e6608dd0bb5b..a229ce1146aba 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -571,6 +571,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7] glc
@@ -580,8 +581,8 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX9-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s6, -1
 ; GFX9-NEXT:    v_pk_sub_i16 v2, v2, v3
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v2
+; GFX9-NEXT:    v_and_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GFX9-NEXT:    s_endpgm
@@ -617,6 +618,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
@@ -627,8 +629,8 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX10-NEXT:    s_mov_b32 s6, -1
 ; GFX10-NEXT:    v_pk_sub_i16 v2, v1, v2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v3, v2
+; GFX10-NEXT:    v_and_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX10-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX10-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GFX10-NEXT:    s_endpgm

From 9d32847b331565eb1b38749aa7a721c6be8b64aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= 
Date: Mon, 24 Jan 2022 19:22:56 +0100
Subject: [PATCH 443/946] [DAGCombine] Remove unused param in
 combineCarryDiamond(). NFC

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 77a6e7bba3660..1137f8b16977f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3058,9 +3058,8 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
 //
 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
 // a single path for carry/borrow out propagation:
-static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
-                                   const TargetLowering &TLI, SDValue Carry0,
-                                   SDValue Carry1, SDNode *N) {
+static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
+                                   SDValue Carry0, SDValue Carry1, SDNode *N) {
   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
     return SDValue();
   unsigned Opcode = Carry0.getOpcode();
@@ -5879,7 +5878,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue Shuffle = XformToShuffleWithZero(N))
       return Shuffle;
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   // fold (and (or x, C), D) -> D if (C & D) == D
@@ -6676,7 +6675,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (SDValue Combined = visitORLike(N0, N1, N))
     return Combined;
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
@@ -8173,7 +8172,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   return SDValue();

From d4be9720e7e68f1316cda971e9b29dc880222200 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht 
Date: Mon, 24 Jan 2022 12:01:25 -0800
Subject: [PATCH 444/946] [test] Fix no-undef-type-md.ll.

There are two test issues:
- The test assumes the current directory is writeable, but it may not be. Use `%t.o`-like paths instead of implicit `a.out`.
- The `RUN llvm-nm` line is missing a colon, so the test was not being exercised.
---
 llvm/test/LTO/Resolution/X86/no-undef-type-md.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
index afee5e656df17..8748375c84315 100644
--- a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
+++ b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
@@ -1,19 +1,19 @@
 ; RUN: opt <%s -o %t0.o -thinlto-bc -thinlto-split-lto-unit
 ; RUN: llvm-as -o %t1.o %S/Inputs/no-undef-type-md.ll
-; RUN: llvm-lto2 run -o a.out \
+; RUN: llvm-lto2 run -o %t-obj.o \
 ; RUN: %t0.o \
 ; RUN: -r=%t0.o,a, \
 ; RUN: -r=%t0.o,b,pl \
 ; RUN: %t1.o \
 ; RUN: -r=%t1.o,a,pl \
 ; RUN: | FileCheck --allow-empty --check-prefix=ERROR %s
-; RUN llvm-nm a.out.0 a.out.1 -S | FileCheck %s
+; RUN: llvm-nm %t-obj.o.0 %t-obj.o.1 -S | FileCheck %s
 
 ; ERROR-NOT: expected a Function or null
 ; ERROR-NOT: i32 (%0*, i32*)* undef
 
-; CHECK: a.out.0:
-; CHECK: a.out.1:
+; CHECK: -obj.o.0:
+; CHECK: -obj.o.1:
 
 ; ModuleID = 'test.cpp.o'
 source_filename = "test.cpp"

From c548bc258c693df2087eda824308a8da55fd6003 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 24 Jan 2022 14:16:23 -0500
Subject: [PATCH 445/946] [InstCombine] add tests for icmp with masked mul
 operand; NFC

More coverage for D114272
---
 .../Transforms/InstCombine/icmp-mul-and.ll    | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
index b15404c0aaa2f..2d63bfac0ffcc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
@@ -1,6 +1,103 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+declare void @use(i8)
+
+define i1 @mul_mask_pow2_eq0(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_eq0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp eq i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_ne0_use1(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_ne0_use1(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 40
+; CHECK-NEXT:    call void @use(i8 [[MUL]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 40
+  call void @use(i8 %mul)
+  %and = and i8 %mul, 8
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_ne0_use2(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_ne0_use2(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 40
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 40
+  %and = and i8 %mul, 8
+  call void @use(i8 %and)
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_sgt0(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_sgt0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp sgt i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_fakepow2_ne0(i8 %x) {
+; CHECK-LABEL: @mul_mask_fakepow2_ne0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 5
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_eq4(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_eq4(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp eq i8 %and, 4
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_notpow2_ne(i8 %x) {
+; CHECK-LABEL: @mul_mask_notpow2_ne(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 60
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 12
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 60
+  %and = and i8 %mul, 12
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
 define i1 @pr40493(i32 %area) {
 ; CHECK-LABEL: @pr40493(
 ; CHECK-NEXT:  entry:

From 6d020a5ac2d5e31ab8d472e139e9caec405a5006 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Mon, 24 Jan 2022 12:03:35 -0800
Subject: [PATCH 446/946] [BOLT] Add missing <memory> in
 InstrumentationRuntimeLibrary.h

<memory> is no longer included as a result of 5f290c090a24
("Move STLFunctionalExtras out of STLExtras").

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D118064
---
 bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
index 3257deab1db45..55f86dd42fa26 100644
--- a/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
+++ b/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
@@ -16,6 +16,7 @@
 
 #include "bolt/Passes/InstrumentationSummary.h"
 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include <memory>
 
 namespace llvm {
 namespace bolt {

From 0407ab4114dbdbd1845df712639bdbc84ec6df2c Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Fri, 14 Jan 2022 12:30:22 -0500
Subject: [PATCH 447/946] [libc++] Make sure basic_string::reserve(n) never
 shrinks in all Standard modes

Since basic_string::reserve(n) is instantiated in the shared library but also
available to the compiler for inlining, its definition should not depend on
things like the Standard mode in use. Indeed, that flag may not match between
how the shared library is compiled and how users are compiling their own code,
resulting in ODR violations.

However, note that we retain the behavior of basic_string::reserve() to
shrink the string for backwards compatibility reasons. While it would
technically be conforming to not shrink, we believe user expectation is
for it to shrink, and so existing code might have been written based on
that assumption. We prefer to not break such code, even though that makes
basic_string::reserve() and basic_string::reserve(0) not equivalent anymore.

Fixes llvm-project#53170

Differential Revision: https://reviews.llvm.org/D117332
---
 libcxx/docs/ReleaseNotes.rst                  | 12 +++
 libcxx/include/string                         |  9 ++-
 .../string.capacity/PR53170.pass.cpp          | 79 +++++++++++++++++++
 .../string.capacity/reserve.pass.cpp          | 50 ------------
 4 files changed, 96 insertions(+), 54 deletions(-)
 create mode 100644 libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
 delete mode 100644 libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index b35c8a3329515..7210e1971b10f 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -115,6 +115,18 @@ API Changes
   You must now explicitly initialize with a ``chrono::month`` and
   ``chrono::weekday_indexed`` instead of "meh, whenever".
 
+- C++20 requires that ``std::basic_string::reserve(n)`` never reduce the capacity
+  of the string. (For that, use ``shrink_to_fit()``.) Prior to this release, libc++'s
+  ``std::basic_string::reserve(n)`` could reduce capacity in C++17 and before, but
+  not in C++20 and later. This caused ODR violations when mixing code compiled under
+  different Standard modes. After this change, libc++'s ``std::basic_string::reserve(n)``
+  never reduces capacity, even in C++17 and before.
+  C++20 deprecates the zero-argument overload of ``std::basic_string::reserve()``,
+  but specifically permits it to reduce capacity. To avoid breaking existing code
+  assuming that ``std::basic_string::reserve()`` will shrink, libc++ maintains
+  the behavior to shrink, even though that makes ``std::basic_string::reserve()`` not
+  a synonym for ``std::basic_string::reserve(0)`` in any Standard mode anymore.
+
 ABI Changes
 -----------
 
diff --git a/libcxx/include/string b/libcxx/include/string
index b2eef646f9827..6f22f02afa0ba 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -3265,10 +3265,11 @@ basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacit
     if (__requested_capacity > max_size())
         this->__throw_length_error();
 
-#if _LIBCPP_STD_VER > 17
-    // Reserve never shrinks as of C++20.
-    if (__requested_capacity <= capacity()) return;
-#endif
+    // Make sure reserve(n) never shrinks. This is technically only required in C++20
+    // and later (since P0966R1), however we provide consistent behavior in all Standard
+    // modes because this function is instantiated in the shared library.
+    if (__requested_capacity <= capacity())
+        return;
 
     size_type __target_capacity = _VSTD::max(__requested_capacity, size());
     __target_capacity = __recommend(__target_capacity);
diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
new file mode 100644
index 0000000000000..1be3dc0af2675
--- /dev/null
+++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <string>
+
+// void reserve(); // Deprecated in C++20.
+// void reserve(size_type);
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
+// This test ensures that libc++ implements https://wg21.link/P0966R1 (reserve never shrinks)
+// even before C++20. This is required in order to avoid ODR violations because basic_string::reserve(size)
+// is compiled into the shared library. Hence, it needs to have the same definition in all Standard modes.
+//
+// However, note that reserve() does shrink, and it does so in all Standard modes.
+//
+// Reported as https://llvm.org/PR53170.
+
+// reserve(n) used to shrink the string until https://llvm.org/D117332 was shipped.
+// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
+// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{11|12}}
+
+#include <string>
+#include <stdexcept>
+#include <cassert>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class S>
+void test() {
+    // Test that a call to reserve() does shrink the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve();
+        assert(s.capacity() < old_cap);
+    }
+
+    // Test that a call to reserve(smaller-than-capacity) never shrinks the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve(10);
+        assert(s.capacity() == old_cap);
+    }
+
+    // In particular, test that reserve(0) does NOT shrink the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve(0);
+        assert(s.capacity() == old_cap);
+    }
+}
+
+int main(int, char**) {
+    test<std::string>();
+
+#if TEST_STD_VER >= 11
+    test<std::basic_string<char, std::char_traits<char>, min_allocator<char> > >();
+#endif
+
+    return 0;
+}
diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp
deleted file mode 100644
index 358f51fd6e4cf..0000000000000
--- a/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <string>
-
-// void reserve(); // Deprecated in C++20.
-
-// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
-
-#include <string>
-#include <stdexcept>
-#include <cassert>
-
-#include "test_macros.h"
-#include "min_allocator.h"
-
-template <class S>
-void
-test()
-{
-    // Tests that a call to reserve() on a long string is equivalent to shrink_to_fit().
-    S s(1000, 'a');
-    typename S::size_type old_cap = s.capacity();
-    s.resize(20);
-    assert(s.capacity() == old_cap);
-    s.reserve();
-    assert(s.capacity() < old_cap);
-}
-
-int main(int, char**)
-{
-    {
-    typedef std::string S;
-    test<S>();
-    }
-#if TEST_STD_VER >= 11
-    {
-    typedef min_allocator<char> A;
-    typedef std::basic_string<char, std::char_traits<char>, A> S;
-    test<S>();
-    }
-#endif
-
-    return 0;
-}

From 1f6af9c9cefd226f2edbc36040f2ffb81c68a3eb Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Sun, 9 Jan 2022 09:36:08 -0500
Subject: [PATCH 448/946] [libc++][CI] Re-enable all CI jobs

This essentially reverts commit 89f4a18f371d8 now that our CI is back
online at full capacity.

Differential Revision: https://reviews.llvm.org/D116891
---
 libcxx/utils/ci/buildkite-pipeline.yml | 467 ++++++++++++-------------
 1 file changed, 233 insertions(+), 234 deletions(-)

diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index c6f4d700abafb..cf2b9d9c82120 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -160,8 +160,6 @@ steps:
   #
   - wait
 
-  # TODO: Due to ongoing CI outage on our Linux nodes, most configurations running on Linux
-  #       are disabled. We are currently running off of a much smaller fleet than normally.
   # Tests with the supported compilers.
   - label: "GCC 11 / C++11"
     command: "libcxx/utils/ci/run-buildbot generic-gcc-cxx11"
@@ -189,18 +187,18 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "Clang 13"
-  #   command: "libcxx/utils/ci/run-buildbot generic-clang-13"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Clang 13"
+    command: "libcxx/utils/ci/run-buildbot generic-clang-13"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   # Tests with the sanitizers.
   - label: "ASAN"
@@ -216,33 +214,34 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "TSAN"
-  #   command: "libcxx/utils/ci/run-buildbot generic-tsan"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "TSAN"
+    command: "libcxx/utils/ci/run-buildbot generic-tsan"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "UBSAN"
-  #   command: "libcxx/utils/ci/run-buildbot generic-ubsan"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "UBSAN"
+    command: "libcxx/utils/ci/run-buildbot generic-ubsan"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   # # Tests with the various supported ways to build libc++.
+  # TODO: Fix failures with the GDB pretty printers
   # - label: "Bootstrapping build"
   #   command: "libcxx/utils/ci/run-buildbot bootstrapping-build"
   #   artifact_paths:
@@ -256,58 +255,58 @@ steps:
   #         limit: 2
   #   timeout_in_minutes: 120
 
-  # - label: "Legacy Lit configuration"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-test-config"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy Lit configuration"
+    command: "libcxx/utils/ci/run-buildbot legacy-test-config"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Legacy standalone build"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-standalone"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy standalone build"
+    command: "libcxx/utils/ci/run-buildbot legacy-standalone"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Legacy LLVM_ENABLE_PROJECTS build"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-project-build"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy LLVM_ENABLE_PROJECTS build"
+    command: "libcxx/utils/ci/run-buildbot legacy-project-build"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # # Tests with various build configurations.
-  # - label: "-fno-exceptions"
-  #   command: "libcxx/utils/ci/run-buildbot generic-noexceptions"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  # Tests with various build configurations.
+  - label: "-fno-exceptions"
+    command: "libcxx/utils/ci/run-buildbot generic-noexceptions"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   - label: "Modular build"
     command: "libcxx/utils/ci/run-buildbot generic-modules"
@@ -322,164 +321,164 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "Static libraries"
-  #   command: "libcxx/utils/ci/run-buildbot generic-static"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Static libraries"
+    command: "libcxx/utils/ci/run-buildbot generic-static"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Assertions enabled"
-  #   command: "libcxx/utils/ci/run-buildbot generic-assertions"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #     - "**/*.abilist"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Assertions enabled"
+    command: "libcxx/utils/ci/run-buildbot generic-assertions"
+    artifact_paths:
+      - "**/test-results.xml"
+      - "**/*.abilist"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Debug iterators"
-  #   command: "libcxx/utils/ci/run-buildbot generic-debug-iterators"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #     - "**/*.abilist"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Debug iterators"
+    command: "libcxx/utils/ci/run-buildbot generic-debug-iterators"
+    artifact_paths:
+      - "**/test-results.xml"
+      - "**/*.abilist"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "With LLVM's libunwind"
-  #   command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "With LLVM's libunwind"
+    command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Single-threaded"
-  #   command: "libcxx/utils/ci/run-buildbot generic-singlethreaded"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Single-threaded"
+    command: "libcxx/utils/ci/run-buildbot generic-singlethreaded"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No debug mode"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-debug"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No debug mode"
+    command: "libcxx/utils/ci/run-buildbot generic-no-debug"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No filesystem"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-filesystem"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No filesystem"
+    command: "libcxx/utils/ci/run-buildbot generic-no-filesystem"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No random device"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-random_device"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No random device"
+    command: "libcxx/utils/ci/run-buildbot generic-no-random_device"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No locale"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-localization"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No locale"
+    command: "libcxx/utils/ci/run-buildbot generic-no-localization"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No Unicode"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-unicode"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No Unicode"
+    command: "libcxx/utils/ci/run-buildbot generic-no-unicode"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No wide characters"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No wide characters"
+    command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # # Other non-testing CI jobs
-  # - label: "Benchmarks"
-  #   command: "libcxx/utils/ci/run-buildbot benchmarks"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  # Other non-testing CI jobs
+  - label: "Benchmarks"
+    command: "libcxx/utils/ci/run-buildbot benchmarks"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   - label: "Documentation"
     command: "libcxx/utils/ci/run-buildbot documentation"

From c3ca2c6b14f91f8232525373c4f5b1dc504a39a1 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani 
Date: Mon, 24 Jan 2022 21:45:00 +0100
Subject: [PATCH 449/946] [lldb/test] Fix
 `TestScriptedProcess.test_scripted_process_and_scripted_thread`

This patch updates `dummy_scripted_process.py` to report the dummy
thread correctly to reflect the changes introduced by `d3e0f7e`.

Signed-off-by: Med Ismail Bennani 
---
 .../scripted_process/TestScriptedProcess.py                | 7 +++++--
 .../scripted_process/dummy_scripted_process.py             | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 4831d48a0b5a9..0c215f082c5d3 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -41,6 +41,7 @@ def test_python_plugin_package(self):
         self.expect('script dir(ScriptedProcess)',
                     substrs=["launch"])
 
+    @skipUnlessDarwin
     def test_invalid_scripted_register_context(self):
         """Test that we can launch an lldb scripted process with an invalid
         Scripted Thread, with invalid register context."""
@@ -77,7 +78,7 @@ def cleanup():
 
         self.assertIn("Failed to get scripted thread registers data.", log)
 
-    @skipIf(archs=no_match(['x86_64']))
+    @skipIf(archs=no_match(['x86_64', 'arm64', 'arm64e']))
     def test_scripted_process_and_scripted_thread(self):
         """Test that we can launch an lldb scripted process using the SBAPI,
         check its process ID, read string from memory, check scripted thread
@@ -124,8 +125,10 @@ def cleanup():
                 break
 
         self.assertTrue(GPRs, "Invalid General Purpose Registers Set")
-        self.assertEqual(GPRs.GetNumChildren(), 21)
+        self.assertGreater(GPRs.GetNumChildren(), 0)
         for idx, reg in enumerate(GPRs, start=1):
+            if idx > 21:
+                break
             self.assertEqual(idx, int(reg.value, 16))
 
     def create_stack_skinny_corefile(self, file):
diff --git a/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py b/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
index d7f428d408457..67850cf57a73d 100644
--- a/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
@@ -9,6 +9,7 @@
 class DummyScriptedProcess(ScriptedProcess):
     def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
         super().__init__(target, args)
+        self.threads[0] = DummyScriptedThread(self, None)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         return None

From 997e128e2a78f5a5434fc75997441ae1ee76f8a4 Mon Sep 17 00:00:00 2001
From: Casey Carter 
Date: Wed, 29 Dec 2021 17:21:52 -0800
Subject: [PATCH 450/946] [libcxx][test] the domain of == for forward iterators
 is iterator values from the same range

* Default-initialized `basic_string` iterators are not portably in the domain of `==`.
* Avoid comparing iterators from non-equal string_views which MSVCSTL considers not to be in the domain of equality.
* Don't test invalid range `[in, out + N)`.

Also silence some truncation warnings by testing with a non-narrowing conversion.

Differential Revision: https://reviews.llvm.org/D118049
---
 .../range.all/range.owning.view/begin_end.pass.cpp     |  8 ++++----
 .../range.transform/iterator/plus_minus.pass.cpp       |  4 ++--
 .../basic.string/string.iterators/iterators.pass.cpp   |  1 -
 .../format.parse.ctx/advance_to.pass.cpp               |  4 ++--
 .../ranges_uninitialized_copy.pass.cpp                 | 10 +++++-----
 .../ranges_uninitialized_copy_n.pass.cpp               |  4 ++--
 .../ranges_uninitialized_move.pass.cpp                 | 10 +++++-----
 .../ranges_uninitialized_move_n.pass.cpp               |  4 ++--
 8 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
index 9319ac3b8986a..34898f0d3df94 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
@@ -117,10 +117,10 @@ constexpr bool test()
     // Test a non-view.
     std::array a = {1, 2};
     auto ov = std::ranges::owning_view(std::move(a));
-    assert(ov.begin() != a.begin()); // because it points into the copy
-    assert(std::as_const(ov).begin() != a.begin());
-    assert(ov.end() != a.end());
-    assert(std::as_const(ov).end() != a.end());
+    assert(std::to_address(ov.begin()) != std::to_address(a.begin())); // because it points into the copy
+    assert(std::to_address(std::as_const(ov).begin()) != std::to_address(a.begin()));
+    assert(std::to_address(ov.end()) != std::to_address(a.end()));
+    assert(std::to_address(std::as_const(ov).end()) != std::to_address(a.end()));
   }
   return true;
 }
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
index 9baff10cc034d..6138df97929a2 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
@@ -21,12 +21,12 @@ constexpr bool test() {
   std::ranges::transform_view transformView1;
   auto iter1 = std::move(transformView1).begin();
   std::ranges::transform_view transformView2;
-  auto iter2 = std::move(transformView2).begin();
+  [[maybe_unused]] auto iter2 = std::move(transformView2).begin();
   iter1 += 4;
   assert((iter1 + 1).base() == globalBuff + 5);
   assert((1 + iter1).base() == globalBuff + 5);
   assert((iter1 - 1).base() == globalBuff + 3);
-  assert(iter1 - iter2 == 4);
+  LIBCPP_ASSERT(iter1 - iter2 == 4);
   assert((iter1 + 2) - 2 == iter1);
   assert((iter1 - 2) + 2 == iter1);
 
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
index 187452b6020e0..0ab3a549dd5af 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
@@ -54,7 +54,6 @@ TEST_CONSTEXPR_CXX20 void test()
         C a;
         typename C::iterator i1 = a.begin();
         typename C::iterator i2;
-        assert ( i1 != i2 );
         i2 = i1;
         assert ( i1 == i2 );
     }
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
index fe0405f2e534c..d392f993343a6 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
@@ -39,10 +39,10 @@ constexpr void test(const CharT* fmt) {
     std::basic_format_parse_context context(view);
 
     context.advance_to(context.begin() + 1);
-    assert(context.begin() == view.begin() + 1);
+    assert(std::to_address(context.begin()) == std::to_address(view.begin()) + 1);
 
     context.advance_to(context.begin() + 1);
-    assert(context.begin() == view.begin() + 2);
+    assert(std::to_address(context.begin()) == std::to_address(view.begin()) + 2);
 
     context.advance_to(context.begin() + 1);
     assert(context.begin() == context.end());
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
index f6b40fd145635..4ce2d720aa66f 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
@@ -218,7 +218,7 @@ int main(int, char**) {
     constexpr int N = 5;
     int in[N] = {1, 2, 3, 4, 5};
     int out[N] = {6, 7, 8, 9, 10};
-    assert(!std::equal(in, in + N, in, out + N));
+    assert(!std::equal(in, in + N, out, out + N));
 
     std::ranges::uninitialized_copy(in, in + 1, out, out + N);
     assert(out[0] == 1);
@@ -314,8 +314,8 @@ int main(int, char**) {
   // Conversions, (iter, sentinel) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_copy(in, in + N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
@@ -324,8 +324,8 @@ int main(int, char**) {
   // Conversions, (range) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_copy(in, out);
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
index f32af6cf4b38b..aee2cfd1f91b5 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
@@ -122,8 +122,8 @@ int main(int, char**) {
   // Conversions.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_copy_n(in, N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
index 934ac6a4f23fe..c764440d84e77 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
@@ -250,7 +250,7 @@ int main(int, char**) {
     constexpr int N = 5;
     int in[N] = {1, 2, 3, 4, 5};
     int out[N] = {6, 7, 8, 9, 10};
-    assert(!std::equal(in, in + N, in, out + N));
+    assert(!std::equal(in, in + N, out, out + N));
 
     std::ranges::uninitialized_move(in, in + 1, out, out + N);
     assert(out[0] == 1);
@@ -350,8 +350,8 @@ int main(int, char**) {
   // Conversions, (iter, sentinel) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_move(in, in + N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
@@ -360,8 +360,8 @@ int main(int, char**) {
   // Conversions, (range) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_move(in, out);
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
index 9c6691de92297..01f3c6ca8e00d 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
@@ -156,8 +156,8 @@ int main(int, char**) {
   // Conversions.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer out;
+    int in[N] = {1, 2, 3};
+    Buffer out;
 
     std::ranges::uninitialized_move_n(in, N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));

From 6a028296fe62252791a6b470eeb8409b17d48cd0 Mon Sep 17 00:00:00 2001
From: Quinn Pham 
Date: Mon, 29 Nov 2021 09:12:51 -0600
Subject: [PATCH 451/946] [PowerPC] Emit warning when SP is clobbered by asm

This patch emits a warning when the stack pointer register (`R1`) is found in
the clobber list of an inline asm statement. Clobbering the stack pointer is
not supported.

Reviewed By: #powerpc, nemanjai

Differential Revision: https://reviews.llvm.org/D112073
---
 clang/lib/Basic/Targets/PPC.cpp               | 39 +++++++++++--------
 .../Misc/ppc-inline-asm-clobber-warning.c     | 38 ++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp   | 12 ++++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.h     |  2 +
 .../PowerPC/inline-asm-clobber-warning.ll     | 22 +++++++++++
 5 files changed, 96 insertions(+), 17 deletions(-)
 create mode 100644 clang/test/Misc/ppc-inline-asm-clobber-warning.c
 create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll

diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 19b7ded40402a..1eb0317af60b6 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -734,23 +734,28 @@ ArrayRef PPCTargetInfo::getGCCRegNames() const {
 const TargetInfo::GCCRegAlias PPCTargetInfo::GCCRegAliases[] = {
     // While some of these aliases do map to different registers
     // they still share the same register name.
-    {{"0"}, "r0"},     {{"1"}, "r1"},     {{"2"}, "r2"},     {{"3"}, "r3"},
-    {{"4"}, "r4"},     {{"5"}, "r5"},     {{"6"}, "r6"},     {{"7"}, "r7"},
-    {{"8"}, "r8"},     {{"9"}, "r9"},     {{"10"}, "r10"},   {{"11"}, "r11"},
-    {{"12"}, "r12"},   {{"13"}, "r13"},   {{"14"}, "r14"},   {{"15"}, "r15"},
-    {{"16"}, "r16"},   {{"17"}, "r17"},   {{"18"}, "r18"},   {{"19"}, "r19"},
-    {{"20"}, "r20"},   {{"21"}, "r21"},   {{"22"}, "r22"},   {{"23"}, "r23"},
-    {{"24"}, "r24"},   {{"25"}, "r25"},   {{"26"}, "r26"},   {{"27"}, "r27"},
-    {{"28"}, "r28"},   {{"29"}, "r29"},   {{"30"}, "r30"},   {{"31"}, "r31"},
-    {{"fr0"}, "f0"},   {{"fr1"}, "f1"},   {{"fr2"}, "f2"},   {{"fr3"}, "f3"},
-    {{"fr4"}, "f4"},   {{"fr5"}, "f5"},   {{"fr6"}, "f6"},   {{"fr7"}, "f7"},
-    {{"fr8"}, "f8"},   {{"fr9"}, "f9"},   {{"fr10"}, "f10"}, {{"fr11"}, "f11"},
-    {{"fr12"}, "f12"}, {{"fr13"}, "f13"}, {{"fr14"}, "f14"}, {{"fr15"}, "f15"},
-    {{"fr16"}, "f16"}, {{"fr17"}, "f17"}, {{"fr18"}, "f18"}, {{"fr19"}, "f19"},
-    {{"fr20"}, "f20"}, {{"fr21"}, "f21"}, {{"fr22"}, "f22"}, {{"fr23"}, "f23"},
-    {{"fr24"}, "f24"}, {{"fr25"}, "f25"}, {{"fr26"}, "f26"}, {{"fr27"}, "f27"},
-    {{"fr28"}, "f28"}, {{"fr29"}, "f29"}, {{"fr30"}, "f30"}, {{"fr31"}, "f31"},
-    {{"cc"}, "cr0"},
+    {{"0"}, "r0"},     {{"1", "sp"}, "r1"}, {{"2"}, "r2"},
+    {{"3"}, "r3"},     {{"4"}, "r4"},       {{"5"}, "r5"},
+    {{"6"}, "r6"},     {{"7"}, "r7"},       {{"8"}, "r8"},
+    {{"9"}, "r9"},     {{"10"}, "r10"},     {{"11"}, "r11"},
+    {{"12"}, "r12"},   {{"13"}, "r13"},     {{"14"}, "r14"},
+    {{"15"}, "r15"},   {{"16"}, "r16"},     {{"17"}, "r17"},
+    {{"18"}, "r18"},   {{"19"}, "r19"},     {{"20"}, "r20"},
+    {{"21"}, "r21"},   {{"22"}, "r22"},     {{"23"}, "r23"},
+    {{"24"}, "r24"},   {{"25"}, "r25"},     {{"26"}, "r26"},
+    {{"27"}, "r27"},   {{"28"}, "r28"},     {{"29"}, "r29"},
+    {{"30"}, "r30"},   {{"31"}, "r31"},     {{"fr0"}, "f0"},
+    {{"fr1"}, "f1"},   {{"fr2"}, "f2"},     {{"fr3"}, "f3"},
+    {{"fr4"}, "f4"},   {{"fr5"}, "f5"},     {{"fr6"}, "f6"},
+    {{"fr7"}, "f7"},   {{"fr8"}, "f8"},     {{"fr9"}, "f9"},
+    {{"fr10"}, "f10"}, {{"fr11"}, "f11"},   {{"fr12"}, "f12"},
+    {{"fr13"}, "f13"}, {{"fr14"}, "f14"},   {{"fr15"}, "f15"},
+    {{"fr16"}, "f16"}, {{"fr17"}, "f17"},   {{"fr18"}, "f18"},
+    {{"fr19"}, "f19"}, {{"fr20"}, "f20"},   {{"fr21"}, "f21"},
+    {{"fr22"}, "f22"}, {{"fr23"}, "f23"},   {{"fr24"}, "f24"},
+    {{"fr25"}, "f25"}, {{"fr26"}, "f26"},   {{"fr27"}, "f27"},
+    {{"fr28"}, "f28"}, {{"fr29"}, "f29"},   {{"fr30"}, "f30"},
+    {{"fr31"}, "f31"}, {{"cc"}, "cr0"},
 };
 
 ArrayRef PPCTargetInfo::getGCCRegAliases() const {
diff --git a/clang/test/Misc/ppc-inline-asm-clobber-warning.c b/clang/test/Misc/ppc-inline-asm-clobber-warning.c
new file mode 100644
index 0000000000000..bc323243b6e2d
--- /dev/null
+++ b/clang/test/Misc/ppc-inline-asm-clobber-warning.c
@@ -0,0 +1,38 @@
+/// This test checks that the warning includes the location in the C source
+/// file that contains the inline asm. Although this warning is emitted in llvm
+/// it cannot be tested from IR as it does not have that location information at
+/// that stage.
+
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang --target=powerpc-unknown-unknown -mcpu=pwr7 \
+// RUN:   -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang --target=powerpc64-unknown-unknown -mcpu=pwr7 \
+// RUN:   -c %s -o /dev/null 2>&1 | FileCheck %s
+
+void test_r1_clobber() {
+  __asm__("nop":::"r1");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:14:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"r1");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:14:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+void test_1_clobber() {
+  __asm__("nop":::"1");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:23:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"1");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:23:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+void test_sp_clobber() {
+  __asm__("nop":::"sp");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:32:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"sp");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:32:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 422b3db4f978a..76b016c0ee792 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -390,6 +390,18 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+bool PPCRegisterInfo::isAsmClobberable(const MachineFunction &MF,
+                                       MCRegister PhysReg) const {
+  // We cannot use getReservedRegs() to find the registers that are not asm
+  // clobberable because there are some reserved registers which can be
+  // clobbered by inline asm. For example, when LR is clobbered, the register is
+  // saved and restored. We will hardcode the registers that are not asm
+  // clobberable in this function.
+
+  // The stack pointer (R1/X1) is not clobberable by inline asm
+  return PhysReg != PPC::R1 && PhysReg != PPC::X1;
+}
+
 bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
   const PPCSubtarget &Subtarget = MF.getSubtarget();
   const PPCInstrInfo *InstrInfo =  Subtarget.getInstrInfo();
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index ce2a343cfa357..114f6d0f4c666 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -91,6 +91,8 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
+  bool isAsmClobberable(const MachineFunction &MF,
+                        MCRegister PhysReg) const override;
   bool isCallerPreservedPhysReg(MCRegister PhysReg,
                                 const MachineFunction &MF) const override;
 
diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll b/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll
new file mode 100644
index 0000000000000..7f13f5072d97f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc-unknown-unkown \
+; RUN:   -mcpu=pwr7 2>&1 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64-unknown-unkown \
+; RUN:   -mcpu=pwr7 2>&1 | FileCheck %s
+
+define void @test_r1_clobber() {
+entry:
+  call void asm sideeffect "nop", "~{r1}"()
+  ret void
+}
+
+; CHECK: warning: inline asm clobber list contains reserved registers: R1
+; CHECK-NEXT: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+define void @test_x1_clobber() {
+entry:
+  call void asm sideeffect "nop", "~{x1}"()
+  ret void
+}
+
+; CHECK: warning: inline asm clobber list contains reserved registers: X1
+; CHECK-NEXT: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.

From d3932c690d97df83f030f527456ddadf098b3d04 Mon Sep 17 00:00:00 2001
From: Igor Kirillov 
Date: Thu, 13 Jan 2022 12:57:50 +0000
Subject: [PATCH 452/946] [LoopVectorize] Add tests with reductions that are
 stored in invariant address

This patch adds tests for functionality that is to be implemented in D110235.

Differential Revision: https://reviews.llvm.org/D117213
---
 .../reductions-across-inner-and-outer-loop.ll |  38 ++
 .../AArch64/scalable-reductions.ll            |  25 ++
 .../LoopVectorize/AArch64/strict-fadd.ll      |  31 ++
 .../reduction-with-invariant-store.ll         | 333 ++++++++++++++++++
 4 files changed, 427 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index 03bd5b71af947..52e604a00df32 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -151,6 +151,44 @@ for1.loopexit:                                 ; preds = %for1.inc
   ret i64 %sum.inc.lcssa2
 }
 
+; Check that we do not interchange if reduction is stored in an invariant address inside inner loop
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test4
+
+define i64 @test4([100 x [100 x i64]]* %Arr, i64* %dst) {
+entry:
+  %gep.dst = getelementptr inbounds i64, i64* %dst, i64 42
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  store i64 %sum.inc, i64* %gep.dst, align 4
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
 ; Check that we do not interchange or crash if the PHI in the outer loop gets a
 ; constant from the inner loop.
 ; REMARKS: --- !Missed
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 8a0a71ff63365..feafd8831b668 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -318,6 +318,31 @@ for.end:
   ret float %.sroa.speculated
 }
 
+; ADD (with reduction stored in invariant address)
+
+; CHECK-REMARK: loop not vectorized: value that could not be identified as reduction is used outside the loop
+define void @invariant_store(i32* %dst, i32* readonly %src) {
+; CHECK-LABEL: @invariant_store
+; CHECK-NOT: vector.body
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  store i32 %add, i32* %gep.dst, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 ; Reduction cannot be vectorized
 
 ; MUL
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
index e890468f06ddf..3aa88d7fc18c1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1309,6 +1309,37 @@ for.end:
 
 declare float @llvm.fmuladd.f32(float, float, float)
 
+; Test case with invariant store where fadd is strict.
+define void @reduction_store_to_invariant_address(float* %dst, float* readonly %src) {
+; CHECK-ORDERED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-ORDERED-NOT: vector.body
+
+; CHECK-UNORDERED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-UNORDERED-NOT: vector.body
+
+; CHECK-NOT-VECTORIZED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  %arrayidx = getelementptr inbounds float, float* %dst, i64 42
+  store float 0.000000e+00, float* %arrayidx, align 4
+  br label %for.body
+
+for.body:
+  %0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx1 = getelementptr inbounds float, float* %src, i64 %indvars.iv
+  %1 = load float, float* %arrayidx1, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 !0 = distinct !{!0, !5, !9, !11}
 !1 = distinct !{!1, !5, !10, !11}
 !2 = distinct !{!2, !6, !9, !11}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
new file mode 100644
index 0000000000000..d002b1b289ea0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -0,0 +1,333 @@
+; RUN: opt < %s -passes="loop-vectorize" -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This test checks that we can vectorize loop with reduction variable
+; stored in an invariant address.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store
+; CHECK-NOT: vector.body
+define void @reduc_store(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Same as above but with floating point numbers instead.
+;
+; float sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store_fadd_fast
+; CHECK-NOT: vector.body
+define void @reduc_store_fadd_fast(float* %dst, float* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds float, float* %dst, i64 42
+  store float 0.000000e+00, float* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds float, float* %src, i64 %iv
+  %0 = load float, float* %gep.src, align 4
+  %add = fadd fast float %sum, %0
+  store float %add, float* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Check that if we have a read from an invariant address, we do not vectorize.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst.2[i] = dst[42];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store_load
+; CHECK-NOT: vector.body
+define void @reduc_store_load(i32* %dst, i32* readonly %src, i32* noalias %dst.2) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  %lv = load i32, i32* %gep.dst
+  %gep.dst.2  = getelementptr inbounds i32, i32* %dst.2, i64 %iv
+  store i32 %lv, i32* %gep.dst.2, align 4
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Final value is not guaranteed to be stored in an invariant address.
+; We don't vectorize in that case.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   int diff = y[i] - x[i];
+;   if (diff > 0) {
+;     sum += diff;
+;     *t = sum;
+;   }
+; }
+; CHECK-LABEL: @reduc_cond_store
+; CHECK-NOT: vector.body
+define void @reduc_cond_store(i32* %t, i32* readonly %x, i32* readonly %y) {
+entry:
+  store i32 0, i32* %t, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %if.end ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ]
+  %gep.y = getelementptr inbounds i32, i32* %y, i64 %iv
+  %0 = load i32, i32* %gep.y, align 4
+  %gep.x = getelementptr inbounds i32, i32* %x, i64 %iv
+  %1 = load i32, i32* %gep.x, align 4
+  %diff = sub nsw i32 %0, %1
+  %cmp2 = icmp sgt i32 %diff, 0
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:
+  %sum.1 = add nsw i32 %diff, %sum
+  store i32 %sum.1, i32* %t, align 4
+  br label %if.end
+
+if.end:
+  %sum.2 = phi i32 [ %sum.1, %if.then ], [ %0, %for.body ]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; Check that we can vectorize code with several stores to an invariant address
+; with condition that final reduction value is stored too.
+;
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_inside_unrolled
+; CHECK-NOT: vector.body
+define void @reduc_store_inside_unrolled(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; We cannot vectorize if two (or more) invariant stores exist in a loop.
+;
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    other_dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_double_invariant_store
+; CHECK-NOT: vector.body:
+define void @reduc_double_invariant_store(i32* %dst, i32* %other_dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  %gep.other_dst = getelementptr inbounds i32, i32* %other_dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %arrayidx4 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %arrayidx4, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.other_dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    if (src[i+1] > 0)
+;      dst[42] = sum;
+;    sum += src[i+1];
+;    dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_middle_store_predicated
+; CHECK-NOT: vector.body
+define void @reduc_store_middle_store_predicated(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %latch ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %predicated, label %latch
+
+predicated:                                       ; preds = %for.body
+  store i32 %sum.1, i32* %gep.dst, align 4
+  br label %latch
+
+latch:                                            ; preds = %predicated, %for.body
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp.1 = icmp slt i64 %iv.next, 1000
+  br i1 %cmp.1, label %for.body, label %exit
+
+exit:                                 ; preds = %latch
+  ret void
+}
+
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    if (src[i+1] > 0)
+;      dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_final_store_predicated
+; CHECK-NOT: vector.body:
+define void @reduc_store_final_store_predicated(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %latch ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  %cmp1 = icmp sgt i32 %2, 0
+  br i1 %cmp1, label %predicated, label %latch
+
+predicated:                                       ; preds = %for.body
+  store i32 %sum.2, i32* %gep.dst, align 4
+  br label %latch
+
+latch:                                            ; preds = %predicated, %for.body
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:                                 ; preds = %latch
+  ret void
+}
+
+; Final value used outside of loop does not prevent vectorization
+;
+; int sum = 0;
+; for(int i=0; i < 1000; i++) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; dst[43] = sum;
+; CHECK-LABEL: @reduc_store_inoutside
+; CHECK-NOT: vector.body
+define void @reduc_store_inoutside(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %sum.lcssa = phi i32 [ %sum.1, %for.body ]
+  %gep.dst.1 = getelementptr inbounds i32, i32* %dst, i64 43
+  store i32 %sum.lcssa, i32* %gep.dst.1, align 4
+  ret void
+}

From adb6494660eb234d009fe333e65bf94e8becf955 Mon Sep 17 00:00:00 2001
From: Rahul Joshi 
Date: Sat, 22 Jan 2022 06:00:29 -0800
Subject: [PATCH 453/946] [MLIR] Add generic walk support to OpState

- This allows calling the generic walkers on specific operation instances.

Differential Revision: https://reviews.llvm.org/D117949
---
 mlir/include/mlir/IR/OpDefinition.h | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index c68c6b12bf0a4..d1400c8557834 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -157,10 +157,40 @@ class OpState {
   /// See Operation::walk for more details.
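   template <WalkOrder Order = WalkOrder::PostOrder, typename FnT,
             typename RetT = detail::walkResultType<FnT>>
-  RetT walk(FnT &&callback) {
+  typename std::enable_if<
+      llvm::function_traits<std::decay_t<FnT>>::num_args == 1, RetT>::type
+  walk(FnT &&callback) {
     return state->walk<Order>(std::forward<FnT>(callback));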
   }
 
+  /// Generic walker with a stage aware callback. Walk the operation by calling
+  /// the callback for each nested operation (including this one) N+1 times,
+  /// where N is the number of regions attached to that operation.
+  ///
+  /// The callback method can take any of the following forms:
+  ///   void(Operation *, const WalkStage &) : Walk all operation opaquely
+  ///     * op.walk([](Operation *nestedOp, const WalkStage &stage) { ...});
+  ///   void(OpT, const WalkStage &) : Walk all operations of the given derived
+  ///                                  type.
+  ///     * op.walk([](ReturnOp returnOp, const WalkStage &stage) { ...});
+  ///   WalkResult(Operation*|OpT, const WalkStage &stage) : Walk operations,
+  ///          but allow for interruption/skipping.
+  ///     * op.walk([](... op, const WalkStage &stage) {
+  ///         // Skip the walk of this op based on some invariant.
+  ///         if (some_invariant)
+  ///           return WalkResult::skip();
+  ///         // Interrupt, i.e cancel, the walk based on some invariant.
+  ///         if (another_invariant)
+  ///           return WalkResult::interrupt();
+  ///         return WalkResult::advance();
+  ///       });
+  template <typename FnT, typename RetT = detail::walkResultType<FnT>>
+  typename std::enable_if<
+      llvm::function_traits<std::decay_t<FnT>>::num_args == 2, RetT>::type
+  walk(FnT &&callback) {
+    return state->walk(std::forward<FnT>(callback));
+  }
+
   // These are default implementations of customization hooks.
 public:
   /// This hook returns any canonicalization pattern rewrites that the operation

From 8d298355ca3778a47fd6b3110aeee03ea5e8e02b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 21:41:57 +0000
Subject: [PATCH 454/946] [X86] combineSetCCMOVMSK - detect
 and(pcmpeq(),pcmpeq()) ptest pattern.

Handle cases where we've split an allof(cmpeq()) pattern to a legal vector type
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 17 +++++-
 .../test/CodeGen/X86/vector-compare-all_of.ll | 18 +++---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 61 +++++++++----------
 3 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f8e97e63fd49..a790777bbdc2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44460,14 +44460,29 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
   // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
   if (IsAllOf && Subtarget.hasSSE41()) {
+    MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
     SDValue BC = peekThroughBitcasts(Vec);
     if (BC.getOpcode() == X86ISD::PCMPEQ) {
-      MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
       SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
                               BC.getOperand(0), BC.getOperand(1));
       V = DAG.getBitcast(TestVT, V);
       return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
     }
+    // Check for 256-bit split vector cases.
+    if (BC.getOpcode() == ISD::AND &&
+        BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
+        BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
+      SDValue LHS = BC.getOperand(0);
+      SDValue RHS = BC.getOperand(1);
+      LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+                        LHS.getOperand(0), LHS.getOperand(1));
+      RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+                        RHS.getOperand(0), RHS.getOperand(1));
+      LHS = DAG.getBitcast(TestVT, LHS);
+      RHS = DAG.getBitcast(TestVT, RHS);
+      SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
+      return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+    }
   }
 
   // See if we can avoid a PACKSS by calling MOVMSK on the sources.
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index b399712553606..91fa60ef09d59 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1329,11 +1329,10 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; SSE-LABEL: bool_reduction_v32i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    psubb %xmm3, %xmm1
+; SSE-NEXT:    psubb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -1341,11 +1340,10 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index be9ebf466eb0d..4f485cabdb1bc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1268,12 +1268,8 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
 ; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1594,12 +1590,8 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -2097,25 +2089,33 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
 }
 
 define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v32i8_v32i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v32i8_v32i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm3, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -2452,11 +2452,10 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
 ;
 ; AVX2-LABEL: icmp_v64i8_v64i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From 3b64ab574d985b70cb4ec0f69c1fc1c1c4527cde Mon Sep 17 00:00:00 2001
From: Yuanfang Chen 
Date: Mon, 24 Jan 2022 13:42:04 -0800
Subject: [PATCH 455/946] [NFC][clangd] Use table to collect option aliases

* Suppress a lot of `-Wtautological-compare` warning
* Speed up file build a little bit

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D98110
---
 clang-tools-extra/clangd/CompileCommands.cpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp
index 7d6f612cb8b96..df5f84c894e7b 100644
--- a/clang-tools-extra/clangd/CompileCommands.cpp
+++ b/clang-tools-extra/clangd/CompileCommands.cpp
@@ -463,13 +463,26 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) {
 #define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
                HELP, METAVAR, VALUES)                                          \
-  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
-    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
   Prefixes[DriverID::OPT_##ID] = PREFIX;
 #include "clang/Driver/Options.inc"
 #undef OPTION
 #undef PREFIX
 
+    struct {
+      DriverID ID;
+      DriverID AliasID;
+      void *AliasArgs;
+    } AliasTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELP, METAVAR, VALUES)                                          \
+  {DriverID::OPT_##ID, DriverID::OPT_##ALIAS, (void *)ALIASARGS},
+#include "clang/Driver/Options.inc"
+#undef OPTION
+    };
+    for (auto &E : AliasTable)
+      if (E.AliasID != DriverID::OPT_INVALID && E.AliasArgs == nullptr)
+        AddAlias(E.ID, E.AliasID);
+
     auto Result = std::make_unique<TableTy>();
     // Iterate over distinct options (represented by the canonical alias).
     // Every spelling of this option will get the same set of rules.

From d87459a0b8e98afce89309459f1cc5ef33065f8e Mon Sep 17 00:00:00 2001
From: Yuanfang Chen 
Date: Mon, 24 Jan 2022 13:42:39 -0800
Subject: [PATCH 456/946] [CMake] Fixes /INCREMENTAL detection when considering
 adding /Brepro

/INCREMENTAL is the linker default (lld-link and MSVC link). Specifying
"/INCREMENTAL:NO" is the only way to disable it. So checking for the
negative flag instead and check exe/module/shared link flags
independently.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D117381
---
 llvm/cmake/modules/HandleLLVMOptions.cmake | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 81f4f6c9de1af..c2feecc21a802 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -277,6 +277,17 @@ function(add_flag_or_print_warning flag name)
   endif()
 endfunction()
 
+function(has_msvc_incremental_no_flag flags incr_no_flag_on)
+  set(${incr_no_flag_on} OFF PARENT_SCOPE)
+  string(FIND "${flags}" "/INCREMENTAL" idx REVERSE)
+  if (${idx} GREATER -1)
+    string(SUBSTRING "${flags}" ${idx} 15 no_flag)
+    if (${no_flag} MATCHES "/INCREMENTAL:NO")
+      set(${incr_no_flag_on} ON PARENT_SCOPE)
+    endif()
+  endif()
+endfunction()
+
 if( LLVM_ENABLE_LLD )
   if ( LLVM_USE_LINKER )
     message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time")
@@ -535,11 +546,13 @@ if( MSVC )
     if (SUPPORTS_BREPRO)
       # Check if /INCREMENTAL is passed to the linker and complain that it
       # won't work with /Brepro.
-      string(FIND "${all_linker_flags_uppercase}" "/INCREMENTAL" linker_flag_idx)
-      if (${linker_flag_idx} GREATER -1)
-        message(WARNING "/Brepro not compatible with /INCREMENTAL linking - builds will be non-deterministic")
-      else()
+      has_msvc_incremental_no_flag("${CMAKE_EXE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_EXE_LINKER_FLAGS}" NO_INCR_EXE)
+      has_msvc_incremental_no_flag("${CMAKE_MODULE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_MODULE_LINKER_FLAGS}" NO_INCR_MODULE)
+      has_msvc_incremental_no_flag("${CMAKE_SHARED_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_SHARED_LINKER_FLAGS}" NO_INCR_SHARED)
+      if (NO_INCR_EXE AND NO_INCR_MODULE AND NO_INCR_SHARED)
         append("/Brepro" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+      else()
+        message(WARNING "/Brepro not compatible with /INCREMENTAL linking - builds will be non-deterministic")
       endif()
     endif()
   endif()

From da85307ba699ea2260252f523e089c85e863d671 Mon Sep 17 00:00:00 2001
From: Yuanfang Chen 
Date: Mon, 24 Jan 2022 13:42:47 -0800
Subject: [PATCH 457/946] [CMake] Pass CMAKE_C/CXX_COMPILER_LAUNCHER down to
 cross-compile and runtime build

Similar to D59032.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D117746
---
 clang/runtime/CMakeLists.txt                      | 2 ++
 llvm/cmake/modules/CrossCompile.cmake             | 2 ++
 llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt
index 61b1c60bf590b..0388008792511 100644
--- a/clang/runtime/CMakeLists.txt
+++ b/clang/runtime/CMakeLists.txt
@@ -78,6 +78,8 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/)
                -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
+               -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
+               -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
                -DLLVM_CONFIG_PATH=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-config
                -DLLVM_LIT_ARGS=${LLVM_LIT_ARGS}
                -DCOMPILER_RT_OUTPUT_DIR=${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}
diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake
index 2d637f035a7c0..2a39b6a40a285 100644
--- a/llvm/cmake/modules/CrossCompile.cmake
+++ b/llvm/cmake/modules/CrossCompile.cmake
@@ -70,6 +70,8 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype)
   add_custom_command(OUTPUT ${${project_name}_${target_name}_BUILD}/CMakeCache.txt
     COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
         -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
+        -DCMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER}"
+        -DCMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER}"
         ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_CURRENT_SOURCE_DIR}
         ${CROSS_TOOLCHAIN_FLAGS_${project_name}_${target_name}}
         -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE
diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index 7c417b41cd344..f99a50df22801 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -314,6 +314,8 @@ function(llvm_ExternalProject_Add name source_dir)
                -DPACKAGE_VERSION=${PACKAGE_VERSION}
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
+               -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
+               -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
                -DCMAKE_EXPORT_COMPILE_COMMANDS=1
                ${cmake_args}
                ${PASSTHROUGH_VARIABLES}

From 4cfea311cb8e448e7f66feb25f7aef7f68fa59b8 Mon Sep 17 00:00:00 2001
From: Changpeng Fang 
Date: Mon, 24 Jan 2022 14:33:12 -0800
Subject: [PATCH 458/946] [AMDGPU][NFC] Update to AMDGPUUsage for default Code
 Object Version

Summary:
  Update the documentation for default code object version (from v3 to v4).

Reviewers:
  kzhuravl

Differential Revision:
  https://reviews.llvm.org/D117845
---
 llvm/docs/AMDGPUUsage.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8875d178015bc..f592660f4965e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1018,12 +1018,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA
     runtime ABI for code object V3. Specify using the Clang option
-    ``-mcode-object-version=3``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=3``.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -2990,6 +2990,10 @@ non-AMD key names should be prefixed by "*vendor-name*.".
 Code Object V3 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V3 is not the default code object version emitted by this version
+  of LLVM.
+
 Code object V3 to V4 metadata is specified by the ``NT_AMDGPU_METADATA`` note
 record (see :ref:`amdgpu-note-records-v3-v4`).
 
@@ -3425,10 +3429,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-.. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v3`.

From 11bb4a11116c4937b5f7e851189c593abf28e682 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Mon, 24 Jan 2022 22:43:16 +0000
Subject: [PATCH 459/946] [X86] combinePredicateReduction - split vXi16
 allof(cmpeq()) to vXi8 allof(cmpeq())

vXi16 allof(cmp()) reduction patterns have to pack the comparison results to vXi8 in order to use PMOVMSKB.

If we're reducing cmpeq(), then we can compare the vXi8 halves directly - similar to what we already do for vXi64 -> vXi32 for cases without PCMPEQQ.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  15 ++-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 121 +++++++++---------
 2 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a790777bbdc2d..4360bc68ffaee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42215,18 +42215,21 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
       EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
       Movmsk = DAG.getBitcast(MovmskVT, Match);
     } else {
-      // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have
-      // PCMPEQQ (SSE41+), use PCMPEQD instead.
-      if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
+      // For all_of(setcc(vec,0,eq))
+      // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
+      // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
+      if (BinOp == ISD::AND &&
           Match.getOpcode() == ISD::SETCC &&
           ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
           cast(Match.getOperand(2))->get() ==
               ISD::CondCode::SETEQ) {
         SDValue Vec = Match.getOperand(0);
-        if (Vec.getValueType().getScalarType() == MVT::i64 &&
-            (2 * NumElts) <= MaxElts) {
+        EVT VecSVT = Vec.getValueType().getScalarType();
+        if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) ||
+            (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) {
           NumElts *= 2;
-          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+          VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext());
+          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts);
           MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
           Match = DAG.getSetCC(
               DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4f485cabdb1bc..fa48b72490ebf 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -938,23 +938,24 @@ define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
 }
 
 define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp0_v8i16_v8i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
-; SSE-NEXT:    packsswb %xmm1, %xmm1
-; SSE-NEXT:    pmovmskb %xmm1, %eax
-; SSE-NEXT:    cmpb $-1, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v8i16_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v8i16_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1184,26 +1185,28 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
 }
 
 define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp0_v16i16_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v16i16_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v16i16_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1477,47 +1480,41 @@ define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
 }
 
 define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp0_v32i16_v32i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm4, %xmm4
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm3
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm2
-; SSE-NEXT:    packsswb %xmm3, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v32i16_v32i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm4, %xmm4
+; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    por %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v32i16_v32i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    por %xmm3, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    ptest %xmm1, %xmm1
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From 22b0fe3fd9d404f0aa96b42fe0eeb899e90ad487 Mon Sep 17 00:00:00 2001
From: Nancy Wang 
Date: Mon, 24 Jan 2022 17:44:17 -0500
Subject: [PATCH 460/946] [SystemZ][z/OS]: fix lit tmp_dir to use - instead of
 _

The latest upstream change in https://reviews.llvm.org/D117179 causes lit regressions on z/OS: when TMPDIR is exported and contains '_', the ld linker fails because it doesn't recognize the '_' specified in SYSLIN. This seems to be a limitation on z/OS, so we need to fix lit.

Differential Revision: https://reviews.llvm.org/D118071
---
 llvm/utils/lit/lit/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index 99777eef8ec02..41f124a27ad7f 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -245,7 +245,8 @@ def execute_in_tmp_dir(run, lit_config):
     tmp_dir = None
     if 'LIT_PRESERVES_TMP' not in os.environ:
         import tempfile
-        tmp_dir = tempfile.mkdtemp(prefix='lit_tmp_')
+        # z/OS linker does not support '_' in paths, so use '-'.
+        tmp_dir = tempfile.mkdtemp(prefix='lit-tmp-')
         tmp_dir_envs = {k: tmp_dir for k in ['TMP', 'TMPDIR', 'TEMP', 'TEMPDIR']}
         os.environ.update(tmp_dir_envs)
         for cfg in {t.config for t in run.tests}:

From c1562683ee9a3be4246aa2546bf41e40b9cb123c Mon Sep 17 00:00:00 2001
From: Arjun P 
Date: Tue, 25 Jan 2022 03:47:02 +0530
Subject: [PATCH 461/946] [MLIR][Presburger] LinearTransform: rename
 multiplication functions to be more intuitive

---
 mlir/include/mlir/Analysis/Presburger/LinearTransform.h   | 5 +++--
 mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp        | 2 +-
 mlir/lib/Analysis/Presburger/LinearTransform.cpp          | 8 ++++----
 .../unittests/Analysis/Presburger/LinearTransformTest.cpp | 3 ++-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Presburger/LinearTransform.h b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
index 73bea008836f8..54d12173826d0 100644
--- a/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
+++ b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
@@ -39,11 +39,12 @@ class LinearTransform {
 
   // The given vector is interpreted as a row vector v. Post-multiply v with
   // this transform, say T, and return vT.
-  SmallVector postMultiplyRow(ArrayRef rowVec) const;
+  SmallVector preMultiplyWithRow(ArrayRef rowVec) const;
 
   // The given vector is interpreted as a column vector v. Pre-multiply v with
   // this transform, say T, and return Tv.
-  SmallVector preMultiplyColumn(ArrayRef colVec) const;
+  SmallVector
+  postMultiplyWithColumn(ArrayRef colVec) const;
 
 private:
   Matrix matrix;
diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
index 0080756dfb760..96519738a1188 100644
--- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
@@ -770,7 +770,7 @@ Optional> IntegerPolyhedron::findIntegerSample() const {
   // 6) Return transform * concat(boundedSample, coneSample).
   SmallVector &sample = boundedSample.getValue();
   sample.append(coneSample.begin(), coneSample.end());
-  return transform.preMultiplyColumn(sample);
+  return transform.postMultiplyWithColumn(sample);
 }
 
 /// Helper to evaluate an affine expression at a point.
diff --git a/mlir/lib/Analysis/Presburger/LinearTransform.cpp b/mlir/lib/Analysis/Presburger/LinearTransform.cpp
index 09d7eb731576e..073b64cf8c337 100644
--- a/mlir/lib/Analysis/Presburger/LinearTransform.cpp
+++ b/mlir/lib/Analysis/Presburger/LinearTransform.cpp
@@ -112,7 +112,7 @@ LinearTransform::makeTransformToColumnEchelon(Matrix m) {
 }
 
 SmallVector
-LinearTransform::postMultiplyRow(ArrayRef rowVec) const {
+LinearTransform::preMultiplyWithRow(ArrayRef rowVec) const {
   assert(rowVec.size() == matrix.getNumRows() &&
          "row vector dimension should match transform output dimension");
 
@@ -124,7 +124,7 @@ LinearTransform::postMultiplyRow(ArrayRef rowVec) const {
 }
 
 SmallVector
-LinearTransform::preMultiplyColumn(ArrayRef colVec) const {
+LinearTransform::postMultiplyWithColumn(ArrayRef colVec) const {
   assert(matrix.getNumColumns() == colVec.size() &&
          "column vector dimension should match transform input dimension");
 
@@ -144,7 +144,7 @@ LinearTransform::applyTo(const IntegerPolyhedron &poly) const {
 
     int64_t c = eq.back();
 
-    SmallVector newEq = postMultiplyRow(eq.drop_back());
+    SmallVector newEq = preMultiplyWithRow(eq.drop_back());
     newEq.push_back(c);
     result.addEquality(newEq);
   }
@@ -154,7 +154,7 @@ LinearTransform::applyTo(const IntegerPolyhedron &poly) const {
 
     int64_t c = ineq.back();
 
-    SmallVector newIneq = postMultiplyRow(ineq.drop_back());
+    SmallVector newIneq = preMultiplyWithRow(ineq.drop_back());
     newIneq.push_back(c);
     result.addInequality(newIneq);
   }
diff --git a/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
index 8a6650f609f79..01b6b7fc960ce 100644
--- a/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
@@ -22,7 +22,8 @@ void testColumnEchelonForm(const Matrix &m, unsigned expectedRank) {
   // In column echelon form, each row's last non-zero value can be at most one
   // column to the right of the last non-zero column among the previous rows.
   for (unsigned row = 0, nRows = m.getNumRows(); row < nRows; ++row) {
-    SmallVector rowVec = transform.postMultiplyRow(m.getRow(row));
+    SmallVector rowVec =
+        transform.preMultiplyWithRow(m.getRow(row));
     for (unsigned col = lastAllowedNonZeroCol + 1, nCols = m.getNumColumns();
          col < nCols; ++col) {
       EXPECT_EQ(rowVec[col], 0);

From 0e98fadc79534dde843275fcb142cc6d3a0eee01 Mon Sep 17 00:00:00 2001
From: Arjun P 
Date: Tue, 25 Jan 2022 04:08:14 +0530
Subject: [PATCH 462/946] [MLIR][Presburger] use braces for single-line loop
 when inner if uses braces [NFC]

---
 mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
index cf68c92650548..683c46ea5c0e8 100644
--- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
@@ -635,10 +635,11 @@ static void checkDivisionRepresentation(
   // Check that the `dividends` and `expectedDividends` match. If the
   // denominator for a division is zero, we ignore its dividend.
   EXPECT_TRUE(dividends.size() == expectedDividends.size());
-  for (unsigned i = 0, e = dividends.size(); i < e; ++i)
+  for (unsigned i = 0, e = dividends.size(); i < e; ++i) {
     if (denominators[i] != 0) {
       EXPECT_TRUE(expectedDividends[i] == dividends[i]);
     }
+  }
 }
 
 TEST(IntegerPolyhedronTest, computeLocalReprSimple) {

From fe0c5309c4a8bf022b2ec66a0d28513e7fb1441b Mon Sep 17 00:00:00 2001
From: Petr Hosek 
Date: Mon, 24 Jan 2022 13:31:58 -0800
Subject: [PATCH 463/946] [Fuchsia] Remove i386 from iossim architectures

This is no longer supported in newer SDK versions.

Differential Revision: https://reviews.llvm.org/D118075
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index de8ac89fbfaa0..aed39d0ca9fff 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -71,9 +71,8 @@ if(APPLE)
   set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
   set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
   set(DARWIN_ios_ARCHS armv7;armv7s;arm64 CACHE STRING "")
-  set(DARWIN_iossim_ARCHS i386;x86_64 CACHE STRING "")
+  set(DARWIN_iossim_ARCHS arm64;x86_64 CACHE STRING "")
   set(DARWIN_osx_ARCHS arm64;x86_64 CACHE STRING "")
-  set(SANITIZER_MIN_OSX_VERSION 10.7 CACHE STRING "")
 endif()
 
 if(WIN32)

From f1c9e7bdc921cec0cc3f61c19c4ac4a7f1bd8525 Mon Sep 17 00:00:00 2001
From: Chaoshuai Lu 
Date: Mon, 24 Jan 2022 14:51:37 -0800
Subject: [PATCH 464/946] [ObjC Availability] Add missing const to getVersion
 function of ObjCAvailabilityCheckExpr class

Add missing const to `getVersion` function of `ObjCAvailabilityCheckExpr` class.

This feels like a bug in the original change D22171. We cannot call this function through a pointer to a const object because the function is not marked as const.

This diff adds the missing const specifier to fix the issue.

Reviewed By: manmanren

Differential Revision: https://reviews.llvm.org/D112119
---
 clang/include/clang/AST/ExprObjC.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h
index b0f057dbaa02f..3b7ad8662ad95 100644
--- a/clang/include/clang/AST/ExprObjC.h
+++ b/clang/include/clang/AST/ExprObjC.h
@@ -1706,7 +1706,7 @@ class ObjCAvailabilityCheckExpr : public Expr {
 
   /// This may be '*', in which case this should fold to true.
   bool hasVersion() const { return !VersionToCheck.empty(); }
-  VersionTuple getVersion() { return VersionToCheck; }
+  VersionTuple getVersion() const { return VersionToCheck; }
 
   child_range children() {
     return child_range(child_iterator(), child_iterator());

From 572fa9642cb50f3c2d79e138e789c4b23f3ab8cf Mon Sep 17 00:00:00 2001
From: Mogball 
Date: Mon, 24 Jan 2022 23:00:39 +0000
Subject: [PATCH 465/946] [mlir] Add a ControlFlowSink pass.

Control-Flow Sink moves operations whose only uses are in conditionally-executed regions into those regions so that paths in which their results are not needed do not perform unnecessary computation.

Depends on D115087

Reviewed By: jpienaar, rriddle, bondhugula

Differential Revision: https://reviews.llvm.org/D115088
---
 .../mlir/Interfaces/ControlFlowInterfaces.h   |  29 +++
 .../mlir/Interfaces/ControlFlowInterfaces.td  |  30 ++-
 mlir/include/mlir/Transforms/Passes.h         |   3 +
 mlir/include/mlir/Transforms/Passes.td        |  22 ++
 mlir/include/mlir/Transforms/Utils.h          |  48 ++++
 mlir/lib/Transforms/CMakeLists.txt            |   1 +
 mlir/lib/Transforms/ControlFlowSink.cpp       |  71 ++++++
 mlir/lib/Transforms/Utils/CMakeLists.txt      |   1 +
 .../Transforms/Utils/ControlFlowSinkUtils.cpp | 152 +++++++++++++
 mlir/test/Transforms/control-flow-sink.mlir   | 210 ++++++++++++++++++
 mlir/test/lib/Dialect/Test/TestDialect.cpp    |  34 ++-
 mlir/test/lib/Dialect/Test/TestOps.td         |  13 +-
 12 files changed, 604 insertions(+), 10 deletions(-)
 create mode 100644 mlir/lib/Transforms/ControlFlowSink.cpp
 create mode 100644 mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
 create mode 100644 mlir/test/Transforms/control-flow-sink.mlir

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
index 8ff76edb3068c..806821a988bf3 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
@@ -83,6 +83,35 @@ class RegionSuccessor {
   ValueRange inputs;
 };
 
+/// This class represents upper and lower bounds on the number of times a region
+/// of a `RegionBranchOpInterface` can be invoked. The lower bound is at least
+/// zero, but the upper bound may not be known.
+class InvocationBounds {
+public:
+  /// Create invocation bounds. The lower bound must be at least 0 and only the
+  /// upper bound can be unknown.
+  InvocationBounds(unsigned lb, Optional ub) : lower(lb), upper(ub) {
+    assert(!ub || ub >= lb && "upper bound cannot be less than lower bound");
+  }
+
+  /// Return the lower bound.
+  unsigned getLowerBound() const { return lower; }
+
+  /// Return the upper bound.
+  Optional getUpperBound() const { return upper; }
+
+  /// Returns the unknown invocation bounds, i.e., there is no information on
+  /// how many times a region may be invoked.
+  static InvocationBounds getUnknown() { return {0, llvm::None}; }
+
+private:
+  /// The minimum number of times the successor region will be invoked.
+  unsigned lower;
+  /// The maximum number of times the successor region will be invoked or `None`
+  /// if an upper bound is not known.
+  Optional upper;
+};
+
 /// Return `true` if `a` and `b` are in mutually exclusive regions as per
 /// RegionBranchOpInterface.
 bool insideMutuallyExclusiveRegions(Operation *a, Operation *b);
diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
index 0633426cf50af..429a5356428f7 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
@@ -102,9 +102,10 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
   let methods = [
     InterfaceMethod<[{
         Returns the operands of this operation used as the entry arguments when
-        entering the region at `index`, which was specified as a successor of this
-        operation by `getSuccessorRegions`. These operands should correspond 1-1
-        with the successor inputs specified in `getSuccessorRegions`.
+        entering the region at `index`, which was specified as a successor of
+        this operation by `getSuccessorRegions`. These operands should
+        correspond 1-1 with the successor inputs specified in
+        `getSuccessorRegions`.
       }],
       "::mlir::OperandRange", "getSuccessorEntryOperands",
       (ins "unsigned":$index), [{}], /*defaultImplementation=*/[{
@@ -127,9 +128,28 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
         successor region must be non-empty.
       }],
       "void", "getSuccessorRegions",
-      (ins "::mlir::Optional":$index, "::mlir::ArrayRef<::mlir::Attribute>":$operands,
+      (ins "::mlir::Optional":$index,
+           "::mlir::ArrayRef<::mlir::Attribute>":$operands,
            "::mlir::SmallVectorImpl<::mlir::RegionSuccessor> &":$regions)
-    >
+    >,
+    InterfaceMethod<[{
+        Populates `invocationBounds` with the minimum and maximum number of
+        times this operation will invoke the attached regions (assuming the
+        regions yield normally, i.e. do not abort or invoke an infinite loop).
+        The minimum number of invocations is at least 0. If the maximum number
+        of invocations cannot be statically determined, then it will not have a
+        value (i.e., it is set to `llvm::None`).
+
+        `operands` is a set of optional attributes that either correspond to
+        constant values for each operand of this operation, or are null if
+        that operand is not a constant.
+      }],
+      "void", "getRegionInvocationBounds",
+      (ins "::mlir::ArrayRef<::mlir::Attribute>":$operands,
+           "::llvm::SmallVectorImpl<::mlir::InvocationBounds> &"
+             :$invocationBounds), [{}],
+       [{ invocationBounds.append($_op->getNumRegions(), {0, ::llvm::None}); }]
+    >,
   ];
 
   let verify = [{
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 6aab120fb469f..46bab0047c1b6 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -74,6 +74,9 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
                         ArrayRef disabledPatterns = llvm::None,
                         ArrayRef enabledPatterns = llvm::None);
 
+/// Creates a pass to perform control-flow sinking.
+std::unique_ptr createControlFlowSinkPass();
+
 /// Creates a pass to perform common sub expression elimination.
 std::unique_ptr createCSEPass();
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index ff15df372aa26..56e90644363e5 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -307,6 +307,28 @@ def Canonicalizer : Pass<"canonicalize"> {
   ] # RewritePassUtils.options;
 }
 
+def ControlFlowSink : Pass<"control-flow-sink"> {
+  let summary = "Sink operations into conditional blocks";
+  let description = [{
+    This pass implements a simple control-flow sink on operations that implement
+    `RegionBranchOpInterface` by moving dominating operations whose only uses
+    are in a single conditionally-executed region into that region so that
+    execution paths where their results are not needed do not perform
+    unnecessary computations.
+
+    This is similar (but opposite) to loop-invariant code motion, which hoists
+    operations out of regions executed more than once.
+
+    It is recommended to run canonicalization first to remove unreachable
+    blocks: ops in unreachable blocks may prevent other operations from being
+    sunk as they may contain uses of their results.
+  }];
+  let constructor = "::mlir::createControlFlowSinkPass()";
+  let statistics = [
+    Statistic<"numSunk", "num-sunk", "Number of operations sunk">,
+  ];
+}
+
 def CSE : Pass<"cse"> {
   let summary = "Eliminate common sub-expressions";
   let description = [{
diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h
index 5280c2648bfae..5efbb19b08d25 100644
--- a/mlir/include/mlir/Transforms/Utils.h
+++ b/mlir/include/mlir/Transforms/Utils.h
@@ -25,6 +25,7 @@ namespace mlir {
 
 class AffineApplyOp;
 class AffineForOp;
+class DominanceInfo;
 class Location;
 class OpBuilder;
 
@@ -147,6 +148,53 @@ Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
 void createAffineComputationSlice(Operation *opInst,
                                   SmallVectorImpl *sliceOps);
 
+/// Given a list of regions, perform control flow sinking on them. For each
+/// region, control-flow sinking moves operations that dominate the region but
+/// whose only users are in the region into the regions so that they aren't
+/// executed on paths where their results are not needed.
+///
+/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
+/// duplicating of ops. It should be made to accept a cost model to determine
+/// whether duplicating a particular op is profitable.
+///
+/// Example:
+///
+/// ```mlir
+/// %0 = arith.addi %arg0, %arg1
+/// scf.if %cond {
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// After control-flow sink:
+///
+/// ```mlir
+/// scf.if %cond {
+///   %0 = arith.addi %arg0, %arg1
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
+/// the given operation that only has users in the given region should be
+/// moved into that region.
+///
+/// Returns the number of operations sunk.
+size_t
+controlFlowSink(ArrayRef regions, DominanceInfo &domInfo,
+                function_ref shouldMoveIntoRegion);
+
+/// Populates `regions` with regions of the provided region branch op that are
+/// executed at most once and that are reachable given the current operands of
+/// the op. These regions can be passed to `controlFlowSink` to perform sinking
+/// on the regions of the operation.
+void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                    SmallVectorImpl &regions);
+
 } // namespace mlir
 
 #endif // MLIR_TRANSFORMS_UTILS_H
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index ae1468fa17ea3..3e10b4a321311 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_library(MLIRTransforms
   BufferResultsToOutParams.cpp
   BufferUtils.cpp
   Canonicalizer.cpp
+  ControlFlowSink.cpp
   CSE.cpp
   Inliner.cpp
   LocationSnapshot.cpp
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
new file mode 100644
index 0000000000000..71afc5702edf0
--- /dev/null
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -0,0 +1,71 @@
+//===- ControlFlowSink.cpp - Code to perform control-flow sinking ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic control-flow sink pass. Control-flow sinking
+// moves operations whose only uses are in conditionally-executed blocks into
+// those blocks so that they aren't executed on paths where their results are
+// not needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/IR/Dominance.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
+
+using namespace mlir;
+
+namespace {
+/// A basic control-flow sink pass. This pass analyzes the regions of operations
+/// that implement `RegionBranchOpInterface` that are reachable and executed at
+/// most once and sinks candidate operations that are side-effect free.
+struct ControlFlowSink : public ControlFlowSinkBase {
+  void runOnOperation() override;
+};
+} // end anonymous namespace
+
+/// Returns true if the given operation is side-effect free as are all of its
+/// nested operations.
+static bool isSideEffectFree(Operation *op) {
+  if (auto memInterface = dyn_cast(op)) {
+    // If the op has side-effects, it cannot be moved.
+    if (!memInterface.hasNoEffect())
+      return false;
+    // If the op does not have recursive side effects, then it can be moved.
+    if (!op->hasTrait())
+      return true;
+  } else if (!op->hasTrait()) {
+    // Otherwise, if the op does not implement the memory effect interface and
+    // it does not have recursive side effects, then it cannot be known that the
+    // op is moveable.
+    return false;
+  }
+
+  // Recurse into the regions and ensure that all nested ops can also be moved.
+  for (Region &region : op->getRegions())
+    for (Operation &op : region.getOps())
+      if (!isSideEffectFree(&op))
+        return false;
+  return true;
+}
+
+void ControlFlowSink::runOnOperation() {
+  auto &domInfo = getAnalysis();
+  getOperation()->walk([&](RegionBranchOpInterface branch) {
+    SmallVector regionsToSink;
+    getSinglyExecutedRegionsToSink(branch, regionsToSink);
+    numSunk += mlir::controlFlowSink( // Sum over all visited branch ops.
+        regionsToSink, domInfo,
+        [](Operation *op, Region *) { return isSideEffectFree(op); });
+  });
+}
+
+std::unique_ptr mlir::createControlFlowSinkPass() {
+  return std::make_unique();
+}
diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt
index 33deb17fe1378..c42d45325e1b2 100644
--- a/mlir/lib/Transforms/Utils/CMakeLists.txt
+++ b/mlir/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_library(MLIRTransformUtils
+  ControlFlowSinkUtils.cpp
   DialectConversion.cpp
   FoldUtils.cpp
   GreedyPatternRewriteDriver.cpp
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
new file mode 100644
index 0000000000000..cffbd922f88c8
--- /dev/null
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -0,0 +1,152 @@
+//===- ControlFlowSinkUtils.cpp - Code to perform control-flow sinking ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for control-flow sinking. Control-flow
+// sinking moves operations whose only uses are in conditionally-executed blocks
+// into those blocks so that they aren't executed on paths where their results
+// are not needed.
+//
+// Control-flow sinking is not implemented on BranchOpInterface because
+// sinking ops into the successors of branch operations may move ops into loops.
+// It is idiomatic MLIR to perform optimizations at IR levels that readily
+// provide the necessary information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Utils.h"
+#include 
+
+#define DEBUG_TYPE "cf-sink"
+
+using namespace mlir;
+
+namespace {
+/// A helper struct for control-flow sinking.
+class Sinker {
+public:
+  /// Create an operation sinker with given dominance info.
+  Sinker(function_ref shouldMoveIntoRegion,
+         DominanceInfo &domInfo)
+      : shouldMoveIntoRegion(shouldMoveIntoRegion), domInfo(domInfo),
+        numSunk(0) {}
+
+  /// Given a list of regions, find operations to sink and sink them. Return the
+  /// number of operations sunk.
+  size_t sinkRegions(ArrayRef regions) &&;
+
+private:
+  /// Given a region and an op which dominates the region, returns true if all
+  /// users of the given op are dominated by the entry block of the region, and
+  /// thus the operation can be sunk into the region.
+  bool allUsersDominatedBy(Operation *op, Region *region);
+
+  /// Given a region and a top-level op (an op whose parent region is the given
+  /// region), determine whether the defining ops of the op's operands can be
+  /// sunk into the region.
+  ///
+  /// Add moved ops to the work queue.
+  void tryToSinkPredecessors(Operation *user, Region *region,
+                             std::vector &stack);
+
+  /// Iterate over all the ops in a region and try to sink their predecessors.
+  /// Recurse on subgraphs using a work queue.
+  void sinkRegion(Region *region);
+
+  /// The callback to determine whether an op should be moved into a region.
+  function_ref shouldMoveIntoRegion;
+  /// Dominance info to determine op user dominance with respect to regions.
+  DominanceInfo &domInfo;
+  /// The number of operations sunk.
+  size_t numSunk;
+};
+} // end anonymous namespace
+
+bool Sinker::allUsersDominatedBy(Operation *op, Region *region) {
+  assert(region->findAncestorOpInRegion(*op) == nullptr &&
+         "expected op to be defined outside the region");
+  return llvm::all_of(op->getUsers(), [&](Operation *user) {
+    // The user is dominated by the region if its containing block is dominated
+    // by the region's entry block.
+    return domInfo.dominates(&region->front(), user->getBlock());
+  });
+}
+
+void Sinker::tryToSinkPredecessors(Operation *user, Region *region,
+                                   std::vector &stack) {
+  LLVM_DEBUG(user->print(llvm::dbgs() << "\nContained op:\n"));
+  for (Value value : user->getOperands()) {
+    Operation *op = value.getDefiningOp();
+    // Ignore block arguments and ops that are already inside the region.
+    if (!op || op->getParentRegion() == region)
+      continue;
+    LLVM_DEBUG(op->print(llvm::dbgs() << "\nTry to sink:\n"));
+
+    // If the op's users are all in the region and it can be moved, then do so.
+    if (allUsersDominatedBy(op, region) && shouldMoveIntoRegion(op, region)) {
+      // Move the op into the region's entry block. If the op is part of a
+      // subgraph, dependee ops would have been moved first, so inserting before
+      // the start of the block will ensure dominance is preserved. Ops can only
+      // be safely moved into the entry block as the region's other blocks may
+      // form a loop.
+      op->moveBefore(&region->front(), region->front().begin());
+      ++numSunk;
+      // Add the op to the work queue.
+      stack.push_back(op);
+    }
+  }
+}
+
+void Sinker::sinkRegion(Region *region) {
+  // Initialize the work queue with all the ops in the region.
+  std::vector stack;
+  for (Operation &op : region->getOps())
+    stack.push_back(&op);
+
+  // Process all the ops depth-first. This ensures that nodes of subgraphs are
+  // sunk in the correct order.
+  while (!stack.empty()) {
+    Operation *op = stack.back();
+    stack.pop_back();
+    tryToSinkPredecessors(op, region, stack);
+  }
+}
+
+size_t Sinker::sinkRegions(ArrayRef regions) && {
+  for (Region *region : regions)
+    if (!region->empty())
+      sinkRegion(region);
+  return numSunk;
+}
+
+size_t mlir::controlFlowSink(
+    ArrayRef regions, DominanceInfo &domInfo,
+    function_ref shouldMoveIntoRegion) {
+  return Sinker(shouldMoveIntoRegion, domInfo).sinkRegions(regions);
+}
+
+void mlir::getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                          SmallVectorImpl &regions) {
+  // Collect constant operands.
+  SmallVector operands(branch->getNumOperands(), Attribute());
+  for (auto &it : llvm::enumerate(branch->getOperands()))
+    matchPattern(it.value(), m_Constant(&operands[it.index()]));
+  // Get the invocation bounds.
+  SmallVector bounds;
+  branch.getRegionInvocationBounds(operands, bounds);
+
+  // For a simple control-flow sink, only consider regions that are executed at
+  // most once.
+  for (auto it : llvm::zip(branch->getRegions(), bounds)) {
+    const InvocationBounds &bound = std::get<1>(it);
+    if (bound.getUpperBound() && *bound.getUpperBound() <= 1)
+      regions.push_back(&std::get<0>(it));
+  }
+}
diff --git a/mlir/test/Transforms/control-flow-sink.mlir b/mlir/test/Transforms/control-flow-sink.mlir
new file mode 100644
index 0000000000000..58dffe19c0aae
--- /dev/null
+++ b/mlir/test/Transforms/control-flow-sink.mlir
@@ -0,0 +1,210 @@
+// RUN: mlir-opt -split-input-file -control-flow-sink %s | FileCheck %s
+
+// Test that operations can be sunk.
+
+// CHECK-LABEL: @test_simple_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = arith.subi %[[ARG2]], %[[ARG1]]
+// CHECK-NEXT: %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V2:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   test.region_if_yield %[[V2]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG1]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V0]], %[[V2]]
+// CHECK-NEXT:   test.region_if_yield %[[V3]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG2]], %[[ARG2]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V2]], %[[V0]]
+// CHECK-NEXT:   test.region_if_yield %[[V3]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V1]]
+func @test_simple_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.subi %arg1, %arg2 : i32
+  %1 = arith.subi %arg2, %arg1 : i32
+  %2 = arith.addi %arg1, %arg1 : i32
+  %3 = arith.addi %arg2, %arg2 : i32
+  %4 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %0 : i32
+  } else {
+    %5 = arith.addi %1, %2 : i32
+    test.region_if_yield %5 : i32
+  } join {
+    %5 = arith.addi %3, %1 : i32
+    test.region_if_yield %5 : i32
+  }
+  return %4 : i32
+}
+
+// -----
+
+// Test that a region op can be sunk.
+
+// CHECK-LABEL: @test_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     %[[V2:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:     test.region_if_yield %[[V2]]
+// CHECK-NEXT:   } join {
+// CHECK-NEXT:     test.region_if_yield %[[ARG2]]
+// CHECK-NEXT:   }
+// CHECK-NEXT:   test.region_if_yield %[[V1]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG2]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.subi %arg1, %arg2 : i32
+  %1 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %arg1 : i32
+  } else {
+    test.region_if_yield %0 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  %2 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %1 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  return %2 : i32
+}
+
+// -----
+
+// Test that an entire subgraph can be sunk.
+
+// CHECK-LABEL: @test_subgraph_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.subi %[[ARG2]], %[[ARG1]]
+// CHECK-NEXT:   %[[V4:.*]] = arith.muli %[[V3]], %[[V3]]
+// CHECK-NEXT:   %[[V5:.*]] = arith.muli %[[V2]], %[[V1]]
+// CHECK-NEXT:   %[[V6:.*]] = arith.addi %[[V5]], %[[V4]]
+// CHECK-NEXT:   test.region_if_yield %[[V6]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG2]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_subgraph_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg2 : i32
+  %1 = arith.subi %arg1, %arg2 : i32
+  %2 = arith.subi %arg2, %arg1 : i32
+  %3 = arith.muli %0, %1 : i32
+  %4 = arith.muli %2, %2 : i32
+  %5 = arith.addi %3, %4 : i32
+  %6 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %5 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  return %6 : i32
+}
+
+// -----
+
+// Test that ops can be sunk into regions with multiple blocks.
+
+// CHECK-LABEL: @test_multiblock_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = arith.addi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT: %[[V1:.*]] = "test.any_cond"() ({
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V0]], %[[ARG2]]
+// CHECK-NEXT:   %[[V4:.*]] = arith.addi %[[V3]], %[[ARG1]]
+// CHECK-NEXT:   br ^bb1(%[[V4]] : i32)
+// CHECK-NEXT: ^bb1(%[[V5:.*]]: i32):
+// CHECK-NEXT:   %[[V6:.*]] = arith.addi %[[V5]], %[[V4]]
+// CHECK-NEXT:   "test.yield"(%[[V6]])
+// CHECK-NEXT: })
+// CHECK-NEXT: %[[V2:.*]] = arith.addi %[[V0]], %[[V1]]
+// CHECK-NEXT: return %[[V2]]
+func @test_multiblock_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg2 : i32
+  %1 = arith.addi %0, %arg2 : i32
+  %2 = arith.addi %1, %arg1 : i32
+  %3 = "test.any_cond"() ({
+    br ^bb1(%2 : i32)
+  ^bb1(%5: i32):
+    %6 = arith.addi %5, %2 : i32
+    "test.yield"(%6) : (i32) -> ()
+  }) : () -> i32
+  %4 = arith.addi %0, %3 : i32
+  return %4 : i32
+}
+
+// -----
+
+// Test that ops can be sunk recursively into nested regions.
+
+// CHECK-LABEL: @test_nested_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32) -> i32 {
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:     %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG1]]
+// CHECK-NEXT:     test.region_if_yield %[[V2]]
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   } join {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   }
+// CHECK-NEXT:   test.region_if_yield %[[V1]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg1 : i32
+  %1 = test.region_if %arg0: i1 -> i32 then {
+    %2 = test.region_if %arg0: i1 -> i32 then {
+      test.region_if_yield %0 : i32
+    } else {
+      test.region_if_yield %arg1 : i32
+    } join {
+      test.region_if_yield %arg1 : i32
+    }
+    test.region_if_yield %2 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg1 : i32
+  }
+  return %1 : i32
+}
+
+// -----
+
+// Test that ops are only moved into the entry block, even when their only uses
+// are further along.
+
+// CHECK-LABEL: @test_not_sunk_deeply
+// CHECK-SAME:  (%[[ARG0:.*]]: i32) -> i32 {
+// CHECK-NEXT: %[[V0:.*]] = "test.any_cond"() ({
+// CHECK-NEXT:   %[[V1:.*]] = arith.addi %[[ARG0]], %[[ARG0]]
+// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT:   "test.yield"(%[[V1]]) : (i32) -> ()
+// CHECK-NEXT: })
+// CHECK-NEXT: return %[[V0]]
+func @test_not_sunk_deeply(%arg0: i32) -> i32 {
+  %0 = arith.addi %arg0, %arg0 : i32
+  %1 = "test.any_cond"() ({
+    br ^bb1
+  ^bb1:
+    "test.yield"(%0) : (i32) -> ()
+  }) : () -> i32
+  return %1 : i32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index 2b915a9fd6c2a..d82f7af73decb 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -1127,15 +1127,15 @@ static void print(OpAsmPrinter &p, RegionIfOp op) {
   p.printOperands(op.getOperands());
   p << ": " << op.getOperandTypes();
   p.printArrowTypeList(op.getResultTypes());
-  p << " then";
+  p << " then ";
   p.printRegion(op.getThenRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
-  p << " else";
+  p << " else ";
   p.printRegion(op.getElseRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
-  p << " join";
+  p << " join ";
   p.printRegion(op.getJoinRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
@@ -1189,6 +1189,34 @@ void RegionIfOp::getSuccessorRegions(
   regions.push_back(RegionSuccessor(&getElseRegion(), getElseArgs()));
 }
 
+void RegionIfOp::getRegionInvocationBounds(
+    ArrayRef operands,
+    SmallVectorImpl &invocationBounds) {
+  // Each region is invoked at most once.
+  invocationBounds.assign(/*NumElts=*/3, /*Elt=*/{0, 1});
+}
+
+//===----------------------------------------------------------------------===//
+// AnyCondOp
+//===----------------------------------------------------------------------===//
+
+void AnyCondOp::getSuccessorRegions(Optional index,
+                                    ArrayRef operands,
+                                    SmallVectorImpl &regions) {
+  // The parent op branches into the only region, and the region branches back
+  // to the parent op.
+  if (index)
+    regions.emplace_back(&getRegion());
+  else
+    regions.emplace_back(getResults());
+}
+
+void AnyCondOp::getRegionInvocationBounds(
+    ArrayRef operands,
+    SmallVectorImpl &invocationBounds) {
+  invocationBounds.emplace_back(1, 1);
+}
+
 //===----------------------------------------------------------------------===//
 // SingleNoTerminatorCustomAsmOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index cc9c950ea73fe..f171e19270069 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2342,14 +2342,15 @@ def RegionIfYieldOp : TEST_Op<"region_if_yield",
 }
 
 def RegionIfOp : TEST_Op<"region_if",
-      [DeclareOpInterfaceMethods,
+      [DeclareOpInterfaceMethods,
        SingleBlockImplicitTerminator<"RegionIfYieldOp">,
        RecursiveSideEffects]> {
   let description =[{
     Represents an abstract if-then-else-join pattern. In this context, the then
     and else regions jump to the join region, which finally returns to its
     parent op.
-    }];
+  }];
 
   let printer = [{ return ::print(p, *this); }];
   let parser = [{ return ::parseRegionIfOp(parser, result); }];
@@ -2372,6 +2373,14 @@ def RegionIfOp : TEST_Op<"region_if",
   }];
 }
 
+def AnyCondOp : TEST_Op<"any_cond",
+      [DeclareOpInterfaceMethods,
+       RecursiveSideEffects]> {
+  let results = (outs Variadic:$results);
+  let regions = (region AnyRegion:$region);
+}
+
 //===----------------------------------------------------------------------===//
 // Test TableGen generated build() methods
 //===----------------------------------------------------------------------===//

From 9407a701790fc8c55795f992ded9a40bd37ca7d1 Mon Sep 17 00:00:00 2001
From: David Blaikie 
Date: Mon, 24 Jan 2022 15:02:49 -0800
Subject: [PATCH 466/946] DWARFv5 default: Switch bolt tests to use DWARFv4
 since Bolt doesn't support v5 yet

Rough attempt to fix these, since I don't have bolt building locally.
Will see how the buildbots go with it...
---
 bolt/test/X86/asm-func-debug.test         | 2 +-
 bolt/test/X86/inline-debug-info.test      | 2 +-
 bolt/test/X86/inlined-function-mixed.test | 2 +-
 bolt/test/keep-aranges.test               | 2 +-
 bolt/test/non-empty-debug-line.test       | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bolt/test/X86/asm-func-debug.test b/bolt/test/X86/asm-func-debug.test
index 9679d79821ef6..a022ff0822b31 100644
--- a/bolt/test/X86/asm-func-debug.test
+++ b/bolt/test/X86/asm-func-debug.test
@@ -3,7 +3,7 @@
 #
 # The input test case foo() contains nops that we remove.
 
-RUN: %clang -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
+RUN: %clang -gdwarf-4 %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections
 RUN: llvm-dwarfdump -all %t | FileCheck %s
 
diff --git a/bolt/test/X86/inline-debug-info.test b/bolt/test/X86/inline-debug-info.test
index 300fa0be78914..cab3fb9baa22e 100644
--- a/bolt/test/X86/inline-debug-info.test
+++ b/bolt/test/X86/inline-debug-info.test
@@ -3,7 +3,7 @@
 
 # REQUIRES: system-linux
 
-# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: %clang %cflags -O1 -gdwarf-4 %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
 # RUN:   -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -update-debug-sections -print-debug-info \
 # RUN:   -print-only=main -print-after-lowering -force-inline=foo -o %t.bolt \
diff --git a/bolt/test/X86/inlined-function-mixed.test b/bolt/test/X86/inlined-function-mixed.test
index 1b17c1932c1dd..d2ed274ae6cc5 100644
--- a/bolt/test/X86/inlined-function-mixed.test
+++ b/bolt/test/X86/inlined-function-mixed.test
@@ -2,7 +2,7 @@
 # debug info does not cause a crash.
 
 RUN: %clangxx %S/Inputs/inlined.cpp -c -o %T/inlined.o
-RUN: %clangxx %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -gdwarf-4
 RUN: %clangxx %T/inlined.o %T/inlinee.o -o %t
 
 RUN: llvm-bolt %t -o %t.bolt -update-debug-sections -reorder-blocks=reverse \
diff --git a/bolt/test/keep-aranges.test b/bolt/test/keep-aranges.test
index 8b93f19ea31af..bd33686005048 100644
--- a/bolt/test/keep-aranges.test
+++ b/bolt/test/keep-aranges.test
@@ -4,7 +4,7 @@
 REQUIRES: system-linux
 
 RUN: %clang %S/Inputs/icf_baz.c %S/Inputs/icf_main.c -Wl,--icf=all,--gdb-index \
-RUN:   -g -o %t.exe -fuse-ld=lld
+RUN:   -gdwarf-4 -o %t.exe -fuse-ld=lld
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections -keep-aranges
 RUN: llvm-dwarfdump -debug-aranges %t | FileCheck %s
 
diff --git a/bolt/test/non-empty-debug-line.test b/bolt/test/non-empty-debug-line.test
index 28c86be320e8e..677efe319bba8 100644
--- a/bolt/test/non-empty-debug-line.test
+++ b/bolt/test/non-empty-debug-line.test
@@ -3,7 +3,7 @@
 
 REQUIRES: system-linux
 
-RUN: %clang %S/Inputs/hello.c -g -o %t
+RUN: %clang %S/Inputs/hello.c -gdwarf-4 -o %t
 RUN: llvm-bolt %t -o %t1 -update-debug-sections -funcs=_start
 RUN: llvm-readobj -S %t > %t2
 RUN: llvm-readobj -S %t1 >> %t2

From cd8122b27f8fb9cbf222ef946bff3b698625e2f4 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Mon, 24 Jan 2022 15:12:18 -0800
Subject: [PATCH 467/946] [lldb] Add ConstString memory usage statistics

Add statistics about the memory usage of the string pool. I'm
particularly interested in the memory used by the allocator, i.e. the
number of bytes actually used by the allocator itself as well as the
number of bytes allocated through the allocator.

Differential revision: https://reviews.llvm.org/D117914
---
 lldb/include/lldb/Target/Statistics.h         |  6 +++
 lldb/include/lldb/Utility/ConstString.h       | 10 +++++
 lldb/source/Target/Statistics.cpp             | 14 ++++++
 lldb/source/Utility/ConstString.cpp           | 15 +++++++
 .../commands/statistics/basic/TestStats.py    | 45 +++++++++++++++++++
 5 files changed, 90 insertions(+)

diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h
index 185389b2eeafe..d2b8f746a38c8 100644
--- a/lldb/include/lldb/Target/Statistics.h
+++ b/lldb/include/lldb/Target/Statistics.h
@@ -9,6 +9,7 @@
 #ifndef LLDB_TARGET_STATISTICS_H
 #define LLDB_TARGET_STATISTICS_H
 
+#include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/Support/JSON.h"
@@ -110,6 +111,11 @@ struct ModuleStats {
   bool debug_info_index_saved_to_cache = false;
 };
 
+struct ConstStringStats {
+  llvm::json::Value ToJSON() const;
+  ConstString::MemoryStats stats = ConstString::GetMemoryStats();
+};
+
 /// A class that represents statistics for a since lldb_private::Target.
 class TargetStats {
 public:
diff --git a/lldb/include/lldb/Utility/ConstString.h b/lldb/include/lldb/Utility/ConstString.h
index 2756f1fd72038..937f8271a9a8e 100644
--- a/lldb/include/lldb/Utility/ConstString.h
+++ b/lldb/include/lldb/Utility/ConstString.h
@@ -408,6 +408,16 @@ class ConstString {
   ///     in memory.
   static size_t StaticMemorySize();
 
+  struct MemoryStats {
+    size_t GetBytesTotal() const { return bytes_total; }
+    size_t GetBytesUsed() const { return bytes_used; }
+    size_t GetBytesUnused() const { return bytes_total - bytes_used; }
+    size_t bytes_total = 0;
+    size_t bytes_used = 0;
+  };
+
+  static MemoryStats GetMemoryStats();
+
 protected:
   template  friend struct ::llvm::DenseMapInfo;
   /// Only used by DenseMapInfo.
diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
index 8d1e982c3b988..ebddad837d14b 100644
--- a/lldb/source/Target/Statistics.cpp
+++ b/lldb/source/Target/Statistics.cpp
@@ -65,6 +65,14 @@ json::Value ModuleStats::ToJSON() const {
   return module;
 }
 
+llvm::json::Value ConstStringStats::ToJSON() const {
+  json::Object obj;
+  obj.try_emplace("bytesTotal", stats.GetBytesTotal());
+  obj.try_emplace("bytesUsed", stats.GetBytesUsed());
+  obj.try_emplace("bytesUnused", stats.GetBytesUnused());
+  return obj;
+}
+
 json::Value TargetStats::ToJSON(Target &target) {
   CollectStats(target);
 
@@ -212,9 +220,15 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger,
     json_modules.emplace_back(module_stat.ToJSON());
   }
 
+  ConstStringStats const_string_stats;
+  json::Object json_memory{
+      {"strings", const_string_stats.ToJSON()},
+  };
+
   json::Object global_stats{
       {"targets", std::move(json_targets)},
       {"modules", std::move(json_modules)},
+      {"memory", std::move(json_memory)},
       {"totalSymbolTableParseTime", symtab_parse_time},
       {"totalSymbolTableIndexTime", symtab_index_time},
       {"totalSymbolTablesLoadedFromCache", symtabs_loaded},
diff --git a/lldb/source/Utility/ConstString.cpp b/lldb/source/Utility/ConstString.cpp
index e5e1b2387e64d..76270e3f53b96 100644
--- a/lldb/source/Utility/ConstString.cpp
+++ b/lldb/source/Utility/ConstString.cpp
@@ -171,6 +171,17 @@ class Pool {
     return mem_size;
   }
 
+  ConstString::MemoryStats GetMemoryStats() const {
+    ConstString::MemoryStats stats;
+    for (const auto &pool : m_string_pools) {
+      llvm::sys::SmartScopedReader rlock(pool.m_mutex);
+      const Allocator &alloc = pool.m_string_map.getAllocator();
+      stats.bytes_total += alloc.getTotalMemory();
+      stats.bytes_used += alloc.getBytesAllocated();
+    }
+    return stats;
+  }
+
 protected:
   uint8_t hash(const llvm::StringRef &s) const {
     uint32_t h = llvm::djbHash(s);
@@ -332,6 +343,10 @@ size_t ConstString::StaticMemorySize() {
   return StringPool().MemorySize();
 }
 
+ConstString::MemoryStats ConstString::GetMemoryStats() {
+  return StringPool().GetMemoryStats();
+}
+
 void llvm::format_provider::format(const ConstString &CS,
                                                 llvm::raw_ostream &OS,
                                                 llvm::StringRef Options) {
diff --git a/lldb/test/API/commands/statistics/basic/TestStats.py b/lldb/test/API/commands/statistics/basic/TestStats.py
index f69fddc27fbaa..99940ed5061f9 100644
--- a/lldb/test/API/commands/statistics/basic/TestStats.py
+++ b/lldb/test/API/commands/statistics/basic/TestStats.py
@@ -135,6 +135,7 @@ def test_default_no_run(self):
 
         (lldb) statistics dump
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
             {
@@ -160,6 +161,7 @@ def test_default_no_run(self):
         target = self.createTestTarget()
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -197,6 +199,7 @@ def test_default_with_run(self):
 
         (lldb) statistics dump
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
                 {
@@ -227,6 +230,7 @@ def test_default_with_run(self):
                                           lldb.SBFileSpec("main.c"))
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -254,6 +258,44 @@ def test_default_with_run(self):
         self.assertGreater(stats['launchOrAttachTime'], 0.0)
         self.assertGreater(stats['targetCreateTime'], 0.0)
 
+    def test_memory(self):
+        """
+            Test the "memory" dictionary emitted by "statistics dump".
+        """
+        exe = self.getBuildArtifact("a.out")
+        target = self.createTestTarget(file_path=exe)
+        debug_stats = self.get_stats()
+        debug_stat_keys = [  # keys checked at the top level of the dump
+            'memory',
+            'modules',
+            'targets',
+            'totalSymbolTableParseTime',
+            'totalSymbolTableIndexTime',
+            'totalSymbolTablesLoadedFromCache',
+            'totalSymbolTablesSavedToCache',
+            'totalDebugInfoParseTime',
+            'totalDebugInfoIndexTime',
+            'totalDebugInfoIndexLoadedFromCache',
+            'totalDebugInfoIndexSavedToCache',
+            'totalDebugInfoByteSize'
+        ]
+        self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
+
+        memory = debug_stats['memory']
+        memory_keys= [  # keys checked inside "memory"
+            'strings',
+        ]
+        self.verify_keys(memory, '"memory"', memory_keys, None)
+
+        strings = memory['strings']
+        strings_keys= [  # keys checked inside "strings"
+            'bytesTotal',
+            'bytesUsed',
+            'bytesUnused',
+        ]
+        self.verify_keys(strings, '"strings"', strings_keys, None)
+
+
     def find_module_in_metrics(self, path, stats):
         modules = stats['modules']
         for module in modules:
@@ -269,6 +311,7 @@ def test_modules(self):
         target = self.createTestTarget(file_path=exe)
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -312,6 +355,7 @@ def test_breakpoints(self):
         Output expected to be something like:
 
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
                 {
@@ -355,6 +399,7 @@ def test_breakpoints(self):
         self.runCmd("b a_function")
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',

From 52f37c24c3f891350394c30096be5e93b063f61e Mon Sep 17 00:00:00 2001
From: Nikolas Klauser 
Date: Tue, 28 Dec 2021 13:09:40 +0100
Subject: [PATCH 468/946] [libc++][NFC] remove this-> when calling member
 functions in <string>

remove `this->` when calling member functions

Reviewed By: Quuxplusone, Mordante, ldionne, #libc

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D116324
---
 libcxx/include/string | 71 ++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index 6f22f02afa0ba..f53e1bfef75e8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1866,7 +1866,7 @@ void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s,
                                                        size_type __reserve)
 {
     if (__reserve > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__reserve))
     {
@@ -1890,7 +1890,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz)
 {
     if (__sz > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__sz))
     {
@@ -1973,7 +1973,7 @@ void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
     __set_short_size(__sz);
   } else {
     if (__sz > max_size())
-      this->__throw_length_error();
+      __throw_length_error();
     size_t __cap = __recommend(__sz);
     __p = __alloc_traits::allocate(__alloc(), __cap + 1);
     __set_long_pointer(__p);
@@ -2029,7 +2029,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c)
 {
     if (__n > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__n))
     {
@@ -2074,7 +2074,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st
 {
     size_type __str_sz = __str.size();
     if (__pos > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __init(__str.data() + __pos, _VSTD::min(__n, __str_sz - __pos));
     _VSTD::__debug_db_insert_c(this);
 }
@@ -2087,7 +2087,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st
 {
     size_type __str_sz = __str.size();
     if (__pos > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __init(__str.data() + __pos, __str_sz - __pos);
     _VSTD::__debug_db_insert_c(this);
 }
@@ -2160,7 +2160,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(_ForwardIterator __first, _For
 {
     size_type __sz = static_cast<size_type>(_VSTD::distance(__first, __last));
     if (__sz > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__sz))
     {
@@ -2259,7 +2259,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by_and_replace
 {
     size_type __ms = max_size();
     if (__delta_cap > __ms - __old_cap - 1)
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __old_p = __get_pointer();
     size_type __cap = __old_cap < __ms / 2 - __alignment ?
                           __recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
@@ -2291,7 +2291,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by(size_type __old_cap, size_t
 {
     size_type __ms = max_size();
     if (__delta_cap > __ms - __old_cap)
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __old_p = __get_pointer();
     size_type __cap = __old_cap < __ms / 2 - __alignment ?
                           __recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
@@ -2523,7 +2523,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const basic_string& __str, siz
 {
     size_type __sz = __str.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return assign(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2540,7 +2540,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp & __t, size_type __p
     __self_view __sv = __t;
     size_type __sz = __sv.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return assign(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2709,7 +2709,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str, siz
 {
     size_type __sz = __str.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return append(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2725,7 +2725,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(const _Tp & __t, size_type __p
     __self_view __sv = __t;
     size_type __sz = __sv.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return append(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2746,7 +2746,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_t
     _LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::insert received nullptr");
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __cap = capacity();
     if (__cap - __sz >= __n)
     {
@@ -2777,7 +2777,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     if (__n)
     {
         size_type __cap = capacity();
@@ -2883,7 +2883,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_
 {
     size_type __str_sz = __str.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return insert(__pos1, __str.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
 }
 
@@ -2900,7 +2900,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const _Tp& _
     __self_view __sv = __t;
     size_type __str_sz = __sv.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return insert(__pos1, __sv.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
 }
 
@@ -2961,7 +2961,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
     _LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::replace received nullptr");
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __n1 = _VSTD::min(__n1, __sz - __pos);
     size_type __cap = capacity();
     if (__cap - __sz + __n1 >= __n2)
@@ -3008,7 +3008,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __n1 = _VSTD::min(__n1, __sz - __pos);
     size_type __cap = capacity();
     value_type* __p;
@@ -3042,7 +3042,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_it
                                                    _InputIterator __j1, _InputIterator __j2)
 {
     const basic_string __temp(__j1, __j2, __alloc());
-    return this->replace(__i1, __i2, __temp);
+    return replace(__i1, __i2, __temp);
 }
 
 template 
@@ -3060,7 +3060,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type _
 {
     size_type __str_sz = __str.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return replace(__pos1, __n1, __str.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
 }
 
@@ -3077,7 +3077,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type _
     __self_view __sv = __t;
     size_type __str_sz = __sv.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return replace(__pos1, __n1, __sv.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
 }
 
@@ -3147,7 +3147,8 @@ template 
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::erase(size_type __pos,
                                                  size_type __n) {
-  if (__pos > size()) this->__throw_out_of_range();
+  if (__pos > size())
+    __throw_out_of_range();
   if (__n == npos) {
     __erase_to_end(__pos);
   } else {
@@ -3263,7 +3264,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacity)
 {
     if (__requested_capacity > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
 
     // Make sure reserve(n) never shrinks. This is technically only required in C++20
     // and later (since P0966R1), however we provide consistent behavior in all Standard
@@ -3370,7 +3371,7 @@ typename basic_string<_CharT, _Traits, _Allocator>::const_reference
 basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) const
 {
     if (__n >= size())
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return (*this)[__n];
 }
 
@@ -3379,7 +3380,7 @@ typename basic_string<_CharT, _Traits, _Allocator>::reference
 basic_string<_CharT, _Traits, _Allocator>::at(size_type __n)
 {
     if (__n >= size())
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return (*this)[__n];
 }
 
@@ -3425,7 +3426,7 @@ basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n,
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __rlen = _VSTD::min(__n, __sz - __pos);
     traits_type::copy(__s, data() + __pos, __rlen);
     return __rlen;
@@ -3869,7 +3870,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
     _LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::compare(): received nullptr");
     size_type __sz = size();
     if (__pos1 > __sz || __n2 == npos)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __rlen = _VSTD::min(__n1, __sz - __pos1);
     int __r = traits_type::compare(data() + __pos1, __s, _VSTD::min(__rlen, __n2));
     if (__r == 0)
@@ -3933,7 +3934,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
                                                    size_type __pos2,
                                                    size_type __n2) const
 {
-        return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
+    return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
 }
 
 template 
@@ -4447,16 +4448,16 @@ template
 bool
 basic_string<_CharT, _Traits, _Allocator>::__dereferenceable(const const_iterator* __i) const
 {
-    return this->data() <= _VSTD::__to_address(__i->base()) &&
-           _VSTD::__to_address(__i->base()) < this->data() + this->size();
+    return data() <= _VSTD::__to_address(__i->base()) &&
+           _VSTD::__to_address(__i->base()) < data() + size();
 }
 
 template
 bool
 basic_string<_CharT, _Traits, _Allocator>::__decrementable(const const_iterator* __i) const
 {
-    return this->data() < _VSTD::__to_address(__i->base()) &&
-           _VSTD::__to_address(__i->base()) <= this->data() + this->size();
+    return data() < _VSTD::__to_address(__i->base()) &&
+           _VSTD::__to_address(__i->base()) <= data() + size();
 }
 
 template
@@ -4464,7 +4465,7 @@ bool
 basic_string<_CharT, _Traits, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
 {
     const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
-    return this->data() <= __p && __p <= this->data() + this->size();
+    return data() <= __p && __p <= data() + size();
 }
 
 template
@@ -4472,7 +4473,7 @@ bool
 basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
 {
     const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
-    return this->data() <= __p && __p < this->data() + this->size();
+    return data() <= __p && __p < data() + size();
 }
 
 #endif // _LIBCPP_DEBUG_LEVEL == 2

From 014a673441c6050683e059a547ffcbb03004730d Mon Sep 17 00:00:00 2001
From: Nikolas Klauser 
Date: Mon, 24 Jan 2022 19:44:34 +0100
Subject: [PATCH 469/946] [libc++] Remove std::basic_string's base class in
 ABIv2

Remove `std::basic_string`'s base class in ABI version 2

Reviewed By: Quuxplusone, ldionne, #libc

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D116334
---
 libcxx/include/__config |  2 ++
 libcxx/include/string   | 16 ++++++----------
 libcxx/src/string.cpp   |  2 ++
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 4c6edd45ff0a5..b99cdc38dc9f8 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -114,6 +114,8 @@
 // compatible. This switch removes these workarounds for platforms that don't care
 // about ABI compatibility.
 #  define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
+// Remove basic_string common base
+#  define _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 #elif _LIBCPP_ABI_VERSION == 1
 #  if !defined(_LIBCPP_OBJECT_FORMAT_COFF)
 // Enable compiling copies of now inline methods into the dylib to support
diff --git a/libcxx/include/string b/libcxx/include/string
index f53e1bfef75e8..3616de8a214d8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -618,6 +618,7 @@ operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, _CharT __y);
 
 _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+, allocator >(char const*, string const&))
 
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 template 
 struct __basic_string_common;
 
@@ -627,6 +628,7 @@ struct __basic_string_common {
     _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_length_error() const;
     _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_out_of_range() const;
 };
+#endif
 
 template 
 struct __string_is_trivial_iterator : public false_type {};
@@ -680,7 +682,9 @@ class
     _LIBCPP_PREFERRED_NAME(u32string)
 #endif
     basic_string
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
     : private __basic_string_common // This base class is historical, but it needs to remain for ABI compatibility
+#endif
 {
 public:
     typedef basic_string                                 __self;
@@ -1729,20 +1733,12 @@ private:
 
     _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
     void __throw_length_error() const {
-#ifndef _LIBCPP_NO_EXCEPTIONS
-        __basic_string_common::__throw_length_error();
-#else
-        _VSTD::abort();
-#endif
+        _VSTD::__throw_length_error("basic_string");
     }
 
     _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
     void __throw_out_of_range() const {
-#ifndef _LIBCPP_NO_EXCEPTIONS
-        __basic_string_common::__throw_out_of_range();
-#else
-        _VSTD::abort();
-#endif
+        _VSTD::__throw_out_of_range("basic_string");
     }
 
     friend basic_string operator+<>(const basic_string&, const basic_string&);
diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp
index 608dcb2c5863f..3c63f408240d3 100644
--- a/libcxx/src/string.cpp
+++ b/libcxx/src/string.cpp
@@ -21,6 +21,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 void __basic_string_common::__throw_length_error() const {
     _VSTD::__throw_length_error("basic_string");
 }
@@ -28,6 +29,7 @@ void __basic_string_common::__throw_length_error() const {
 void __basic_string_common::__throw_out_of_range() const {
     _VSTD::__throw_out_of_range("basic_string");
 }
+#endif
 
 #define _LIBCPP_EXTERN_TEMPLATE_DEFINE(...) template __VA_ARGS__;
 #ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION

From 59eb542f6070a017f0d407df908aa53639feb0ea Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy 
Date: Fri, 21 Jan 2022 06:23:59 +0000
Subject: [PATCH 470/946] [libc] Let header generator generate the type header
 inclusion boiler plate.

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D117855
---
 libc/config/linux/api.td                   | 227 +++------------------
 libc/config/public_api.td                  |   7 +-
 libc/utils/HdrGen/PublicAPICommand.cpp     |  21 +-
 libc/utils/LibcTableGenUtil/APIIndexer.cpp |   6 +-
 libc/utils/LibcTableGenUtil/APIIndexer.h   |   2 +-
 5 files changed, 40 insertions(+), 223 deletions(-)

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 48cd1c1113485..77e22287fe765 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -6,46 +6,6 @@ include "spec/llvm_libc_ext.td"
 include "spec/posix.td"
 include "spec/stdc.td"
 
-// TODO: Eliminate all TypeDecl specializations. Since we define all public
-// types in their own self contained header files, the header generator can
-// produce the boiler plate which pulls in the type definitions.
-
-def SizeT : TypeDecl<"size_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def SSizeT : TypeDecl<"ssize_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def StructTm: TypeDecl<"struct tm"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def TimeT: TypeDecl<"time_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def OffT : TypeDecl<"off_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def FILE : TypeDecl<"FILE"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
 def AssertMacro : MacroDef<"assert"> {
   let Defn = [{
     #undef assert
@@ -102,16 +62,8 @@ def AssertAPI : PublicAPI<"assert.h"> {
 def CTypeAPI : PublicAPI<"ctype.h"> {
 }
 
-def IMaxDivT : TypeDecl<"imaxdiv_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
 def IntTypesAPI : PublicAPI<"inttypes.h"> {
-  let TypeDeclarations = [
-    IMaxDivT,
-  ];
+  let Types = ["imaxdiv_t"];
 }
 
 def MathErrHandlingMacro : MacroDef<"math_errhandling"> {
@@ -146,18 +98,6 @@ def IsNanMacro : MacroDef<"isnan"> {
   }];
 }
 
-def FloatT : TypeDecl<"float_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def DoubleT : TypeDecl<"double_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
 def MathAPI : PublicAPI<"math.h"> {
   let Macros = [
     SimpleMacroDef<"MATH_ERRNO", "1">,
@@ -174,22 +114,7 @@ def MathAPI : PublicAPI<"math.h"> {
     IsInfMacro,
     IsNanMacro,
   ];
-  let TypeDeclarations = [
-    DoubleT,
-    FloatT,
-  ];
-}
-
-def FEnvT : TypeDecl<"fenv_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def FExceptT : TypeDecl<"fexcept_t"> {
-  let Decl = [{
-    #include 
-  }];
+  let Types = ["double_t", "float_t"];
 }
 
 def FenvAPI: PublicAPI<"fenv.h"> {
@@ -208,16 +133,11 @@ def FenvAPI: PublicAPI<"fenv.h"> {
 
     SimpleMacroDef<"FE_DFL_ENV", "((fenv_t *)-1)">,
   ];
-  let TypeDeclarations = [
-    FEnvT,
-    FExceptT,
-  ];
+  let Types = ["fenv_t", "fexcept_t"];
 }
 
 def StringAPI : PublicAPI<"string.h"> {
-  let TypeDeclarations = [
-    SizeT,
-  ];
+  let Types = ["size_t"];
 
   let Macros = [
     NullMacro,
@@ -225,66 +145,22 @@ def StringAPI : PublicAPI<"string.h"> {
 }
 
 def StdIOAPI : PublicAPI<"stdio.h"> {
-  let TypeDeclarations = [
-    SizeT,
-    FILE,
-  ];
-}
-
-def DivT : TypeDecl<"div_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def LDivT : TypeDecl<"ldiv_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def LLDivT : TypeDecl<"lldiv_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def BSearchCompareTDefn : TypeDecl<"__bsearchcompare_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def QSortCompareTDefn : TypeDecl<"__qsortcompare_t"> {
-  let Decl = [{
-    #include 
-  }];
+  let Types = ["size_t", "FILE"];
 }
 
 def StdlibAPI : PublicAPI<"stdlib.h"> {
-  let TypeDeclarations = [
-    DivT,
-    LDivT,
-    LLDivT,
-    SizeT,
-    BSearchCompareTDefn,
-    QSortCompareTDefn,
+  let Types = [
+    "div_t",
+    "ldiv_t",
+    "lldiv_t",
+    "size_t",
+    "__bsearchcompare_t",
+    "__qsortcompare_t"
   ];
 }
 
 def TimeAPI : PublicAPI<"time.h"> {
-  let TypeDeclarations = [
-    StructTm,
-    TimeT,
-  ];
-
-  let Functions = [
-    "asctime",
-    "asctime_r",
-    "gmtime",
-    "gmtime_r",
-    "mktime",
-  ];
+  let Types = ["time_t", "struct tm"];
 }
 
 def ErrnoAPI : PublicAPI<"errno.h"> {
@@ -307,6 +183,7 @@ def ErrnoAPI : PublicAPI<"errno.h"> {
 }
 
 def SysMManAPI : PublicAPI<"sys/mman.h"> {
+  let Types = ["off_t", "size_t"];
   let Macros = [
     SimpleMacroDef<"PROT_NONE", "0">,
     SimpleMacroDef<"PROT_READ", "1">,
@@ -326,65 +203,10 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> {
     // TODO: Add other MAP_* macros used by Linux.
   ];
 
-  let TypeDeclarations = [
-    SizeT,
-    OffT,
-  ];
-}
-
-def StructSigactionDefn : TypeDecl<"struct sigaction"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def SighandlerTDefn : TypeDecl<"__sighandler_t"> {
-  let Decl = [{
-    #include 
-  }];
 }
 
 def SignalAPI : PublicAPI<"signal.h"> {
-  let TypeDeclarations = [
-    StructSigactionDefn,
-    SighandlerTDefn,
-  ];
-}
-
-def OnceFlag : TypeDecl<"once_flag"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def MtxT : TypeDecl<"mtx_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def CndT : TypeDecl<"cnd_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def ThrdT : TypeDecl<"thrd_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def ThreadStartT : TypeDecl<"thrd_start_t"> {
-  let Decl = [{
-    #include 
-  }];
-}
-
-def CallOnceFuncT : TypeDecl<"__call_once_func_t"> {
-  let Decl = [{
-    #include 
-  }];
+  let Types = ["struct sigaction", "__sighandler_t"];
 }
 
 def ThreadsAPI : PublicAPI<"threads.h"> {
@@ -392,13 +214,13 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
     SimpleMacroDef<"ONCE_FLAG_INIT", "0">,
   ];
 
-  let TypeDeclarations = [
-    OnceFlag,
-    CallOnceFuncT,
-    MtxT,
-    CndT,
-    ThrdT,
-    ThreadStartT,
+  let Types = [
+    "__call_once_func_t",
+    "once_flag",
+    "cnd_t",
+    "mtx_t",
+    "thrd_t",
+    "thrd_start_t",
   ];
 
   let Enumerations = [
@@ -414,8 +236,5 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
 }
 
 def UniStdAPI : PublicAPI<"unistd.h"> {
-  let TypeDeclarations = [
-    SSizeT,
-    SizeT,
-  ];
+  let Types = ["size_t", "ssize_t"];
 }
diff --git a/libc/config/public_api.td b/libc/config/public_api.td
index b60836901836a..6d2f534f5a593 100644
--- a/libc/config/public_api.td
+++ b/libc/config/public_api.td
@@ -1,10 +1,5 @@
 include "spec/spec.td"
 
-class TypeDecl {
-  string Name = name;
-  string Decl = "";
-}
-
 class MacroDef {
   string Name = name;
   string Defn = "";
@@ -23,7 +18,7 @@ class MacroDefineIfNot : MacroDef {
 class PublicAPI {
   string HeaderName = name;
   list Macros = [];
-  list TypeDeclarations = [];
+  list<string> Types = [];
   list Enumerations = [];
   list Structs = [];
   list Functions = [];
diff --git a/libc/utils/HdrGen/PublicAPICommand.cpp b/libc/utils/HdrGen/PublicAPICommand.cpp
index 5285b0daac7d5..8f036f7a6d56e 100644
--- a/libc/utils/HdrGen/PublicAPICommand.cpp
+++ b/libc/utils/HdrGen/PublicAPICommand.cpp
@@ -10,6 +10,7 @@
 
 #include "utils/LibcTableGenUtil/APIIndexer.h"
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SourceMgr.h"
@@ -38,6 +39,12 @@ static void dedentAndWrite(llvm::StringRef Text, llvm::raw_ostream &OS) {
   }
 }
 
+static std::string getTypeHdrName(const std::string &Name) {
+  llvm::SmallVector<llvm::StringRef> Parts;
+  llvm::SplitString(llvm::StringRef(Name), Parts); // split on whitespace
+  return llvm::join(Parts.begin(), Parts.end(), "_"); // rejoin with '_'
+}
+
 namespace llvm_libc {
 
 void writeAPIFromIndex(APIIndexer &G,
@@ -54,16 +61,12 @@ void writeAPIFromIndex(APIIndexer &G,
     OS << '\n';
   }
 
-  for (auto &Pair : G.TypeDeclsMap) {
-    const std::string &Name = Pair.first;
-    if (G.TypeSpecMap.find(Name) == G.TypeSpecMap.end())
-      llvm::PrintFatalError(Name + " not found in any standard spec.\n");
-
-    llvm::Record *TypeDecl = Pair.second;
-    dedentAndWrite(TypeDecl->getValueAsString("Decl"), OS);
-
-    OS << '\n';
+  for (auto &TypeName : G.RequiredTypes) {
+    if (G.TypeSpecMap.find(TypeName) == G.TypeSpecMap.end())
+      llvm::PrintFatalError(TypeName + " not found in any standard spec.\n");
+    OS << "#include <llvm-libc-types/" << getTypeHdrName(TypeName) << ".h>\n";
   }
+  OS << '\n';
 
   if (G.Enumerations.size() != 0)
     OS << "enum {" << '\n';
diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.cpp b/libc/utils/LibcTableGenUtil/APIIndexer.cpp
index 16aef5880bffe..fd3f53c4150fc 100644
--- a/libc/utils/LibcTableGenUtil/APIIndexer.cpp
+++ b/libc/utils/LibcTableGenUtil/APIIndexer.cpp
@@ -120,9 +120,9 @@ void APIIndexer::indexPublicAPIDef(llvm::Record *PublicAPI) {
   for (llvm::Record *MacroDef : MacroDefList)
     MacroDefsMap[std::string(MacroDef->getValueAsString("Name"))] = MacroDef;
 
-  auto TypeDeclList = PublicAPI->getValueAsListOfDefs("TypeDeclarations");
-  for (llvm::Record *TypeDecl : TypeDeclList)
-    TypeDeclsMap[std::string(TypeDecl->getValueAsString("Name"))] = TypeDecl;
+  auto TypeList = PublicAPI->getValueAsListOfStrings("Types");
+  for (llvm::StringRef TypeName : TypeList)
+    RequiredTypes.insert(std::string(TypeName));
 
   auto StructList = PublicAPI->getValueAsListOfStrings("Structs");
   for (llvm::StringRef StructName : StructList)
diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.h b/libc/utils/LibcTableGenUtil/APIIndexer.h
index 7b4d62a38c615..cbb7ac2e66dcf 100644
--- a/libc/utils/LibcTableGenUtil/APIIndexer.h
+++ b/libc/utils/LibcTableGenUtil/APIIndexer.h
@@ -63,10 +63,10 @@ class APIIndexer {
   NameToRecordMapping EnumerationSpecMap;
   NameToRecordMapping FunctionSpecMap;
   NameToRecordMapping MacroDefsMap;
-  NameToRecordMapping TypeDeclsMap;
 
   std::unordered_map FunctionToHeaderMap;
 
+  NameSet RequiredTypes;
   NameSet Structs;
   NameSet Enumerations;
   NameSet Functions;

From 3628febcf8e3d88db5871c9f82a33ab98611e5a8 Mon Sep 17 00:00:00 2001
From: Mogball 
Date: Mon, 24 Jan 2022 23:31:00 +0000
Subject: [PATCH 471/946] [mlir] NFC control-flow sink cleanup

---
 .../mlir/Interfaces/ControlFlowInterfaces.td      |  3 ++-
 mlir/include/mlir/Transforms/Passes.td            |  8 +++++---
 mlir/lib/Transforms/ControlFlowSink.cpp           | 13 +++++++------
 .../lib/Transforms/Utils/ControlFlowSinkUtils.cpp | 15 ++++++++-------
 mlir/test/Transforms/control-flow-sink.mlir       | 12 +-----------
 5 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
index 429a5356428f7..896eb501d3799 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
@@ -148,7 +148,8 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
       (ins "::mlir::ArrayRef<::mlir::Attribute>":$operands,
            "::llvm::SmallVectorImpl<::mlir::InvocationBounds> &"
              :$invocationBounds), [{}],
-       [{ invocationBounds.append($_op->getNumRegions(), {0, ::llvm::None}); }]
+       [{ invocationBounds.append($_op->getNumRegions(),
+                                  ::mlir::InvocationBounds::getUnknown()); }]
     >,
   ];
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 56e90644363e5..4b1d6ca71e2e1 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -310,14 +310,16 @@ def Canonicalizer : Pass<"canonicalize"> {
 def ControlFlowSink : Pass<"control-flow-sink"> {
   let summary = "Sink operations into conditional blocks";
   let description = [{
-    This pass implements a simple control-flow sink on operations that implement
+    This pass implements control-flow sink on operations that implement
     `RegionBranchOpInterface` by moving dominating operations whose only uses
-    are in a single conditionally-executed region into that region so that
+    are in conditionally-executed regions into those regions so that
     executions paths where their results are not needed do not perform
     unnecessary computations.
 
     This is similar (but opposite) to loop-invariant code motion, which hoists
-    operations out of regions executed more than once.
+    operations out of regions executed more than once. The implementation of
+    control-flow sink uses a simple and conservative cost model: operations are
+    never duplicated and are only moved into singly-executed regions.
 
     It is recommended to run canonicalization first to remove unreachable
     blocks: ops in unreachable blocks may prevent other operations from being
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
index 71afc5702edf0..10d41b0f0013f 100644
--- a/mlir/lib/Transforms/ControlFlowSink.cpp
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -22,9 +22,7 @@
 using namespace mlir;
 
 namespace {
-/// A basic control-flow sink pass. This pass analyzes the regions of operations
-/// that implement `RegionBranchOpInterface` that are reachable and executed at
-/// most once and sinks candidate operations that are side-effect free.
+/// A control-flow sink pass.
 struct ControlFlowSink : public ControlFlowSinkBase {
   void runOnOperation() override;
 };
@@ -59,10 +57,13 @@ void ControlFlowSink::runOnOperation() {
   auto &domInfo = getAnalysis();
   getOperation()->walk([&](RegionBranchOpInterface branch) {
     SmallVector regionsToSink;
+    // Get the regions that are known to be executed at most once.
     getSinglyExecutedRegionsToSink(branch, regionsToSink);
-    numSunk = mlir::controlFlowSink(
-        regionsToSink, domInfo,
-        [](Operation *op, Region *) { return isSideEffectFree(op); });
+    // Sink side-effect free operations.
+    numSunk =
+        controlFlowSink(regionsToSink, domInfo, [](Operation *op, Region *) {
+          return isSideEffectFree(op);
+        });
   });
 }
 
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
index cffbd922f88c8..868174b261c4a 100644
--- a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements utilityies for control-flow sinking. Control-flow
+// This file implements utilities for control-flow sinking. Control-flow
 // sinking moves operations whose only uses are in conditionally-executed blocks
 // into those blocks so that they aren't executed on paths where their results
 // are not needed.
@@ -40,7 +40,7 @@ class Sinker {
 
   /// Given a list of regions, find operations to sink and sink them. Return the
   /// number of operations sunk.
-  size_t sinkRegions(ArrayRef regions) &&;
+  size_t sinkRegions(ArrayRef regions);
 
 private:
   /// Given a region and an op which dominates the region, returns true if all
@@ -93,9 +93,9 @@ void Sinker::tryToSinkPredecessors(Operation *user, Region *region,
     if (allUsersDominatedBy(op, region) && shouldMoveIntoRegion(op, region)) {
       // Move the op into the region's entry block. If the op is part of a
       // subgraph, dependee ops would have been moved first, so inserting before
-      // the start of the block will ensure dominance is preserved. Ops can only
-      // be safely moved into the entry block as the region's other blocks may
-      // for a loop.
+      // the start of the block will ensure SSA dominance is preserved locally
+      // in the subgraph. Ops can only be safely moved into the entry block as
+      // the region's other blocks may form a loop.
       op->moveBefore(®ion->front(), region->front().begin());
       ++numSunk;
       // Add the op to the work queue.
@@ -119,7 +119,7 @@ void Sinker::sinkRegion(Region *region) {
   }
 }
 
-size_t Sinker::sinkRegions(ArrayRef regions) && {
+size_t Sinker::sinkRegions(ArrayRef regions) {
   for (Region *region : regions)
     if (!region->empty())
       sinkRegion(region);
@@ -137,7 +137,8 @@ void mlir::getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
   // Collect constant operands.
   SmallVector operands(branch->getNumOperands(), Attribute());
   for (auto &it : llvm::enumerate(branch->getOperands()))
-    matchPattern(it.value(), m_Constant(&operands[it.index()]));
+    (void)matchPattern(it.value(), m_Constant(&operands[it.index()]));
+
   // Get the invocation bounds.
   SmallVector bounds;
   branch.getRegionInvocationBounds(operands, bounds);
diff --git a/mlir/test/Transforms/control-flow-sink.mlir b/mlir/test/Transforms/control-flow-sink.mlir
index 58dffe19c0aae..ba895ebc493e5 100644
--- a/mlir/test/Transforms/control-flow-sink.mlir
+++ b/mlir/test/Transforms/control-flow-sink.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file -control-flow-sink %s | FileCheck %s
+// RUN: mlir-opt -control-flow-sink %s | FileCheck %s
 
 // Test that operations can be sunk.
 
@@ -35,8 +35,6 @@ func @test_simple_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %4 : i32
 }
 
-// -----
-
 // Test that a region op can be sunk.
 
 // CHECK-LABEL: @test_region_sink
@@ -76,8 +74,6 @@ func @test_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %2 : i32
 }
 
-// -----
-
 // Test that an entire subgraph can be sunk.
 
 // CHECK-LABEL: @test_subgraph_sink
@@ -113,8 +109,6 @@ func @test_subgraph_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %6 : i32
 }
 
-// -----
-
 // Test that ops can be sunk into regions with multiple blocks.
 
 // CHECK-LABEL: @test_multiblock_region_sink
@@ -144,8 +138,6 @@ func @test_multiblock_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %4 : i32
 }
 
-// -----
-
 // Test that ops can be sunk recursively into nested regions.
 
 // CHECK-LABEL: @test_nested_region_sink
@@ -185,8 +177,6 @@ func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
   return %1 : i32
 }
 
-// -----
-
 // Test that ops are only moved into the entry block, even when their only uses
 // are further along.
 

From 3e746c6d9ef0758c1e06901a99a75b638d6a5655 Mon Sep 17 00:00:00 2001
From: Rob Suderman 
Date: Mon, 24 Jan 2022 15:38:30 -0800
Subject: [PATCH 472/946] [mlir] Add support for ExpM1 to GLSL/OpenCL SPIRV
 Backends

Adding a similar decomposition for exponential minus one to the SPIRV
backends along with the necessary tests.

Reviewed By: antiagainst

Differential Revision: https://reviews.llvm.org/D118081
---
 .../Conversion/MathToSPIRV/MathToSPIRV.cpp    | 31 +++++++++++---
 .../MathToSPIRV/math-to-glsl-spirv.mlir       | 38 +++++++++++-------
 .../MathToSPIRV/math-to-opencl-spirv.mlir     | 40 +++++++++++--------
 3 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
index ec8402af03009..90588ed9bd5f0 100644
--- a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
+++ b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
@@ -30,6 +30,28 @@ using namespace mlir;
 // normal RewritePattern.
 
 namespace {
+/// Converts math.expm1 to SPIR-V ops.
+///
+/// SPIR-V does not have a direct operation for exp(x)-1. Explicitly lower to
+/// these operations.
+template 
+class ExpM1OpPattern final : public OpConversionPattern {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(math::ExpM1Op operation, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    assert(adaptor.getOperands().size() == 1);
+    Location loc = operation.getLoc();
+    auto type = this->getTypeConverter()->convertType(operation.getType());
+    auto exp = rewriter.create(loc, type, adaptor.getOperand());
+    auto one = spirv::ConstantOp::getOne(type, loc, rewriter);
+    rewriter.replaceOpWithNewOp(operation, exp, one);
+    return success();
+  }
+};
+
 /// Converts math.log1p to SPIR-V ops.
 ///
 /// SPIR-V does not have a direct operations for log(1+x). Explicitly lower to
@@ -44,11 +66,10 @@ class Log1pOpPattern final : public OpConversionPattern {
                   ConversionPatternRewriter &rewriter) const override {
     assert(adaptor.getOperands().size() == 1);
     Location loc = operation.getLoc();
-    auto type =
-        this->getTypeConverter()->convertType(operation.getOperand().getType());
+    auto type = this->getTypeConverter()->convertType(operation.getType());
     auto one = spirv::ConstantOp::getOne(type, operation.getLoc(), rewriter);
     auto onePlus =
-        rewriter.create(loc, one, adaptor.getOperands()[0]);
+        rewriter.create(loc, one, adaptor.getOperand());
     rewriter.replaceOpWithNewOp(operation, type, onePlus);
     return success();
   }
@@ -65,7 +86,7 @@ void populateMathToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
 
   // GLSL patterns
   patterns
-      .add,
+      .add, ExpM1OpPattern,
            spirv::ElementwiseOpPattern,
            spirv::ElementwiseOpPattern,
            spirv::ElementwiseOpPattern,
@@ -81,7 +102,7 @@ void populateMathToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
           typeConverter, patterns.getContext());
 
   // OpenCL patterns
-  patterns.add,
+  patterns.add, ExpM1OpPattern,
                spirv::ElementwiseOpPattern,
                spirv::ElementwiseOpPattern,
                spirv::ElementwiseOpPattern,
diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
index f0e0b7e63fdce..c996e7056783a 100644
--- a/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
+++ b/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
@@ -8,26 +8,30 @@ func @float32_unary_scalar(%arg0: f32) {
   %0 = math.cos %arg0 : f32
   // CHECK: spv.GLSL.Exp %{{.*}}: f32
   %1 = math.exp %arg0 : f32
+  // CHECK: %[[EXP:.+]] = spv.GLSL.Exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : f32
   // CHECK: spv.GLSL.Log %{{.*}}: f32
-  %2 = math.log %arg0 : f32
+  %3 = math.log %arg0 : f32
   // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.GLSL.Log %[[ADDONE]]
-  %3 = math.log1p %arg0 : f32
+  %4 = math.log1p %arg0 : f32
   // CHECK: spv.GLSL.InverseSqrt %{{.*}}: f32
-  %4 = math.rsqrt %arg0 : f32
+  %5 = math.rsqrt %arg0 : f32
   // CHECK: spv.GLSL.Sqrt %{{.*}}: f32
-  %5 = math.sqrt %arg0 : f32
+  %6 = math.sqrt %arg0 : f32
   // CHECK: spv.GLSL.Tanh %{{.*}}: f32
-  %6 = math.tanh %arg0 : f32
+  %7 = math.tanh %arg0 : f32
   // CHECK: spv.GLSL.Sin %{{.*}}: f32
-  %7 = math.sin %arg0 : f32
+  %8 = math.sin %arg0 : f32
   // CHECK: spv.GLSL.FAbs %{{.*}}: f32
-  %8 = math.abs %arg0 : f32
+  %9 = math.abs %arg0 : f32
   // CHECK: spv.GLSL.Ceil %{{.*}}: f32
-  %9 = math.ceil %arg0 : f32
+  %10 = math.ceil %arg0 : f32
   // CHECK: spv.GLSL.Floor %{{.*}}: f32
-  %10 = math.floor %arg0 : f32
+  %11 = math.floor %arg0 : f32
   return
 }
 
@@ -37,20 +41,24 @@ func @float32_unary_vector(%arg0: vector<3xf32>) {
   %0 = math.cos %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Exp %{{.*}}: vector<3xf32>
   %1 = math.exp %arg0 : vector<3xf32>
+  // CHECK: %[[EXP:.+]] = spv.GLSL.Exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Log %{{.*}}: vector<3xf32>
-  %2 = math.log %arg0 : vector<3xf32>
+  %3 = math.log %arg0 : vector<3xf32>
   // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.GLSL.Log %[[ADDONE]]
-  %3 = math.log1p %arg0 : vector<3xf32>
+  %4 = math.log1p %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.InverseSqrt %{{.*}}: vector<3xf32>
-  %4 = math.rsqrt %arg0 : vector<3xf32>
+  %5 = math.rsqrt %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Sqrt %{{.*}}: vector<3xf32>
-  %5 = math.sqrt %arg0 : vector<3xf32>
+  %6 = math.sqrt %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Tanh %{{.*}}: vector<3xf32>
-  %6 = math.tanh %arg0 : vector<3xf32>
+  %7 = math.tanh %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Sin %{{.*}}: vector<3xf32>
-  %7 = math.sin %arg0 : vector<3xf32>
+  %8 = math.sin %arg0 : vector<3xf32>
   return
 }
 
diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
index 7580f1f733c49..d0959efc98ab2 100644
--- a/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
+++ b/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
@@ -8,28 +8,32 @@ func @float32_unary_scalar(%arg0: f32) {
   %0 = math.cos %arg0 : f32
   // CHECK: spv.OCL.exp %{{.*}}: f32
   %1 = math.exp %arg0 : f32
+  // CHECK: %[[EXP:.+]] = spv.OCL.exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : f32
   // CHECK: spv.OCL.log %{{.*}}: f32
-  %2 = math.log %arg0 : f32
+  %3 = math.log %arg0 : f32
   // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.OCL.log %[[ADDONE]]
-  %3 = math.log1p %arg0 : f32
+  %4 = math.log1p %arg0 : f32
   // CHECK: spv.OCL.rsqrt %{{.*}}: f32
-  %4 = math.rsqrt %arg0 : f32
+  %5 = math.rsqrt %arg0 : f32
   // CHECK: spv.OCL.sqrt %{{.*}}: f32
-  %5 = math.sqrt %arg0 : f32
+  %6 = math.sqrt %arg0 : f32
   // CHECK: spv.OCL.tanh %{{.*}}: f32
-  %6 = math.tanh %arg0 : f32
+  %7 = math.tanh %arg0 : f32
   // CHECK: spv.OCL.sin %{{.*}}: f32
-  %7 = math.sin %arg0 : f32
+  %8 = math.sin %arg0 : f32
   // CHECK: spv.OCL.fabs %{{.*}}: f32
-  %8 = math.abs %arg0 : f32
+  %9 = math.abs %arg0 : f32
   // CHECK: spv.OCL.ceil %{{.*}}: f32
-  %9 = math.ceil %arg0 : f32
+  %10 = math.ceil %arg0 : f32
   // CHECK: spv.OCL.floor %{{.*}}: f32
-  %10 = math.floor %arg0 : f32
+  %11 = math.floor %arg0 : f32
   // CHECK: spv.OCL.erf %{{.*}}: f32
-  %11 = math.erf %arg0 : f32
+  %12 = math.erf %arg0 : f32
   return
 }
 
@@ -39,20 +43,24 @@ func @float32_unary_vector(%arg0: vector<3xf32>) {
   %0 = math.cos %arg0 : vector<3xf32>
   // CHECK: spv.OCL.exp %{{.*}}: vector<3xf32>
   %1 = math.exp %arg0 : vector<3xf32>
+  // CHECK: %[[EXP:.+]] = spv.OCL.exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : vector<3xf32>
   // CHECK: spv.OCL.log %{{.*}}: vector<3xf32>
-  %2 = math.log %arg0 : vector<3xf32>
+  %3 = math.log %arg0 : vector<3xf32>
   // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.OCL.log %[[ADDONE]]
-  %3 = math.log1p %arg0 : vector<3xf32>
+  %4 = math.log1p %arg0 : vector<3xf32>
   // CHECK: spv.OCL.rsqrt %{{.*}}: vector<3xf32>
-  %4 = math.rsqrt %arg0 : vector<3xf32>
+  %5 = math.rsqrt %arg0 : vector<3xf32>
   // CHECK: spv.OCL.sqrt %{{.*}}: vector<3xf32>
-  %5 = math.sqrt %arg0 : vector<3xf32>
+  %6 = math.sqrt %arg0 : vector<3xf32>
   // CHECK: spv.OCL.tanh %{{.*}}: vector<3xf32>
-  %6 = math.tanh %arg0 : vector<3xf32>
+  %7 = math.tanh %arg0 : vector<3xf32>
   // CHECK: spv.OCL.sin %{{.*}}: vector<3xf32>
-  %7 = math.sin %arg0 : vector<3xf32>
+  %8 = math.sin %arg0 : vector<3xf32>
   return
 }
 

From d0d8d2d572cd1db54d0f6d90f8dd3825f9c7b36b Mon Sep 17 00:00:00 2001
From: Derek Schuff 
Date: Mon, 24 Jan 2022 14:59:54 -0800
Subject: [PATCH 473/946] [clang][Driver] use DWARF4 for wasm

Opt into the old default of DWARF4 for now.

Differential Revision: https://reviews.llvm.org/D118082
---
 clang/lib/Driver/ToolChains/WebAssembly.h | 1 +
 clang/test/Driver/debug-options.c         | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h
index c84e596759466..b4c3082a089a0 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -51,6 +51,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain {
   bool hasBlocksRuntime() const override;
   bool SupportsProfiling() const override;
   bool HasNativeLLVMSupport() const override;
+  unsigned GetDefaultDwarfVersion() const override { return 4; }
   void
   addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                         llvm::opt::ArgStringList &CC1Args,
diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c
index 22e05140f2618..e4a49269a8c70 100644
--- a/clang/test/Driver/debug-options.c
+++ b/clang/test/Driver/debug-options.c
@@ -132,6 +132,13 @@
 // RUN: %clang -### -c -g %s -target powerpc64-ibm-aix-xcoff -gcolumn-info \
 // RUN:             2>&1 | FileCheck -check-prefix=CI %s
 
+// WebAssembly.
+// WebAssembly should default to DWARF4.
+// RUN: %clang -### -c -g %s -target wasm32 2>&1 \
+// RUN:             | FileCheck -check-prefix=G_DWARF4 %s
+// RUN: %clang -### -c -g %s -target wasm64 2>&1 \
+// RUN:             | FileCheck -check-prefix=G_DWARF4 %s
+
 // RUN: %clang -### -c -gdwarf-2 %s 2>&1 \
 // RUN:             | FileCheck -check-prefix=G_ONLY_DWARF2 %s
 //

From dd01d971aa2c4b464a295ca5c78ff93fc4441dc3 Mon Sep 17 00:00:00 2001
From: Jan Korous 
Date: Fri, 21 Jan 2022 17:11:05 -0800
Subject: [PATCH 474/946] [clang][dataflow] Avoid MaxIterations overflow

unsigned is only guaranteed to be at least 16 bits wide; on a platform where it is exactly 16 bits, 1 << 16 would wrap around to zero.

Differential Revision: https://reviews.llvm.org/D117938
---
 .../lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 3782f0f5f69ac..7611395cafb6b 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -210,8 +210,8 @@ runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
   // FIXME: Consider making the maximum number of iterations configurable.
   // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
   // of iterations, number of functions that time out, etc.
-  unsigned Iterations = 0;
-  static constexpr unsigned MaxIterations = 1 << 16;
+  uint32_t Iterations = 0;
+  static constexpr uint32_t MaxIterations = 1 << 16;
   while (const CFGBlock *Block = Worklist.dequeue()) {
     if (++Iterations > MaxIterations) {
       llvm::errs() << "Maximum number of iterations reached, giving up.\n";

From 902184e6cc263e4c66440c95a21665b6fdffe57c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 00:24:06 +0000
Subject: [PATCH 475/946] [X86] combinePredicateReduction - generalize
 allof(cmpeq(x,0)) handling to allof(cmpeq(x,y))

There are no further reasons to limit this to cmpeq-with-zero; the outstanding regressions with lowering to PTEST have now been addressed.

Improves codegen for Issue #53379
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   6 +-
 .../test/CodeGen/X86/vector-compare-all_of.ll |  20 ++-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 143 +++++++++---------
 3 files changed, 81 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4360bc68ffaee..c68774a838a25 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42215,12 +42215,10 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
       EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
       Movmsk = DAG.getBitcast(MovmskVT, Match);
     } else {
-      // For all_of(setcc(vec,0,eq))
+      // For all_of(setcc(x,y,eq))
       // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
       // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
-      if (BinOp == ISD::AND &&
-          Match.getOpcode() == ISD::SETCC &&
-          ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
+      if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
           cast(Match.getOperand(2))->get() ==
               ISD::CondCode::SETEQ) {
         SDValue Vec = Match.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 91fa60ef09d59..584b941681831 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1277,11 +1277,10 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; SSE-LABEL: bool_reduction_v16i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    psubb %xmm3, %xmm1
+; SSE-NEXT:    psubb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -1289,18 +1288,17 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index fa48b72490ebf..4be3ed18cf901 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1656,10 +1656,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 ; SSE2-LABEL: icmp_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    movmskpd %xmm1, %eax
-; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -1769,21 +1767,25 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
 }
 
 define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE-NEXT:    packsswb %xmm0, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpb $-1, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v8i16_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i16_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1876,14 +1878,11 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ; SSE2-LABEL: icmp_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packssdw %xmm3, %xmm1
-; SSE2-NEXT:    movmskps %xmm1, %eax
-; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    packsswb %xmm0, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpb $-1, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -2022,32 +2021,40 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 }
 
 define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v16i16_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v16i16_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm3, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: icmp_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -2171,23 +2178,14 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
 ; SSE2-LABEL: icmp_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
-; SSE2-NEXT:    pand %xmm3, %xmm7
 ; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
-; SSE2-NEXT:    pand %xmm1, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packssdw %xmm2, %xmm1
-; SSE2-NEXT:    packssdw %xmm3, %xmm1
-; SSE2-NEXT:    packsswb %xmm1, %xmm1
-; SSE2-NEXT:    pmovmskb %xmm1, %eax
-; SSE2-NEXT:    cmpb $-1, %al
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    packsswb %xmm2, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -2329,14 +2327,14 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
 define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ; SSE-LABEL: icmp_v32i16_v32i1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
-; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
-; SSE-NEXT:    packsswb %xmm3, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
@@ -2345,14 +2343,14 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
@@ -2362,11 +2360,10 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ;
 ; AVX2-LABEL: icmp_v32i16_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From fd0a4bc76bd93d81ca3acecb2ce07513d64060be Mon Sep 17 00:00:00 2001
From: Craig Topper 
Date: Mon, 24 Jan 2022 16:56:29 -0800
Subject: [PATCH 476/946] [RISCV] Add missing space to 'clang-format on'
 directive. NFC

Without a space after the comment characters it seems to be ignored.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2ab9ab653328d..7baed2793e4e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1729,7 +1729,7 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
     }
-    //clang-format on
+    // clang-format on
 
     MachineBasicBlock &MBB = *MI.getParent();
     MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))

From 15f7857412aebcaa0277803f8eafbd366e236820 Mon Sep 17 00:00:00 2001
From: Philip Reames 
Date: Sun, 23 Jan 2022 07:51:24 -0800
Subject: [PATCH 477/946] [tests] Refresh autogen tests for SLP

---
 .../SLPVectorizer/AArch64/64-bit-vector.ll    |  8 ++---
 .../SLPVectorizer/AArch64/slp-or-reduction.ll |  1 +
 .../SLPVectorizer/AArch64/spillcost-di.ll     | 28 ++++++++--------
 .../AMDGPU/add_sub_sat-inseltpoison.ll        |  8 ++---
 .../SLPVectorizer/AMDGPU/add_sub_sat.ll       |  8 ++---
 .../address-space-ptr-sze-gep-index-assert.ll | 28 ++++++++++------
 .../SLPVectorizer/PowerPC/aggregate.ll        |  2 +-
 .../Transforms/SLPVectorizer/X86/PR34635.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/PR36280.ll   |  4 +--
 .../Transforms/SLPVectorizer/X86/aggregate.ll |  2 +-
 .../X86/alternate-cast-inseltpoison.ll        | 16 +++++-----
 .../SLPVectorizer/X86/alternate-cast.ll       | 16 +++++-----
 .../SLPVectorizer/X86/arith-add-ssat.ll       | 28 ++++++++++------
 .../SLPVectorizer/X86/arith-sub-ssat.ll       | 28 ++++++++++------
 .../test/Transforms/SLPVectorizer/X86/call.ll | 13 +++-----
 .../X86/crash_netbsd_decompress.ll            |  8 ++---
 .../X86/crash_scheduling-inseltpoison.ll      |  4 +--
 .../SLPVectorizer/X86/crash_scheduling.ll     |  4 +--
 .../SLPVectorizer/X86/crash_vectorizeTree.ll  | 27 ++++++++++------
 .../SLPVectorizer/X86/debug_info.ll           | 30 ++++++++---------
 .../SLPVectorizer/X86/external_user.ll        |  2 +-
 .../SLPVectorizer/X86/extract_in_tree_user.ll |  6 ++--
 .../SLPVectorizer/X86/gep_mismatch.ll         |  2 +-
 .../SLPVectorizer/X86/hadd-inseltpoison.ll    | 16 +++++-----
 .../test/Transforms/SLPVectorizer/X86/hadd.ll | 16 +++++-----
 .../SLPVectorizer/X86/hsub-inseltpoison.ll    | 16 +++++-----
 .../test/Transforms/SLPVectorizer/X86/hsub.ll | 16 +++++-----
 .../SLPVectorizer/X86/insertvalue.ll          |  8 ++---
 .../Transforms/SLPVectorizer/X86/metadata.ll  | 13 ++++----
 .../SLPVectorizer/X86/multi_block.ll          |  6 ++--
 .../SLPVectorizer/X86/no_alternate_divrem.ll  | 32 +++++++++----------
 .../Transforms/SLPVectorizer/X86/pr16899.ll   |  6 ++--
 .../Transforms/SLPVectorizer/X86/reduction.ll | 10 +++---
 .../SLPVectorizer/X86/remark_listcost.ll      | 14 ++++----
 .../SLPVectorizer/X86/undef_vect.ll           | 10 +++---
 35 files changed, 235 insertions(+), 203 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
index ad970b2bec1bf..10883987aa758 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
@@ -21,14 +21,14 @@ define void @f(float* %r, float* %w) {
 ; NO_SLP-LABEL: @f(
 ; NO_SLP-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
 ; NO_SLP-NEXT:    [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1
-; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]]
-; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]]
+; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]], align 4
+; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]], align 4
 ; NO_SLP-NEXT:    [[ADD0:%.*]] = fadd float [[F0]], [[F0]]
 ; NO_SLP-NEXT:    [[ADD1:%.*]] = fadd float [[F1]], [[F1]]
 ; NO_SLP-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
 ; NO_SLP-NEXT:    [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1
-; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]]
-; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]]
+; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]], align 4
+; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]], align 4
 ; NO_SLP-NEXT:    ret void
 ;
   %r0 = getelementptr inbounds float, float* %r, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
index 185eb8a73ffb1..53126ee407e98 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
@@ -29,6 +29,7 @@ define i8 @reduce_or(%struct.buf* %a, %struct.buf* %b) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i8> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP4]])
 ; CHECK-NEXT:    ret i8 [[TMP5]]
+;
 
 entry:
   %arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
index 98a9fd482e872..39f2f885bc26b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
@@ -9,20 +9,20 @@ target triple = "aarch64"
 define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 {
 ; CHECK-LABEL: @patatino(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[N:%.*]], metadata !18, metadata !DIExpression()), !dbg !23
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
-; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33
-; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
-; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36
-; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg !36, !tbaa !29
-; CHECK-NEXT:    ret void, !dbg !37
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[N:%.*]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]]
+; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]]
+; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg [[DBG28:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg [[DBG26]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA29:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33:![0-9]+]]
+; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg [[DBG35:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg [[DBG36:![0-9]+]]
+; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG36]], !tbaa [[TBAA29]]
+; CHECK-NEXT:    ret void, !dbg [[DBG37:![0-9]+]]
 ;
 entry:
   call void @llvm.dbg.value(metadata i64 %n, metadata !18, metadata !DIExpression()), !dbg !23
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
index f2c4b7e899c12..a86fd0e9b8693 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
@@ -248,8 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
 ; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
-; GFX8-NEXT:    [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> 
-; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
+; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> 
+; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
 ; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
 ;
 bb:
@@ -297,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
-; GFX8-NEXT:    [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> 
-; GFX8-NEXT:    ret <4 x i16> [[INS_32]]
+; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> 
+; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
 ;
 bb:
   %arg0.0 = extractelement <4 x i16> %arg0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
index af1b7f42f9973..503e947ebeed2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
@@ -248,8 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
 ; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
-; GFX8-NEXT:    [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> 
-; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
+; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> 
+; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
 ; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
 ;
 bb:
@@ -297,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> 
 ; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
-; GFX8-NEXT:    [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> 
-; GFX8-NEXT:    ret <4 x i16> [[INS_32]]
+; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> 
+; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
 ;
 bb:
   %arg0.0 = extractelement <4 x i16> %arg0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
index 4f85482e3ae00..fc6347dcaa427 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
@@ -12,8 +12,8 @@ define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
 ; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast i8 addrspace(5)* undef to i8*
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 [[IDX:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP]], i64 [[TMP3:%.*]]
-; CHECK-NEXT:    store i8 0, i8 addrspace(5)* [[TMP2]]
-; CHECK-NEXT:    store i8 0, i8* [[TMP4]]
+; CHECK-NEXT:    store i8 0, i8 addrspace(5)* [[TMP2]], align 1
+; CHECK-NEXT:    store i8 0, i8* [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -39,8 +39,8 @@ define void @multi_as_reduction_different_sized(i32 addrspace(3)* %lds, i32 %idx
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -80,8 +80,8 @@ define void @multi_as_reduction_same_size(i32 addrspace(1)* %global, i64 %idx0,
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -122,8 +122,8 @@ define void @multi_as_reduction_different_sized_noncanon(i32 addrspace(3)* %lds,
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -148,9 +148,17 @@ bb:
   ret void
 }
 
-; CHECK-LABEL: slp_crash_on_addrspacecast
-; CHECK: ret void
 define void @slp_crash_on_addrspacecast() {
+; CHECK-LABEL: @slp_crash_on_addrspacecast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
+; CHECK-NEXT:    [[P0:%.*]] = addrspacecast i64 addrspace(3)* [[TMP0]] to i64*
+; CHECK-NEXT:    store i64 undef, i64* [[P0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
+; CHECK-NEXT:    [[P1:%.*]] = addrspacecast i64 addrspace(3)* [[TMP1]] to i64*
+; CHECK-NEXT:    store i64 undef, i64* [[P1]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
   %p0 = addrspacecast i64 addrspace(3)* %0 to i64*
diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
index 99af834e5b81e..974bddc4ba2cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -9,7 +9,7 @@ define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
index daa68b12b3300..33be367b35311 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S -mcpu=corei7 | FileCheck %s
 
 define i32 @main() {
-; CHECK-LABEL: define {{[^@]+}}@main(
+; CHECK-LABEL: @main(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[T:%.*]] = alloca <8 x i32>, align 32
 ; CHECK-NEXT:    [[T1:%.*]] = bitcast <8 x i32>* [[T]] to [8 x i32]*
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
index 1001468fd6f74..6968171f5db7c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
@@ -5,8 +5,8 @@ define float @jacobi(float* %p, float %x, float %y, float %z) {
 ; CHECK-LABEL: @jacobi(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, float* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, float* [[P]], i64 2
-; CHECK-NEXT:    [[P1:%.*]] = load float, float* [[GEP1]]
-; CHECK-NEXT:    [[P2:%.*]] = load float, float* [[GEP2]]
+; CHECK-NEXT:    [[P1:%.*]] = load float, float* [[GEP1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = load float, float* [[GEP2]], align 4
 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[P1]], [[X:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = fmul float [[P2]], [[Y:%.*]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[MUL1]], [[Z:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
index f270dbf4f78e7..1af4154a8f338 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
@@ -9,7 +9,7 @@ define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index 44d0f17182694..2416d100f653f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -165,8 +165,8 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> 
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> 
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> 
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -209,12 +209,12 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16
 ; CHECK-NEXT:    [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> 
-; CHECK-NEXT:    [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> 
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> 
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> 
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> 
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 5b537ac6683f4..7d44d988143f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -165,8 +165,8 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> 
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> 
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> 
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -209,12 +209,12 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16
 ; CHECK-NEXT:    [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> 
-; CHECK-NEXT:    [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> 
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> 
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> 
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> 
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> 
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> 
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
index a94439f348702..66154522c327d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
@@ -96,6 +96,24 @@ define void @add_v8i64() {
 ; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
+; AVX-LABEL: @add_v8i64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
+; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
+; AVX-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    ret void
+;
+; AVX512-LABEL: @add_v8i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    ret void
+;
 ; AVX1-LABEL: @add_v8i64(
 ; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
@@ -114,7 +132,6 @@ define void @add_v8i64() {
 ; AVX1-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
 ; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
 ; AVX1-NEXT:    ret void
-;
 ; AVX2-LABEL: @add_v8i64(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -125,14 +142,6 @@ define void @add_v8i64() {
 ; AVX2-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX2-NEXT:    ret void
-;
-; AVX512-LABEL: @add_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
-; AVX512-NEXT:    ret void
-;
 ; AVX256BW-LABEL: @add_v8i64(
 ; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -143,7 +152,6 @@ define void @add_v8i64() {
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    ret void
-;
   %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
   %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
   %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
index 3c23d0fd75f4b..88f18cba2b2ee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
@@ -96,6 +96,24 @@ define void @sub_v8i64() {
 ; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
+; AVX-LABEL: @sub_v8i64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
+; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
+; AVX-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    ret void
+;
+; AVX512-LABEL: @sub_v8i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    ret void
+;
 ; AVX1-LABEL: @sub_v8i64(
 ; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
@@ -114,7 +132,6 @@ define void @sub_v8i64() {
 ; AVX1-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
 ; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
 ; AVX1-NEXT:    ret void
-;
 ; AVX2-LABEL: @sub_v8i64(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -125,14 +142,6 @@ define void @sub_v8i64() {
 ; AVX2-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX2-NEXT:    ret void
-;
-; AVX512-LABEL: @sub_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
-; AVX512-NEXT:    ret void
-;
 ; AVX256BW-LABEL: @sub_v8i64(
 ; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -143,7 +152,6 @@ define void @sub_v8i64() {
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    ret void
-;
   %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
   %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
   %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 5cca9363802b4..a0dece0a58b47 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -125,8 +125,8 @@ define void @sqrt_libm_errno(double* %a, double* %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
-; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
-; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR2]]
 ; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
 ; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
@@ -149,8 +149,8 @@ define void @round_custom(i64* %a, i64* %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
-; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
-; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
+; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR3]]
 ; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
 ; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
@@ -168,10 +168,5 @@ define void @round_custom(i64* %a, i64* %b) {
 }
 
 
-; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
-; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
-; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]
 
-; CHECK: attributes [[ATTR0]] = { nofree nosync nounwind readnone speculatable willreturn }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
index de17d44f39a04..7e40a49fb09e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -15,8 +15,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_DSTATE:%.*]], %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 1), align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* @d, align 4
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[SW_BB:%.*]], label [[SAVE_STATE_AND_RETURN:%.*]]
@@ -33,8 +33,8 @@ define i32 @fn1() {
 ; CHECK:       save_state_and_return:
 ; CHECK-NEXT:    [[T_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP0]], [[SW_BB]] ], [ [[TMP0]], [[SW_BB]] ]
 ; CHECK-NEXT:    [[F_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP1]], [[ENTRY]] ], [ 0, [[SW_BB]] ], [ 0, [[SW_BB]] ]
-; CHECK-NEXT:    store i32 [[T_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
-; CHECK-NEXT:    store i32 [[F_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    store i32 [[T_0]], i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    store i32 [[F_0]], i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 1), align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 348aed4a48e5a..f529d0fc47338 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -23,11 +23,11 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, [[TBAA0:!tbaa !.*]]
+; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, [[TBAA0]]
+; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
 ; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 8e93552ff3663..9265ca1731a09 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -23,11 +23,11 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa !0
+; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa !0
+; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
 ; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
index d646180f36d98..c1a67ab415fea 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -25,27 +25,36 @@ define void @bar() {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1
 ; CHECK-NEXT:    br label [[TMP7:%.*]]
-; CHECK:         [[TMP8:%.*]] = phi <2 x double> [ , [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ]
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x double> [ , [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
 ; CHECK-NEXT:    [[TMP11]] = load <2 x double>, <2 x double>* [[TMP10]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[TMP12:%.*]], label [[TMP13:%.*]]
-; CHECK:         ret void
-; CHECK:         [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
+; CHECK:       12:
+; CHECK-NEXT:    ret void
+; CHECK:       13:
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[TMP15:%.*]], label [[TMP16:%.*]]
-; CHECK:         br label [[TMP16]]
-; CHECK:         br i1 undef, label [[TMP17:%.*]], label [[TMP18]]
-; CHECK:         unreachable
-; CHECK:         [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
+; CHECK-NEXT:    br i1 undef, label [[TMP17:%.*]], label [[TMP18]]
+; CHECK:       17:
+; CHECK-NEXT:    unreachable
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
 ; CHECK-NEXT:    switch i32 undef, label [[TMP21]] [
 ; CHECK-NEXT:    i32 32, label [[TMP7]]
 ; CHECK-NEXT:    i32 103, label [[TMP7]]
 ; CHECK-NEXT:    ]
-; CHECK:         br i1 undef, label [[TMP7]], label [[TMP22:%.*]]
-; CHECK:         unreachable
+; CHECK:       21:
+; CHECK-NEXT:    br i1 undef, label [[TMP7]], label [[TMP22:%.*]]
+; CHECK:       22:
+; CHECK-NEXT:    unreachable
 ;
   %1 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 0
   %2 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 25ff6b6ba702a..c717c5d10b30c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -16,24 +16,24 @@ target triple = "x86_64-apple-macosx10.7.0"
 define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
 ; CHECK-LABEL: @depth(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata !12, metadata !DIExpression()), !dbg !18
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata !13, metadata !DIExpression()), !dbg !18
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata !14, metadata !DIExpression()), !dbg !19
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata !15, metadata !DIExpression()), !dbg !19
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 0, metadata !16, metadata !DIExpression()), !dbg !20
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg !20
-; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg !20
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 0, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg [[DBG20]]
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg [[DBG20]]
 ; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg !21
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg !21
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg !21
-; CHECK-NEXT:    br label [[FOR_END]], !dbg !20
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg [[DBG21:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg [[DBG21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg [[DBG21]]
+; CHECK-NEXT:    br label [[FOR_END]], !dbg [[DBG20]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ , [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg !23
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg !23
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg !23
-; CHECK-NEXT:    ret i32 undef, !dbg !24
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg [[DBG23]]
+; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg [[DBG23]]
+; CHECK-NEXT:    ret i32 undef, !dbg [[DBG24:![0-9]+]]
 ;
 entry:
   tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
index 9f14ff88cc816..012ba4da88033 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -82,7 +82,7 @@ define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[D:%.*]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub float 0.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUB]], 0.000000e+00
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[MUL]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index cbf6d8ff4fa95..38b2b97a23cd0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -111,9 +111,9 @@ define void @externally_used_ptrs() {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
index f9b9995066a22..409e2e71d090a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
@@ -13,7 +13,7 @@ define void @foo() {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[LS1_PH:%.*]] = phi float* [ [[_TMP1:%.*]], [[BB1]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[LS2_PH:%.*]] = phi float* [ [[_TMP2:%.*]], [[BB1]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    store float undef, float* [[LS1_PH]]
+; CHECK-NEXT:    store float undef, float* [[LS1_PH]], align 4
 ; CHECK-NEXT:    [[_TMP1]] = getelementptr float, float* [[LS1_PH]], i32 1
 ; CHECK-NEXT:    [[_TMP2]] = getelementptr float, float* [[LS2_PH]], i64 4
 ; CHECK-NEXT:    br i1 false, label [[BB1]], label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index bc6eeb1cdd7fd..8db775a352246 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> 
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> 
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> 
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
index b9d55ecb27ebe..3f332c14126b5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> 
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> 
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> 
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
index 3d165eb00b90d..c123956b8856e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> 
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> 
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> 
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
index 9ff8142b269df..ffd320d34e869 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> 
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> 
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> 
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> 
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> 
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> 
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> 
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> 
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
index 7974bce183d45..ed58f407628cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -275,12 +275,12 @@ define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudov
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
-; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
+; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
-; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
+; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
-; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
-; CHECK-NEXT:    store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
+; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], %pseudovec* [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
index 7956ab3411e64..5bfd4f0f5fcf7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -8,12 +8,12 @@ define void @test1(double* %a, double* %b, double* %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -35,13 +35,13 @@ define void @test2(double* %a, double* %b, i8* %e) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5
 ; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[E:%.*]] to double*
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -61,7 +61,6 @@ entry:
   ret void
 }
 
-;CHECK-DAG: !0 = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
 ;CHECK-DAG: !4 = !{float 5.000000e+00}
 ;CHECK-DAG: !5 = !{float 2.500000e+00}
 !0 = !{ float 5.0 }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
index b0cc02b649f04..1b224cb4109c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -24,9 +24,11 @@ define i32 @bar(double* nocapture %A, i32 %d) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[D:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]]
-; CHECK:         [[TMP6:%.*]] = tail call i32 (...) @foo()
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 (...) @foo()
 ; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:         [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], 
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], 
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double>
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index 468cabc598f1c..c0ed976fd702f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -12,10 +12,10 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
 ; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
 ; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
-; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
+; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
 ; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
 ; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
@@ -24,10 +24,10 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[RES1:%.*]] = add nsw i32 [[V1]], [[Y1]]
 ; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
-; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
+; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]], align 4
+; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -76,10 +76,10 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
 ; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
 ; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
-; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
+; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
 ; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
 ; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
@@ -88,10 +88,10 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
 ; CHECK-NEXT:    [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
-; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
+; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]], align 4
+; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index ceb48d98b744d..42c99c21c3a30 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -9,10 +9,10 @@ target triple = "i386--netbsd"
 define i32 @fn1() #0 {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa !0
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa !4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa !4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    br label [[DO_BODY:%.*]]
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index 9132cf26c2218..681d30dff4a86 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -81,11 +81,11 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) {
 ; CHECK-NEXT:    [[T4:%.*]] = load i32, i32* [[X4]], align 4
 ; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[X5]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[T4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]]
-; CHECK-NEXT:    [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]]
-; CHECK-NEXT:    [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]]
-; CHECK-NEXT:    [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; CHECK-NEXT:    [[MAX_ROOT_CMP:%.*]] = icmp sgt i32 [[TMP3]], [[T4]]
+; CHECK-NEXT:    [[MAX_ROOT_SEL:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 [[TMP3]], i32 [[T4]]
+; CHECK-NEXT:    [[C012345:%.*]] = icmp sgt i32 [[MAX_ROOT_SEL]], [[T5]]
+; CHECK-NEXT:    [[T17:%.*]] = select i1 [[C012345]], i32 [[MAX_ROOT_SEL]], i32 [[T5]]
+; CHECK-NEXT:    [[THREE_OR_FOUR:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 3, i32 4
 ; CHECK-NEXT:    store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8
 ; CHECK-NEXT:    ret i32 [[T17]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
index ed2cd870cf6f0..930315a55dc08 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -5,21 +5,23 @@
 define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
 ; CHECK-LABEL: @vsub2_test(
 ; CHECK-NEXT:    br label [[TMP1:%.*]]
-; CHECK:         [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
+; CHECK:       1:
+; CHECK-NEXT:    [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PO_03:%.*]] = phi i32* [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PTMPI2_02:%.*]] = phi i32* [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PTMPI1_01:%.*]] = phi i32* [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[TMP2]] = getelementptr inbounds i32, i32* [[PTMPI1_01]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4]] = getelementptr inbounds i32, i32* [[PTMPI2_02]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, i32* [[PO_03]], i64 1
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa !1
+; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop !5
-; CHECK:         ret void
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       9:
+; CHECK-NEXT:    ret void
 ;
   br label %1
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
index 3b5a53e44dec1..fefbbf1c75fd3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
@@ -17,11 +17,11 @@ define void @_Z2azv() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], undef
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef
-; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA1]]
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP2]], undef
+; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP2]], i32 undef
+; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = icmp sgt i32 [[OP_EXTRA1]], undef
+; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA3]]
 ; CHECK-NEXT:    [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef
 ; CHECK-NEXT:    ret void
 ;

From 6693c562f90925dd5214d86875a6f82ca5c6ef93 Mon Sep 17 00:00:00 2001
From: wlei 
Date: Mon, 24 Jan 2022 16:55:05 -0800
Subject: [PATCH 478/946] [llvm-profgen] Support to load debug info from a
 second binary

To reduce binary size, a binary's debug info and its executable segments can be separated (e.g. by using `objcopy --only-keep-debug`). This patch adds support in llvm-profgen for taking two binaries as input: the original executable binary, plus an additional binary that contains only the debug info. A new flag, `--debug-binary=file-path`, is added; when it is specified, llvm-profgen loads the debug info from the debug binary instead of from the executable binary.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D115948
---
 .../separate-debuginfo-binary.test            | 57 +++++++++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    | 19 +++++--
 llvm/tools/llvm-profgen/ProfiledBinary.h      | 12 +++-
 llvm/tools/llvm-profgen/llvm-profgen.cpp      | 14 +++--
 4 files changed, 90 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test

diff --git a/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test b/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test
new file mode 100644
index 0000000000000..3daeac33471ce
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test
@@ -0,0 +1,57 @@
+; RUN: llvm-objcopy --strip-debug %S/Inputs/inline-noprobe.perfbin %t1
+; RUN: llvm-objcopy --only-keep-debug %S/Inputs/inline-noprobe.perfbin %t2
+; RUN: echo -e "0\n0" > %t
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%t1 --debug-binary=%t2 --output=%t3 --fill-zero-for-all-funcs
+; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK
+
+; RUN: llvm-objcopy --strip-debug %S/Inputs/inline-cs-pseudoprobe.perfbin %t4
+; RUN: llvm-objcopy --only-keep-debug %S/Inputs/inline-cs-pseudoprobe.perfbin %t5
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%t4 --debug-binary=%t5 --output=%t6 --profile-summary-hot-count=0 --csspgo-preinliner=0
+; RUN: FileCheck %s --input-file %t6 --check-prefix=CHECK-CS-PROBE
+
+; CHECK: bar:0:0
+; CHECK:  1: 0
+; CHECK:  5: 0
+; CHECK: foo:0:0
+; CHECK:  0: 0
+; CHECK:  2.1: 0
+; CHECK:  3: 0
+; CHECK:  3.2: 0
+; CHECK:  4: 0
+; CHECK:  3.1: bar:0
+; CHECK:   1: 0
+; CHECK:   65533: 0
+; CHECK:  3.2: bar:0
+; CHECK:   1: 0
+; CHECK:   7: 0
+; CHECK: main:0:0
+; CHECK:  0: 0
+; CHECK:  2: 0
+; CHECK:  1: foo:0
+; CHECK:   2.1: 0
+; CHECK:   3: 0
+; CHECK:   3.2: 0
+; CHECK:   4: 0
+; CHECK:   65526: 0
+; CHECK:   3.1: bar:0
+; CHECK:    1: 0
+; CHECK:    65533: 0
+; CHECK:   3.2: bar:0
+; CHECK:    1: 0
+
+
+; CHECK-CS-PROBE: [main:2 @ foo]:74:0
+; CHECK-CS-PROBE:   1: 0
+; CHECK-CS-PROBE:   2: 15
+; CHECK-CS-PROBE:   3: 15
+; CHECK-CS-PROBE:   4: 14
+; CHECK-CS-PROBE:   5: 1
+; CHECK-CS-PROBE:   6: 15
+; CHECK-CS-PROBE:   7: 0
+; CHECK-CS-PROBE:   8: 14 bar:14
+; CHECK-CS-PROBE:   9: 0
+; CHECK-CS-PROBE: !CFGChecksum: 563088904013236
+; CHECK-CS-PROBE: [main:2 @ foo:8 @ bar]:28:14
+; CHECK-CS-PROBE:   1: 14
+; CHECK-CS-PROBE:   4: 14
+; CHECK-CS-PROBE: !CFGChecksum: 72617220756
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 6dc0d2604367d..a773a3c98d409 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -187,9 +187,9 @@ void ProfiledBinary::warnNoFuncEntry() {
 void ProfiledBinary::load() {
   // Attempt to open the binary.
   OwningBinary OBinary = unwrapOrError(createBinary(Path), Path);
-  Binary &Binary = *OBinary.getBinary();
+  Binary &ExeBinary = *OBinary.getBinary();
 
-  auto *Obj = dyn_cast(&Binary);
+  auto *Obj = dyn_cast(&ExeBinary);
   if (!Obj)
     exitWithError("not a valid Elf image", Path);
 
@@ -206,7 +206,15 @@ void ProfiledBinary::load() {
   decodePseudoProbe(Obj);
 
   // Load debug info of subprograms from DWARF section.
-  loadSymbolsFromDWARF(*cast(&Binary));
+  // If path of debug info binary is specified, use the debug info from it,
+  // otherwise use the debug info from the executable binary.
+  if (!DebugBinaryPath.empty()) {
+    OwningBinary DebugPath =
+        unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
+    loadSymbolsFromDWARF(*dyn_cast(DebugPath.getBinary()));
+  } else {
+    loadSymbolsFromDWARF(*dyn_cast(&ExeBinary));
+  }
 
   // Disassemble the text sections.
   disassemble(Obj);
@@ -684,8 +692,9 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
          "Binary should only symbolize its own instruction");
   auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
                                        object::SectionedAddress::UndefSection};
-  DIInliningInfo InlineStack =
-      unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
+  DIInliningInfo InlineStack = unwrapOrError(
+      Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
+      SymbolizerPath);
 
   SampleContextFrameVector CallStack;
   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index b5c985fe6ebbc..d3d1c6f1fd248 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -185,8 +185,12 @@ class BinarySizeContextTracker {
 using OffsetRange = std::pair;
 
 class ProfiledBinary {
-  // Absolute path of the binary.
+  // Absolute path of the executable binary.
   std::string Path;
+  // Path of the debug info binary.
+  std::string DebugBinaryPath;
+  // Path handed to the symbolizer; it should point to the binary with debug info.
+  StringRef SymbolizerPath;
   // The target triple.
   Triple TheTriple;
   // The runtime base address that the first executable segment is loaded at.
@@ -311,10 +315,12 @@ class ProfiledBinary {
   void load();
 
 public:
-  ProfiledBinary(const StringRef Path)
-      : Path(Path), ProEpilogTracker(this),
+  ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
+      : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
         TrackFuncContextSize(EnableCSPreInliner &&
                              UseContextCostForPreInliner) {
+    // Point to executable binary if debug info binary is not specified.
+    SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
     setupSymbolizer();
     load();
   }
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 0ab93ca0bd65b..f092df04d52b3 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -48,9 +48,15 @@ static cl::opt UnsymbolizedProfFilename(
 static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
                      cl::aliasopt(UnsymbolizedProfFilename));
 
-static cl::opt BinaryPath(
-    "binary", cl::value_desc("binary"), cl::Required,
-    cl::desc("Path of profiled binary, only one binary is supported."),
+static cl::opt
+    BinaryPath("binary", cl::value_desc("binary"), cl::Required,
+               cl::desc("Path of profiled executable binary."),
+               cl::cat(ProfGenCategory));
+
+static cl::opt DebugBinPath(
+    "debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore,
+    cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info "
+             "from it instead of the executable binary."),
     cl::cat(ProfGenCategory));
 
 extern cl::opt ShowDisassemblyOnly;
@@ -135,7 +141,7 @@ int main(int argc, const char *argv[]) {
 
   // Load symbols and disassemble the code of a given binary.
   std::unique_ptr Binary =
-      std::make_unique(BinaryPath);
+      std::make_unique(BinaryPath, DebugBinPath);
   if (ShowDisassemblyOnly)
     return EXIT_SUCCESS;
 

From 8b29b84c99ac8140c9820fd34b733bdedf5bb0f5 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer 
Date: Sat, 22 Jan 2022 15:13:13 -0500
Subject: [PATCH 479/946] [libc++] Fix LWG3422 "Issues of seed_seq's
 constructors"

https://cplusplus.github.io/LWG/issue3422

Also add a static_assert to check the "Mandates:" on the
iterator-pair constructor. Oddly, the `InputIterator` parameter
itself is merely preconditioned, not constrained, to satisfy the
input iterator requirements.

Also drive-by rename `init` to `__init`.

Differential Revision: https://reviews.llvm.org/D117962
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 libcxx/include/__random/seed_seq.h            | 31 ++++++++------
 .../rand.util.seedseq/default.pass.cpp        |  3 ++
 .../rand.util.seedseq/iterator.pass.cpp       | 42 ++++++++++++++++++-
 .../rand.util.seedseq/iterator.verify.cpp     | 30 +++++++++++++
 5 files changed, 93 insertions(+), 15 deletions(-)
 create mode 100644 libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 9baefabe05da8..0ce504816fe3b 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -108,7 +108,7 @@
 `3361 `__,"``safe_range`` case","October 2021","","","|ranges|"
 `3392 `__,"``ranges::distance()`` cannot be used on a move-only iterator with a sized sentinel","October 2021","","","|ranges|"
 `3407 `__,"Some problems with the wording changes of P1739R4","October 2021","","","|ranges|"
-`3422 `__,"Issues of ``seed_seq``'s constructors","October 2021","",""
+`3422 `__,"Issues of ``seed_seq``'s constructors","October 2021","|Complete|","14.0"
 `3470 `__,"``convertible-to-non-slicing`` seems to reject valid case","October 2021","","","|ranges|"
 `3480 `__,"``directory_iterator`` and ``recursive_directory_iterator`` are not C++20 ranges","October 2021","|Complete|","14.0","|ranges|"
 `3498 `__,"Inconsistent ``noexcept``-specifiers for ``basic_syncbuf``","October 2021","",""
diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h
index bf27af6627a54..1a0877995650e 100644
--- a/libcxx/include/__random/seed_seq.h
+++ b/libcxx/include/__random/seed_seq.h
@@ -31,25 +31,24 @@ class _LIBCPP_TEMPLATE_VIS seed_seq
     // types
     typedef uint32_t result_type;
 
-private:
-    vector __v_;
-
-    template
-        void init(_InputIterator __first, _InputIterator __last);
-public:
     // constructors
     _LIBCPP_INLINE_VISIBILITY
     seed_seq() _NOEXCEPT {}
 #ifndef _LIBCPP_CXX03_LANG
-    template
-        _LIBCPP_INLINE_VISIBILITY
-        seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());}
+    template::value>* = nullptr>
+    _LIBCPP_INLINE_VISIBILITY
+    seed_seq(initializer_list<_Tp> __il) {
+        __init(__il.begin(), __il.end());
+    }
 #endif // _LIBCPP_CXX03_LANG
 
     template
-        _LIBCPP_INLINE_VISIBILITY
-        seed_seq(_InputIterator __first, _InputIterator __last)
-             {init(__first, __last);}
+    _LIBCPP_INLINE_VISIBILITY
+    seed_seq(_InputIterator __first, _InputIterator __last) {
+        static_assert(is_integral::value_type>::value,
+            "Mandates: iterator_traits::value_type is an integer type");
+        __init(__first, __last);
+    }
 
     // generating functions
     template
@@ -68,11 +67,17 @@ class _LIBCPP_TEMPLATE_VIS seed_seq
 
     _LIBCPP_INLINE_VISIBILITY
     static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);}
+
+private:
+    template
+    void __init(_InputIterator __first, _InputIterator __last);
+
+    vector __v_;
 };
 
 template
 void
-seed_seq::init(_InputIterator __first, _InputIterator __last)
+seed_seq::__init(_InputIterator __first, _InputIterator __last)
 {
     for (_InputIterator __s = __first; __s != __last; ++__s)
         __v_.push_back(*__s & 0xFFFFFFFF);
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
index 33f855bab30c3..c99d5e276b2b5 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
@@ -19,8 +19,11 @@
 
 int main(int, char**)
 {
+  ASSERT_NOEXCEPT(std::seed_seq());
+  {
     std::seed_seq s;
     assert(s.size() == 0);
+  }
 
   return 0;
 }
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
index 2e2c6365eb4f0..1dd9a055f7ca3 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
@@ -18,11 +18,13 @@
 
 #include "test_macros.h"
 
-int main(int, char**)
+void test()
 {
+  {
     unsigned a[5] = {5, 4, 3, 2, 1};
     std::seed_seq s(a, a+5);
     assert(s.size() == 5);
+
     unsigned b[5] = {0};
     s.param(b);
     assert(b[0] == 5);
@@ -30,6 +32,44 @@ int main(int, char**)
     assert(b[2] == 3);
     assert(b[3] == 2);
     assert(b[4] == 1);
+  }
+  {
+    // Test truncation to 32 bits
+    unsigned long long a[4] = {
+      0x1234000056780000uLL,
+      0x0000001234567800uLL,
+      0xFFFFFFFFFFFFFFFFuLL,
+      0x0000000180000000uLL,
+    };
+    std::seed_seq s(a, a+4);
+    assert(s.size() == 4);
+
+    unsigned b[4] = {0};
+    s.param(b);
+    assert(b[0] == 0x56780000u);
+    assert(b[1] == 0x34567800u);
+    assert(b[2] == 0xFFFFFFFFu);
+    assert(b[3] == 0x80000000u);
+  }
+#if TEST_STD_VER >= 11
+  {
+    // Test uniform initialization syntax (LWG 3422)
+    unsigned a[3] = {1, 2, 3};
+    std::seed_seq s{a, a+3};  // uniform initialization
+    assert(s.size() == 3);
+
+    unsigned b[3] = {0};
+    s.param(b);
+    assert(b[0] == 1);
+    assert(b[1] == 2);
+    assert(b[2] == 3);
+  }
+#endif // TEST_STD_VER >= 11
+}
+
+int main(int, char**)
+{
+  test();
 
   return 0;
 }
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp
new file mode 100644
index 0000000000000..d5c57841c8fa2
--- /dev/null
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// 
+
+// class seed_seq;
+
+// template
+//   seed_seq(InputIterator begin, InputIterator end);
+// Mandates: iterator_traits::value_type is an integer type.
+
+#include 
+
+void test()
+{
+  {
+    bool a[2] = {true, false};
+    std::seed_seq s(a, a+2); // OK
+  }
+  {
+    double a[2] = {1, 2};
+    std::seed_seq s(a, a+2); // expected-error@*:* {{Mandates: iterator_traits::value_type is an integer type}}
+        // expected-error@*:* {{invalid operands to binary expression ('double' and 'unsigned int')}}
+  }
+}

From 16bff06790a7652b282352b07250b627e5787c8c Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Mon, 24 Jan 2022 17:18:18 -0800
Subject: [PATCH 480/946] [lldb] Make PythonDataObjects work with Python 2

I considered keeping this change strictly downstream. Since we still
have a bunch of places that check for Python 2, I figured it doesn't
harm to land it upstream and avoid the conflict when I eventually do
remove them (hopefully soon!).
---
 .../Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 32020f983f605..68f4e90d70f6e 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -70,7 +70,9 @@ Expected python::As(Expected &&obj) {
 }
 
 static bool python_is_finalizing() {
-#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
+#if PY_MAJOR_VERSION == 2
+  return false;
+#elif PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
   return _Py_Finalizing != nullptr;
 #else
   return _Py_IsFinalizing();
@@ -279,7 +281,9 @@ PythonObject PythonObject::GetAttributeValue(llvm::StringRef attr) const {
 }
 
 StructuredData::ObjectSP PythonObject::CreateStructuredObject() const {
+#if PY_MAJOR_VERSION >= 3
   assert(PyGILState_Check());
+#endif
   switch (GetObjectType()) {
   case PyObjectType::Dictionary:
     return PythonDictionary(PyRefType::Borrowed, m_py_obj)

From 06cfdd5224bf5496e3d3dbdb9f73e77161ad5438 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 24 Jan 2022 14:02:20 -0500
Subject: [PATCH 481/946] [OpenMP][Fix] Properly inherit calling convention

Previously in OpenMPOpt we did not correctly inherit the calling
convention of the callee when creating new OpenMP runtime calls. This
created issues when the calling convention was changed during
`GlobalOpt` but a new call was created without the correct calling
convention. This led to the call being replaced with a poison value in
`InstCombine` due to undefined behaviour and caused large portions of
the program to be incorrectly eliminated. This patch correctly inherits
the existing calling convention from the callee.

Reviewed By: tianshilei1992, jdoerfert

Differential Revision: https://reviews.llvm.org/D118059
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp      | 57 +++++++++++++++-------
 llvm/test/Transforms/OpenMP/spmdization.ll |  6 ++-
 2 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 416c3b8b01d40..87a1f7f7045ab 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -417,6 +417,12 @@ struct OMPInformationCache : public InformationCache {
       recollectUsesForFunction(static_cast(Idx));
   }
 
+  // Helper function to inherit the calling convention of the function callee.
+  void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
+    if (Function *Fn = dyn_cast(Callee.getCallee()))
+      CI->setCallingConv(Fn->getCallingConv());
+  }
+
   /// Helper to initialize all runtime function information for those defined
   /// in OpenMPKinds.def.
   void initializeRuntimeFunctions() {
@@ -1531,6 +1537,7 @@ struct OpenMPOpt {
 
     CallInst *IssueCallsite =
         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
+    OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
     RuntimeCall.eraseFromParent();
 
     // Add "wait" runtime call declaration:
@@ -1543,7 +1550,9 @@ struct OpenMPOpt {
             OffloadArray::DeviceIDArgNum), // device_id.
         Handle                             // handle to wait on.
     };
-    CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+    CallInst *WaitCallsite = CallInst::Create(
+        WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+    OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
 
     return true;
   }
@@ -3241,8 +3250,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
       FunctionCallee HardwareTidFn =
           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
               M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
-      Value *Tid =
+      CallInst *Tid =
           OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+      Tid->setDebugLoc(DL);
+      OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
       Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
       OMPInfoCache.OMPBuilder.Builder
           .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
@@ -3255,14 +3266,18 @@ struct AAKernelInfoFunction : AAKernelInfo {
               M, OMPRTL___kmpc_barrier_simple_spmd);
       OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
           RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
-      OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
-          ->setDebugLoc(DL);
+      CallInst *Barrier =
+          OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
+      Barrier->setDebugLoc(DL);
+      OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
 
       // Second barrier ensures workers have read broadcast values.
-      if (HasBroadcastValues)
-        CallInst::Create(BarrierFn, {Ident, Tid}, "",
-                         RegionBarrierBB->getTerminator())
-            ->setDebugLoc(DL);
+      if (HasBroadcastValues) {
+        CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "",
+                                             RegionBarrierBB->getTerminator());
+        Barrier->setDebugLoc(DL);
+        OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
+      }
     };
 
     auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
@@ -3532,10 +3547,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee WarpSizeFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_warp_size);
-    Instruction *BlockHwSize =
+    CallInst *BlockHwSize =
         CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+    OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
     BlockHwSize->setDebugLoc(DLoc);
-    Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
     WarpSize->setDebugLoc(DLoc);
     Instruction *BlockSize =
         BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
@@ -3575,8 +3592,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee BarrierFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_barrier_simple_generic);
-    CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
-        ->setDebugLoc(DLoc);
+    CallInst *Barrier =
+        CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
+    OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
+    Barrier->setDebugLoc(DLoc);
 
     if (WorkFnAI->getType()->getPointerAddressSpace() !=
         (unsigned int)AddressSpace::Generic) {
@@ -3592,8 +3611,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee KernelParallelFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_kernel_parallel);
-    Instruction *IsActiveWorker = CallInst::Create(
+    CallInst *IsActiveWorker = CallInst::Create(
         KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
+    OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
     IsActiveWorker->setDebugLoc(DLoc);
     Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
                                        StateMachineBeginBB);
@@ -3673,10 +3693,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
                        StateMachineIfCascadeCurrentBB)
         ->setDebugLoc(DLoc);
 
-    CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
-                         M, OMPRTL___kmpc_kernel_end_parallel),
-                     {}, "", StateMachineEndParallelBB)
-        ->setDebugLoc(DLoc);
+    FunctionCallee EndParallelFn =
+        OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+            M, OMPRTL___kmpc_kernel_end_parallel);
+    CallInst *EndParallel =
+        CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
+    OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
+    EndParallel->setDebugLoc(DLoc);
     BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
         ->setDebugLoc(DLoc);
 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 5051bce98279d..12ad3b831e3b8 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -1430,7 +1430,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; AMDGPU:       region.check.tid:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; AMDGPU-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; AMDGPU:       region.guarded:
@@ -1466,7 +1466,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; NVPTX:       region.check.tid:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; NVPTX-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; NVPTX:       region.guarded:
@@ -2328,6 +2328,8 @@ entry:
   ret void
 }
 
+declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();
+
 attributes #0 = { alwaysinline convergent norecurse nounwind }
 attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
 attributes #2 = { convergent }

From 5eb49009ebe6f1672e6c72f8ea1fe07d4018f682 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 24 Jan 2022 15:45:27 -0500
Subject: [PATCH 482/946] [OpenMP] Add more identifier to created shared
 globals

Currently we push some variables to a global constant containing shared
memory as an optimization. This generated constant had internal linkage
and should not have collided with any known identifiers in the
translation unit. However, there have been observed cases of this
optimization unintentionally colliding with undocumented PTX
identifiers. This patch adds a suffix to the created globals to
hopefully bypass this.

Depends on D118059

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D118068
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         |  2 +-
 .../OpenMP/replace_globalization.ll           |  8 +--
 llvm/test/Transforms/OpenMP/spmdization.ll    | 56 +++++++++----------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 87a1f7f7045ab..6288d2ff4b01b 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2779,7 +2779,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
       auto *SharedMem = new GlobalVariable(
           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
-          UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
+          UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
           GlobalValue::NotThreadLocal,
           static_cast(AddressSpace::Shared));
       auto *NewBuffer =
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 56dd9dd20893a..8c079317bcf11 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -119,8 +119,8 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
-; CHECK: @[[Y:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; CHECK: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
+; CHECK: @[[Y_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ;.
 ; CHECK-LABEL: define {{[^@]+}}@foo() {
 ; CHECK-NEXT:  entry:
@@ -139,13 +139,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], -1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
 ; CHECK:       master1:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[NEXT:%.*]]
 ; CHECK:       next:
 ; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR4]]
 ; CHECK-NEXT:    br label [[MASTER2:%.*]]
 ; CHECK:       master2:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 12ad3b831e3b8..b8de4034185d1 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -108,41 +108,41 @@
 ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU: @[[X_shared_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX: @[[X_shared1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; AMDGPU-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; AMDGPU-DISABLED: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU-DISABLED: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU-DISABLED: @[[X_shared_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -153,13 +153,13 @@
 ; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; NVPTX-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; NVPTX-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX-DISABLED: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX-DISABLED: @[[X_shared1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -1049,7 +1049,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
 ; AMDGPU-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1)
@@ -1070,7 +1070,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
 ; NVPTX-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1)
@@ -1091,7 +1091,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
 ; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1112,7 +1112,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
 ; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1427,7 +1427,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
+; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; AMDGPU:       region.check.tid:
 ; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1452,7 +1452,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
 ; AMDGPU-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1)
@@ -1463,7 +1463,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
+; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; NVPTX:       region.check.tid:
 ; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1488,7 +1488,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
 ; NVPTX-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
 ; NVPTX-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1)
@@ -1499,7 +1499,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
+; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
@@ -1511,7 +1511,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
 ; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1522,7 +1522,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
+; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
@@ -1534,7 +1534,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
 ; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
 ; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)

From 92c1c63daeaf0b6b7abc6561133e2d3dbda80f8c Mon Sep 17 00:00:00 2001
From: Aart Bik 
Date: Mon, 24 Jan 2022 14:23:03 -0800
Subject: [PATCH 483/946] [mlir][sparse] integration test for sparse output
 operation

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D118079
---
 .../SparseTensor/python/test_output.py        | 127 ++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/python/test_output.py

diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
new file mode 100644
index 0000000000000..eaf39db09a2b0
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
@@ -0,0 +1,127 @@
+# RUN: SUPPORT_LIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+# RUN:   %PYTHON %s | FileCheck %s
+
+import ctypes
+import os
+import tempfile
+
+import mlir.all_passes_registration
+
+from mlir import execution_engine
+from mlir import ir
+from mlir import passmanager
+from mlir import runtime as rt
+
+from mlir.dialects import builtin
+from mlir.dialects import sparse_tensor as st
+
+
+# TODO: move more into actual IR building.
+def boilerplate(attr: st.EncodingAttr):
+  """Returns MLIR text for a main method that writes a sparse tensor to %p."""
+  return f"""
+func @main(%p : !llvm.ptr<i8>) -> () attributes {{ llvm.emit_c_interface }} {{
+  %d = arith.constant sparse<[[0, 0], [1, 1], [0, 9], [9, 0], [4, 4]],
+                             [1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<10x10xf64>
+  %a = sparse_tensor.convert %d : tensor<10x10xf64> to tensor<10x10xf64, {attr}>
+  sparse_tensor.out %a, %p : tensor<10x10xf64, {attr}>, !llvm.ptr<i8>
+  return
+}}
+"""
+
+
+def expected():
+  """Returns the expected contents of the output file.
+
+  Regardless of the dimension ordering, compression, and bitwidths that are
+  used in the sparse tensor, the output is always lexicographically sorted
+  by natural index order.
+  """
+  return """; extended FROSTT format
+2 5
+10 10
+1 1 1
+1 10 3
+2 2 2
+5 5 5
+10 1 4
+"""
+
+
+def build_compile_and_run_output(attr: st.EncodingAttr, support_lib: str,
+                                 compiler):
+  """Builds, compiles and runs the kernel for `attr`, then checks its output."""
+  module = ir.Module.parse(boilerplate(attr))
+  compiler(module)
+  engine = execution_engine.ExecutionEngine(
+      module, opt_level=0, shared_libs=[support_lib])
+
+  # Invoke the kernel and compare output.
+  with tempfile.TemporaryDirectory() as test_dir:
+    out = os.path.join(test_dir, 'out.tns')
+    buf = out.encode('utf-8')
+    mem_a = ctypes.pointer(ctypes.pointer(ctypes.create_string_buffer(buf)))
+    engine.invoke('main', mem_a)
+    # Close the output file before the temporary directory is removed.
+    with open(out) as f:
+      if f.read() != expected():
+        quit('FAILURE')
+
+
+class SparseCompiler:
+  """Callable that runs the sparse compiler pass pipeline on a module."""
+
+  def __init__(self):
+    self.pipeline = ','.join([
+        'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops)',
+        'sparsification',
+        'sparse-tensor-conversion',
+        'builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf)',
+        'convert-scf-to-std',
+        'func-bufferize',
+        'tensor-constant-bufferize',
+        'builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize)',
+        'convert-vector-to-llvm{reassociate-fp-reductions=1 enable-index-optimizations=1}',
+        'lower-affine',
+        'convert-memref-to-llvm',
+        'convert-std-to-llvm',
+        'reconcile-unrealized-casts',
+    ])
+
+  def __call__(self, module: ir.Module):
+    passmanager.PassManager.parse(self.pipeline).run(module)
+
+
+def main():
+  """Checks SUPPORT_LIB, runs every sparse type combination, prints the count."""
+  support_lib = os.getenv('SUPPORT_LIB')
+  assert support_lib is not None, 'SUPPORT_LIB is undefined'
+  if not os.path.exists(support_lib):
+    import errno  # Local import: errno is not imported at module scope.
+    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
+                            support_lib)
+
+  # CHECK-LABEL: TEST: test_output
+  print('\nTEST: test_output')
+  count = 0
+  with ir.Context() as ctx, ir.Location.unknown():
+    # Loop over various sparse types: CSR, DCSR, CSC, DCSC.
+    levels = [[st.DimLevelType.dense, st.DimLevelType.compressed],
+              [st.DimLevelType.compressed, st.DimLevelType.compressed]]
+    orderings = [ir.AffineMap.get_permutation([0, 1]),
+                 ir.AffineMap.get_permutation([1, 0])]
+    bitwidths = [8, 16, 32, 64]
+    for level in levels:
+      for ordering in orderings:
+        for bwidth in bitwidths:
+          attr = st.EncodingAttr.get(level, ordering, bwidth, bwidth)
+          compiler = SparseCompiler()
+          build_compile_and_run_output(attr, support_lib, compiler)
+          count = count + 1
+
+  # CHECK: Passed 16 tests
+  print('Passed', count, 'tests')
+
+
+if __name__ == '__main__':
+  main()

From ff8f7904d14d77e40f90c2f8306ecb737b02c997 Mon Sep 17 00:00:00 2001
From: Mehdi Amini 
Date: Tue, 25 Jan 2022 02:07:57 +0000
Subject: [PATCH 484/946] Remove null check after dereferencing the pointer
 (NFC)

Flagged by Coverity
---
 .../lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index 32a77430fa3a9..73decbac7382c 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -349,7 +349,7 @@ class BufferDeallocation : public BufferPlacementTransformationBase {
     Region *argRegion = block->getParent();
     Operation *parentOp = argRegion->getParentOp();
     RegionBranchOpInterface regionInterface;
-    if (!argRegion || &argRegion->front() != block ||
+    if (&argRegion->front() != block ||
         !(regionInterface = dyn_cast(parentOp)))
       return success();
 

From 9ea3dfa5d015f61ff282ed88d08125bb38fd19a8 Mon Sep 17 00:00:00 2001
From: jacquesguan 
Date: Mon, 24 Jan 2022 15:13:46 +0800
Subject: [PATCH 485/946] [RISCV][NFC] Rename RequiredExtensions to
 RequiredFeatures.

The field 'RequiredExtensions' specifies the constraints for an RVV builtin, but it can contain entries that are neither an extension nor a sub-extension, such as 'RV64'. The word 'extension' is therefore no longer accurate; 'feature' fits better.

Differential Revision: https://reviews.llvm.org/D118015
---
 clang/include/clang/Basic/riscv_vector.td |  8 ++++----
 clang/utils/TableGen/RISCVVEmitter.cpp    | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 28c57cc6afeeb..bf268d89d19e1 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -215,8 +215,8 @@ class RVVBuiltin RequiredExtensions = [];
+  // Features required to enable for this builtin.
+  list RequiredFeatures = [];
 
   // Number of fields for Load/Store Segment instructions.
   int NF = 1;
@@ -720,7 +720,7 @@ multiclass RVVIndexedLoad {
         defvar eew64 = "64";
         defvar eew64_type = "(Log2EEW:6)";
         let Name = op # eew64 # "_v", IRName = op, IRNameMask = op # "_mask",
-            RequiredExtensions = ["RV64"] in {
+            RequiredFeatures = ["RV64"] in {
             def: RVVBuiltin<"v", "vPCe" # eew64_type # "Uv", type>;
               if !not(IsFloat.val) then {
                 def: RVVBuiltin<"Uv", "UvPCUe" # eew64_type # "Uv", type>;
@@ -819,7 +819,7 @@ multiclass RVVIndexedStore {
         defvar eew64 = "64";
         defvar eew64_type = "(Log2EEW:6)";
         let Name = op # eew64  # "_v", IRName = op, IRNameMask = op # "_mask",
-            RequiredExtensions = ["RV64"]  in  {
+            RequiredFeatures = ["RV64"]  in  {
           def : RVVBuiltin<"v", "0Pe" # eew64_type # "Uvv", type>;
           if !not(IsFloat.val) then {
             def : RVVBuiltin<"Uv", "0PUe" # eew64_type # "UvUv", type>;
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index c063b766e4a65..67d946d73e419 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -179,7 +179,7 @@ class RVVIntrinsic {
                bool HasNoMaskedOverloaded, bool HasAutoDef,
                StringRef ManualCodegen, const RVVTypes &Types,
                const std::vector &IntrinsicTypes,
-               const std::vector &RequiredExtensions, unsigned NF);
+               const std::vector &RequiredFeatures, unsigned NF);
   ~RVVIntrinsic() = default;
 
   StringRef getBuiltinName() const { return BuiltinName; }
@@ -772,7 +772,7 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
                            bool HasNoMaskedOverloaded, bool HasAutoDef,
                            StringRef ManualCodegen, const RVVTypes &OutInTypes,
                            const std::vector &NewIntrinsicTypes,
-                           const std::vector &RequiredExtensions,
+                           const std::vector &RequiredFeatures,
                            unsigned NF)
     : IRName(IRName), IsMask(IsMask), HasVL(HasVL), HasPolicy(HasPolicy),
       HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef),
@@ -805,8 +805,8 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
     if (T->isVector(64))
       RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELen64;
   }
-  for (auto Extension : RequiredExtensions) {
-    if (Extension == "RV64")
+  for (auto Feature : RequiredFeatures) {
+    if (Feature == "RV64")
       RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64;
   }
 
@@ -1154,8 +1154,8 @@ void RVVEmitter::createRVVIntrinsics(
     StringRef ManualCodegenMask = R->getValueAsString("ManualCodegenMask");
     std::vector IntrinsicTypes =
         R->getValueAsListOfInts("IntrinsicTypes");
-    std::vector RequiredExtensions =
-        R->getValueAsListOfStrings("RequiredExtensions");
+    std::vector RequiredFeatures =
+        R->getValueAsListOfStrings("RequiredFeatures");
     StringRef IRName = R->getValueAsString("IRName");
     StringRef IRNameMask = R->getValueAsString("IRNameMask");
     unsigned NF = R->getValueAsInt("NF");
@@ -1223,7 +1223,7 @@ void RVVEmitter::createRVVIntrinsics(
             Name, SuffixStr, MangledName, MangledSuffixStr, IRName,
             /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy,
             HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(),
-            IntrinsicTypes, RequiredExtensions, NF));
+            IntrinsicTypes, RequiredFeatures, NF));
         if (HasMask) {
           // Create a mask intrinsic
           Optional MaskTypes =
@@ -1232,7 +1232,7 @@ void RVVEmitter::createRVVIntrinsics(
               Name, SuffixStr, MangledName, MangledSuffixStr, IRNameMask,
               /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy,
               HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask,
-              MaskTypes.getValue(), IntrinsicTypes, RequiredExtensions, NF));
+              MaskTypes.getValue(), IntrinsicTypes, RequiredFeatures, NF));
         }
       } // end for Log2LMULList
     }   // end for TypeRange

From 0e55d4fab0183b6dca82ce127b78ded3db918c27 Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov 
Date: Mon, 24 Jan 2022 16:29:46 +0700
Subject: [PATCH 486/946] [AA] Refine ModRefInfo for llvm.memcpy.* in presence
 of operand bundles

The presence of operand bundles changes semantics with respect to ModRef. In particular, the spec says: "From the compiler's perspective, deoptimization operand bundles make the call sites they're attached to at least readonly. They read through all of their pointer typed operands (even if they're not otherwise escaped) and the entire visible heap. Deoptimization operand bundles do not capture their operands except during deoptimization, in which case control will not be returned to the compiled frame". Fix the handling of llvm.memcpy.* according to the spec.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D118033
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 4 ++--
 llvm/test/Analysis/BasicAA/deoptimize.ll | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index fa9ccb095a21d..b4c9859628379 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1020,9 +1020,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
         getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI);
     // It's also possible for Loc to alias both src and dest, or neither.
     ModRefInfo rv = ModRefInfo::NoModRef;
-    if (SrcAA != AliasResult::NoAlias)
+    if (SrcAA != AliasResult::NoAlias || Call->hasReadingOperandBundles())
       rv = setRef(rv);
-    if (DestAA != AliasResult::NoAlias)
+    if (DestAA != AliasResult::NoAlias || Call->hasClobberingOperandBundles())
       rv = setMod(rv);
     return rv;
   }
diff --git a/llvm/test/Analysis/BasicAA/deoptimize.ll b/llvm/test/Analysis/BasicAA/deoptimize.ll
index 89297e2583186..18a41bd5405ae 100644
--- a/llvm/test/Analysis/BasicAA/deoptimize.ll
+++ b/llvm/test/Analysis/BasicAA/deoptimize.ll
@@ -22,7 +22,7 @@ define i32 @test_memcpy_with_deopt() {
 ; CHECK-LABEL: Function: test_memcpy_with_deopt:
 ; CHECK: Just Mod:  Ptr: i8* %A	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
 ; CHECK: Just Ref:  Ptr: i8* %B	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
-; CHECK: NoModRef:  Ptr: i32* @G1	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: Just Ref:  Ptr: i32* @G1	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
 
   %A = alloca i8
   %B = alloca i8

From 810f13f0ebde70e679a097a9f5dbe37fe58ffa27 Mon Sep 17 00:00:00 2001
From: Richard 
Date: Mon, 24 Jan 2022 20:19:03 -0700
Subject: [PATCH 487/946] [clang-tools-extra] Fix documentation build (NFC)

---
 clang-tools-extra/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index ba0e530b7fec4..cb622f9b09606 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -192,7 +192,7 @@ Changes in existing checks
   option to control whether to warn on narrowing integer to floating-point
   conversions.
 
-- Improved :doc:`performance-move-const-arg` check.
+- Improved :doc:`performance-move-const-arg <clang-tidy/checks/performance-move-const-arg>` check.
 
   Removed a wrong FixIt for trivially copyable objects wrapped by ``std::move()`` and passed to an rvalue reference parameter. Removal of ``std::move()`` would break the code.
 

From e01e4c9115ad49479d01b6b6de4e83ee454bab24 Mon Sep 17 00:00:00 2001
From: harsh 
Date: Tue, 25 Jan 2022 02:37:52 +0000
Subject: [PATCH 488/946] Fix bugs in GPUToNVVM lowering

The current lowering from GPU to NVVM does
not correctly handle the following cases when
lowering the gpu shuffle op.

1. When the active width is set to 32 (all lanes), the
current approach computes (1 << 32) - 1, which results in
a poison value in the LLVM IR. We fix this by defining the
active mask as (-1) >> (32 - width).

2. In the case of shuffle up, the computation of the third
operand c has to be different from the other 3 modes due to
the op definition in the ISA reference.
(https://docs.nvidia.com/cuda/parallel-thread-execution/index.html)
Specifically, the predicate value is computed as j >= maxLane
for up and j <= maxLane for all other modes. We fix this by
computing maskAndClamp as 32 - width for this mode.

TEST: We modify the existing test and add more checks for the up mode.

Reviewed By: ThomasRaoux

Differential Revision: https://reviews.llvm.org/D118086
---
 .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp        | 32 +++++++++++++------
 .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir     | 15 +++++++--
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index c1b95c71b474b..9d564ee7d19d4 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -64,8 +64,10 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern {
   /// the highest lane which participates in the shuffle).
   ///
   ///     %one = llvm.constant(1 : i32) : i32
-  ///     %shl = llvm.shl %one, %width : i32
-  ///     %active_mask = llvm.sub %shl, %one : i32
+  ///     %minus_one = llvm.constant(-1 : i32) : i32
+  ///     %thirty_two = llvm.constant(32 : i32) : i32
+  ///     %num_lanes = llvm.sub %thirty_two, %width : i32
+  ///     %active_mask = llvm.lshr %minus_one, %num_lanes : i32
   ///     %mask_and_clamp = llvm.sub %width, %one : i32
   ///     %shfl = nvvm.shfl.sync.bfly %active_mask, %value, %offset,
   ///         %mask_and_clamp : !llvm<"{ float, i1 }">
@@ -86,14 +88,24 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern {
 
     Value one = rewriter.create(
         loc, int32Type, rewriter.getI32IntegerAttr(1));
-    // Bit mask of active lanes: `(1 << activeWidth) - 1`.
-    Value activeMask = rewriter.create(
-        loc, int32Type,
-        rewriter.create(loc, int32Type, one, adaptor.width()),
-        one);
-    // Clamp lane: `activeWidth - 1`
-    Value maskAndClamp =
-        rewriter.create(loc, int32Type, adaptor.width(), one);
+    Value minusOne = rewriter.create(
+        loc, int32Type, rewriter.getI32IntegerAttr(-1));
+    Value thirtyTwo = rewriter.create(
+        loc, int32Type, rewriter.getI32IntegerAttr(32));
+    Value numLeadInactiveLane = rewriter.create(
+        loc, int32Type, thirtyTwo, adaptor.width());
+    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
+    Value activeMask = rewriter.create(loc, int32Type, minusOne,
+                                                     numLeadInactiveLane);
+    Value maskAndClamp;
+    if (op.mode() == gpu::ShuffleMode::UP) {
+      // Clamp lane: `32 - activeWidth`
+      maskAndClamp = numLeadInactiveLane;
+    } else {
+      // Clamp lane: `activeWidth - 1`
+      maskAndClamp =
+          rewriter.create(loc, int32Type, adaptor.width(), one);
+    }
 
     auto returnValueAndIsValidAttr = rewriter.getUnitAttr();
     Value shfl = rewriter.create(
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index a413dae33ceb7..8219d2204772f 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -117,14 +117,23 @@ gpu.module @test_module {
     // CHECK: %[[#WIDTH:]] = llvm.mlir.constant(23 : i32) : i32
     %arg2 = arith.constant 23 : i32
     // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : i32) : i32
-    // CHECK: %[[#SHL:]] = llvm.shl %[[#ONE]], %[[#WIDTH]] : i32
-    // CHECK: %[[#MASK:]] = llvm.sub %[[#SHL]], %[[#ONE]] : i32
+    // CHECK: %[[#MINUS_ONE:]] = llvm.mlir.constant(-1 : i32) : i32
+    // CHECK: %[[#THIRTY_TWO:]] = llvm.mlir.constant(32 : i32) : i32
+    // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32
+    // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32
     // CHECK: %[[#CLAMP:]] = llvm.sub %[[#WIDTH]], %[[#ONE]] : i32
     // CHECK: %[[#SHFL:]] = nvvm.shfl.sync bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
     // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(f32, i1)>
     // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(f32, i1)>
     %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : f32
-    // CHECK: nvvm.shfl.sync up {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : i32) : i32
+    // CHECK: %[[#MINUS_ONE:]] = llvm.mlir.constant(-1 : i32) : i32
+    // CHECK: %[[#THIRTY_TWO:]] = llvm.mlir.constant(32 : i32) : i32
+    // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32
+    // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32
+    // CHECK: %[[#SHFL:]] = nvvm.shfl.sync up %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#NUM_LANES]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+    // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(f32, i1)>
+    // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(f32, i1)>
     %shflu, %predu = gpu.shuffle up %arg0, %arg1, %arg2 : f32
     // CHECK: nvvm.shfl.sync down {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
     %shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32

From 0e9a4a3b65363c082087864b9ff5e0da33be90da Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Thu, 20 Jan 2022 14:30:47 -0800
Subject: [PATCH 489/946] [mlir] Move the Buffer related source files out of
 Transforms/

Transforms/ should only contain dialect-independent transformations,
and these files are a much better fit for the bufferization dialect anyways.

Differential Revision: https://reviews.llvm.org/D117839
---
 .../Bufferization}/Transforms/BufferUtils.h   | 17 ++---
 .../Dialect/Bufferization/Transforms/Passes.h | 24 +++++++
 .../Bufferization/Transforms/Passes.td        | 69 +++++++++++++++++++
 .../Dialect/StandardOps/Transforms/Passes.h   |  4 +-
 mlir/include/mlir/Transforms/Passes.h         | 24 -------
 mlir/include/mlir/Transforms/Passes.td        | 69 -------------------
 .../Transforms/BufferDeallocation.cpp         |  3 +-
 .../Transforms/BufferOptimizations.cpp        | 20 +++---
 .../Transforms/BufferResultsToOutParams.cpp   |  5 +-
 .../Bufferization}/Transforms/BufferUtils.cpp | 54 +++++++++++++--
 .../Bufferization/Transforms/CMakeLists.txt   |  3 +
 .../ArithInterfaceImpl.cpp                    |  2 +-
 .../Transforms/TensorConstantBufferize.cpp    | 45 +-----------
 mlir/lib/Transforms/CMakeLists.txt            |  4 --
 mlir/lib/Transforms/PassDetail.h              |  4 --
 15 files changed, 175 insertions(+), 172 deletions(-)
 rename mlir/include/mlir/{ => Dialect/Bufferization}/Transforms/BufferUtils.h (95%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferOptimizations.cpp (96%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferResultsToOutParams.cpp (97%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferUtils.cpp (74%)

diff --git a/mlir/include/mlir/Transforms/BufferUtils.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
similarity index 95%
rename from mlir/include/mlir/Transforms/BufferUtils.h
rename to mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
index d9d06963fb28b..681b94953f20f 100644
--- a/mlir/include/mlir/Transforms/BufferUtils.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
@@ -11,13 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_BUFFERUTILS_H
-#define MLIR_TRANSFORMS_BUFFERUTILS_H
+#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
+#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
 
 #include "mlir/Analysis/BufferViewFlowAnalysis.h"
 #include "mlir/Analysis/Liveness.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
@@ -25,6 +24,11 @@
 #include "mlir/Transforms/DialectConversion.h"
 
 namespace mlir {
+namespace memref {
+class GlobalOp;
+} // namespace memref
+
+namespace bufferization {
 
 /// A simple analysis that detects allocation operations.
 class BufferPlacementAllocs {
@@ -117,10 +121,6 @@ class BufferPlacementTransformationBase {
   Liveness liveness;
 };
 
-namespace memref {
-class GlobalOp;
-} // namespace memref
-
 // Support class to create global ops for tensor-valued constants in the
 // program. Globals are created lazily at the top of the `moduleOp` with pretty
 // names. Duplicates are avoided.
@@ -137,6 +137,7 @@ class GlobalCreator {
   // dependence to the memref dialect for this.
   DenseMap globals;
 };
+} // namespace bufferization
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_BUFFERUTILS_H
+#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 72c5136eb1988..481eaa284d98b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -14,10 +14,34 @@ namespace bufferization {
 /// buffers.
 std::unique_ptr createBufferDeallocationPass();
 
+/// Creates a pass that moves allocations upwards to reduce the number of
+/// required copies that are inserted during the BufferDeallocation pass.
+std::unique_ptr createBufferHoistingPass();
+
+/// Creates a pass that moves allocations upwards out of loops. This avoids
+/// reallocations inside of loops.
+std::unique_ptr createBufferLoopHoistingPass();
+
+/// Creates a pass that converts memref function results to out-params.
+std::unique_ptr createBufferResultsToOutParamsPass();
+
 /// Creates a pass that finalizes a partial bufferization by removing remaining
 /// bufferization.to_tensor and bufferization.to_memref operations.
 std::unique_ptr> createFinalizingBufferizePass();
 
+/// Creates a pass that promotes heap-based allocations to stack-based ones.
+/// Only buffers smaller than the provided size are promoted.
+/// Dynamic shaped buffers are promoted up to the given rank.
+std::unique_ptr
+createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
+                                unsigned bitwidthOfIndexType = 64,
+                                unsigned maxRankOfAllocatedMemRef = 1);
+
+/// Creates a pass that promotes heap-based allocations to stack-based ones.
+/// Only buffers for which `isSmallAlloc(alloc) == true` are promoted.
+std::unique_ptr
+createPromoteBuffersToStackPass(std::function isSmallAlloc);
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 22db1dc2c5bf9..dcbdd52a5c81d 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -88,6 +88,51 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
   let constructor = "mlir::bufferization::createBufferDeallocationPass()";
 }
 
+def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
+  let summary = "Optimizes placement of allocation operations by moving them "
+                "into common dominators and out of nested regions";
+  let description = [{
+    This pass implements an approach to aggressively move allocations upwards
+    into common dominators and out of nested regions.
+  }];
+  let constructor = "mlir::bufferization::createBufferHoistingPass()";
+}
+
+def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
+  let summary = "Optimizes placement of allocation operations by moving them "
+                "out of loop nests";
+  let description = [{
+    This pass implements an approach to aggressively move allocations upwards
+    out of loop nests. It does not move allocations into common dominators.
+  }];
+  let constructor = "mlir::bufferization::createBufferLoopHoistingPass()";
+}
+
+def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">  {
+  let summary = "Converts memref-typed function results to out-params";
+  let description = [{
+    Some calling conventions prefer to pass output memrefs as "out params". The
+    conversion to this calling convention must be done as an atomic
+    transformation of the entire program (hence this is a module pass).
+
+    For example, if a call is rewritten, the callee needs to be rewritten
+    otherwise the IR will end up invalid. Thus, this transformation
+    require an atomic change to the entire program (e.g. the whole module).
+
+    This pass is expected to run immediately after bufferization is finished.
+    At that point, tensor-typed results will have been converted to memref-typed
+    results, and can be consistently converted to out params.
+
+    All memref-typed results are appended to the function argument list.
+
+    The main issue with this pass (and the out-param calling convention) is that
+    buffers for results need to be allocated in the caller. This currently only
+    works for static shaped memrefs.
+  }];
+  let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
+  let dependentDialects = ["memref::MemRefDialect"];
+}
+
 def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
   let summary = "Finalize a partial bufferization";
   let description = [{
@@ -104,4 +149,28 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
   let constructor = "mlir::bufferization::createFinalizingBufferizePass()";
 }
 
+def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
+  let summary = "Promotes heap-based allocations to automatically managed "
+                "stack-based allocations";
+  let description = [{
+    This pass implements a simple algorithm to convert heap-based memory
+    allocations to stack-based ones. It uses a built-in heuristic to decide
+    whether it makes sense to convert an allocation. Furthermore, dynamic
+    shaped buffers that are limited by the rank of the tensor can be
+    converted. They are only transformed if they are considered to be small.
+  }];
+  let constructor = "mlir::bufferization::createPromoteBuffersToStackPass()";
+  let options = [
+    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
+           /*default=*/"1024",
+           "Maximal size in bytes to promote allocations to stack.">,
+    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
+           /*default=*/"64",
+           "Bitwidth of the index type. Used for size estimation.">,
+    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
+           /*default=*/"1",
+           "Maximal memref rank to promote dynamic buffers.">,
+  ];
+}
+
 #endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
index b80cd0bf6d625..b47303c70250e 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
@@ -19,9 +19,9 @@
 namespace mlir {
 namespace bufferization {
 class BufferizeTypeConverter;
+class GlobalCreator;
 } // namespace bufferization
 
-class GlobalCreator;
 class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 
@@ -38,7 +38,7 @@ std::unique_ptr createFuncBufferizePass();
 /// Add patterns to bufferize tensor constants into global memrefs to the given
 /// pattern list.
 void populateTensorConstantBufferizePatterns(
-    GlobalCreator &globalCreator,
+    bufferization::GlobalCreator &globalCreator,
     bufferization::BufferizeTypeConverter &typeConverter,
     RewritePatternSet &patterns);
 
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 46bab0047c1b6..2224343244516 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -33,30 +33,6 @@ enum FusionMode { Greedy, ProducerConsumer, Sibling };
 // Passes
 //===----------------------------------------------------------------------===//
 
-/// Creates a pass that moves allocations upwards to reduce the number of
-/// required copies that are inserted during the BufferDeallocation pass.
-std::unique_ptr createBufferHoistingPass();
-
-/// Creates a pass that moves allocations upwards out of loops. This avoids
-/// reallocations inside of loops.
-std::unique_ptr createBufferLoopHoistingPass();
-
-/// Creates a pass that promotes heap-based allocations to stack-based ones.
-/// Only buffers smaller than the provided size are promoted.
-/// Dynamic shaped buffers are promoted up to the given rank.
-std::unique_ptr
-createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
-                                unsigned bitwidthOfIndexType = 64,
-                                unsigned maxRankOfAllocatedMemRef = 1);
-
-/// Creates a pass that promotes heap-based allocations to stack-based ones.
-/// Only buffers smaller with `isSmallAlloc(alloc) == true` are promoted.
-std::unique_ptr
-createPromoteBuffersToStackPass(std::function isSmallAlloc);
-
-/// Creates a pass that converts memref function results to out-params.
-std::unique_ptr createBufferResultsToOutParamsPass();
-
 /// Creates an instance of the Canonicalizer pass, configured with default
 /// settings (which can be overridden by pass options on the command line).
 std::unique_ptr createCanonicalizerPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 4b1d6ca71e2e1..9942c0fc88923 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -217,75 +217,6 @@ def AffinePipelineDataTransfer
   let constructor = "mlir::createPipelineDataTransferPass()";
 }
 
-def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
-  let summary = "Optimizes placement of allocation operations by moving them "
-                "into common dominators and out of nested regions";
-  let description = [{
-    This pass implements an approach to aggressively move allocations upwards
-    into common dominators and out of nested regions.
-  }];
-  let constructor = "mlir::createBufferHoistingPass()";
-}
-
-def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
-  let summary = "Optimizes placement of allocation operations by moving them "
-                "out of loop nests";
-  let description = [{
-    This pass implements an approach to aggressively move allocations upwards
-    out of loop nests. It does not move allocations into common dominators.
-  }];
-  let constructor = "mlir::createBufferLoopHoistingPass()";
-}
-
-def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
-  let summary = "Promotes heap-based allocations to automatically managed "
-                "stack-based allocations";
-  let description = [{
-    This pass implements a simple algorithm to convert heap-based memory
-    allocations to stack-based ones. It uses a built-in heuristic to decide
-    whether it makes sense to convert an allocation. Furthermore, dynamic
-    shaped buffers that are limited by the rank of the tensor can be
-    converted. They are only transformed if they are considered to be small.
-  }];
-  let constructor = "mlir::createPromoteBuffersToStackPass()";
-  let options = [
-    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
-           /*default=*/"1024",
-           "Maximal size in bytes to promote allocations to stack.">,
-    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
-           /*default=*/"64",
-           "Bitwidth of the index type. Used for size estimation.">,
-    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
-           /*default=*/"1",
-           "Maximal memref rank to promote dynamic buffers.">,
-  ];
-}
-
-def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">  {
-  let summary = "Converts memref-typed function results to out-params";
-  let description = [{
-    Some calling conventions prefer to pass output memrefs as "out params". The
-    conversion to this calling convention must be done as an atomic
-    transformation of the entire program (hence this is a module pass).
-
-    For example, if a call is rewritten, the callee needs to be rewritten
-    otherwise the IR will end up invalid. Thus, this transformation
-    require an atomic change to the entire program (e.g. the whole module).
-
-    This pass is expected to run immediately after bufferization is finished.
-    At that point, tensor-typed results will have been converted to memref-typed
-    results, and can be consistently converted to out params.
-
-    All memref-typed results are appended to the function argument list.
-
-    The main issue with this pass (and the out-param calling convention) is that
-    buffers for results need to be allocated in the caller. This currently only
-    works for static shaped memrefs.
-  }];
-  let constructor = "mlir::createBufferResultsToOutParamsPass()";
-  let dependentDialects = ["memref::MemRefDialect"];
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let description = [{
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index 73decbac7382c..ed7bd5c20d58e 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -54,12 +54,13 @@
 
 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Transforms/BufferUtils.h"
 #include "llvm/ADT/SetOperations.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 /// Walks over all immediate return-like terminators in the given region.
 static LogicalResult
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
similarity index 96%
rename from mlir/lib/Transforms/BufferOptimizations.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
index 4fa035a939923..158c786739342 100644
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
@@ -12,14 +12,15 @@
 // convert heap-based allocations to stack-based allocations, if possible.
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/BufferUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 /// Returns true if the given operation implements a known high-level region-
 /// based control-flow interface.
@@ -422,23 +423,22 @@ class PromoteBuffersToStackPass
 
 } // namespace
 
-std::unique_ptr mlir::createBufferHoistingPass() {
+std::unique_ptr mlir::bufferization::createBufferHoistingPass() {
   return std::make_unique();
 }
 
-std::unique_ptr mlir::createBufferLoopHoistingPass() {
+std::unique_ptr mlir::bufferization::createBufferLoopHoistingPass() {
   return std::make_unique();
 }
 
-std::unique_ptr
-mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                                      unsigned bitwidthOfIndexType,
-                                      unsigned maxRankOfAllocatedMemRef) {
+std::unique_ptr mlir::bufferization::createPromoteBuffersToStackPass(
+    unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType,
+    unsigned maxRankOfAllocatedMemRef) {
   return std::make_unique(
       maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
 }
 
-std::unique_ptr
-mlir::createPromoteBuffersToStackPass(std::function isSmallAlloc) {
+std::unique_ptr mlir::bufferization::createPromoteBuffersToStackPass(
+    std::function isSmallAlloc) {
   return std::make_unique(std::move(isSmallAlloc));
 }
diff --git a/mlir/lib/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
similarity index 97%
rename from mlir/lib/Transforms/BufferResultsToOutParams.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index c462955d068a4..08780db5f94df 100644
--- a/mlir/lib/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
@@ -139,6 +139,7 @@ struct BufferResultsToOutParamsPass
 };
 } // namespace
 
-std::unique_ptr mlir::createBufferResultsToOutParamsPass() {
+std::unique_ptr
+mlir::bufferization::createBufferResultsToOutParamsPass() {
   return std::make_unique();
 }
diff --git a/mlir/lib/Transforms/BufferUtils.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
similarity index 74%
rename from mlir/lib/Transforms/BufferUtils.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
index c24293cb4bd68..a373a8dbe86b4 100644
--- a/mlir/lib/Transforms/BufferUtils.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
@@ -10,18 +10,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/BufferUtils.h"
-#include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/SetOperations.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 //===----------------------------------------------------------------------===//
 // BufferPlacementAllocs
@@ -139,3 +139,49 @@ bool BufferPlacementTransformationBase::isLoop(Operation *op) {
 
   return false;
 }
+
+//===----------------------------------------------------------------------===//
+// GlobalCreator
+//===----------------------------------------------------------------------===//
+
+memref::GlobalOp GlobalCreator::getGlobalFor(arith::ConstantOp constantOp) {
+  auto type = constantOp.getType().cast();
+
+  BufferizeTypeConverter typeConverter;
+
+  // If we already have a global for this constant value, no need to do
+  // anything else.
+  auto it = globals.find(constantOp.getValue());
+  if (it != globals.end())
+    return cast(it->second);
+
+  // Create a builder without an insertion point. We will insert using the
+  // symbol table to guarantee unique names.
+  OpBuilder globalBuilder(moduleOp.getContext());
+  SymbolTable symbolTable(moduleOp);
+
+  // Create a pretty name.
+  SmallString<64> buf;
+  llvm::raw_svector_ostream os(buf);
+  interleave(type.getShape(), os, "x");
+  os << "x" << type.getElementType();
+
+  // Add an optional alignment to the global memref.
+  IntegerAttr memrefAlignment =
+      alignment > 0 ? IntegerAttr::get(globalBuilder.getI64Type(), alignment)
+                    : IntegerAttr();
+
+  auto global = globalBuilder.create(
+      constantOp.getLoc(), (Twine("__constant_") + os.str()).str(),
+      /*sym_visibility=*/globalBuilder.getStringAttr("private"),
+      /*type=*/typeConverter.convertType(type).cast(),
+      /*initial_value=*/constantOp.getValue().cast(),
+      /*constant=*/true,
+      /*alignment=*/memrefAlignment);
+  symbolTable.insert(global);
+  // The symbol table inserts at the end of the module, but globals are a bit
+  // nicer if they are at the beginning.
+  global->moveBefore(&moduleOp.front());
+  globals[constantOp.getValue()] = global;
+  return global;
+}
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
index b212ef952a05e..56fc326b4fa92 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
 add_mlir_dialect_library(MLIRBufferizationTransforms
   Bufferize.cpp
   BufferDeallocation.cpp
+  BufferOptimizations.cpp
+  BufferResultsToOutParams.cpp
+  BufferUtils.cpp
   OneShotAnalysis.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
index 256916c5e7b32..de3fbcd8b121c 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
@@ -10,10 +10,10 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/Transforms/BufferUtils.h"
 
 using namespace mlir::bufferization;
 
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
index c7752f592a9e8..5bae6f3f6154f 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
+++ b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
@@ -12,57 +12,16 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
 #include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/Transforms/BufferUtils.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 using namespace mlir;
-
-memref::GlobalOp GlobalCreator::getGlobalFor(arith::ConstantOp constantOp) {
-  auto type = constantOp.getType().cast();
-
-  bufferization::BufferizeTypeConverter typeConverter;
-
-  // If we already have a global for this constant value, no need to do
-  // anything else.
-  auto it = globals.find(constantOp.getValue());
-  if (it != globals.end())
-    return cast(it->second);
-
-  // Create a builder without an insertion point. We will insert using the
-  // symbol table to guarantee unique names.
-  OpBuilder globalBuilder(moduleOp.getContext());
-  SymbolTable symbolTable(moduleOp);
-
-  // Create a pretty name.
-  SmallString<64> buf;
-  llvm::raw_svector_ostream os(buf);
-  interleave(type.getShape(), os, "x");
-  os << "x" << type.getElementType();
-
-  // Add an optional alignment to the global memref.
-  IntegerAttr memrefAlignment =
-      alignment > 0 ? IntegerAttr::get(globalBuilder.getI64Type(), alignment)
-                    : IntegerAttr();
-
-  auto global = globalBuilder.create(
-      constantOp.getLoc(), (Twine("__constant_") + os.str()).str(),
-      /*sym_visibility=*/globalBuilder.getStringAttr("private"),
-      /*type=*/typeConverter.convertType(type).cast(),
-      /*initial_value=*/constantOp.getValue().cast(),
-      /*constant=*/true,
-      /*alignment=*/memrefAlignment);
-  symbolTable.insert(global);
-  // The symbol table inserts at the end of the module, but globals are a bit
-  // nicer if they are at the beginning.
-  global->moveBefore(&moduleOp.front());
-  globals[constantOp.getValue()] = global;
-  return global;
-}
+using namespace mlir::bufferization;
 
 namespace {
 class BufferizeTensorConstantOp
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 3e10b4a321311..1eba1ad94e5a5 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,9 +1,6 @@
 add_subdirectory(Utils)
 
 add_mlir_library(MLIRTransforms
-  BufferOptimizations.cpp
-  BufferResultsToOutParams.cpp
-  BufferUtils.cpp
   Canonicalizer.cpp
   ControlFlowSink.cpp
   CSE.cpp
@@ -31,7 +28,6 @@ add_mlir_library(MLIRTransforms
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAnalysis
-  MLIRBufferization
   MLIRCopyOpInterface
   MLIRLoopLikeInterface
   MLIRMemRef
diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h
index 9b846198fc08a..4496c4223cbec 100644
--- a/mlir/lib/Transforms/PassDetail.h
+++ b/mlir/lib/Transforms/PassDetail.h
@@ -27,10 +27,6 @@ namespace memref {
 class MemRefDialect;
 } // namespace memref
 
-namespace bufferization {
-class BufferizationDialect;
-} // namespace bufferization
-
 #define GEN_PASS_CLASSES
 #include "mlir/Transforms/Passes.h.inc"
 

From 2e2c0738e80e9c2b7c1413ca4719d5be2df4c6b5 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Thu, 20 Jan 2022 15:16:17 -0800
Subject: [PATCH 490/946] [mlir:Transforms] Move NormalizeMemRefs to
 MemRef/Transforms/

Transforms/ should only contain transformations that are dialect-independent and
this pass interacts with MemRef operations (making it a better fit for living in that
dialect).

Differential Revision: https://reviews.llvm.org/D117841
---
 .../mlir/Dialect/MemRef/Transforms/Passes.h   |   4 +
 .../mlir/Dialect/MemRef/Transforms/Passes.td  | 116 ++++++++++++++++++
 mlir/include/mlir/Transforms/Passes.h         |   4 -
 mlir/include/mlir/Transforms/Passes.td        | 116 ------------------
 .../Dialect/MemRef/Transforms/CMakeLists.txt  |   1 +
 .../MemRef}/Transforms/NormalizeMemRefs.cpp   |   5 +-
 .../Dialect/MemRef/Transforms/PassDetail.h    |  43 +++++++
 .../ResolveShapedTypeResultDims.cpp           |   4 +-
 mlir/lib/Transforms/CMakeLists.txt            |   1 -
 9 files changed, 168 insertions(+), 126 deletions(-)
 rename mlir/lib/{ => Dialect/MemRef}/Transforms/NormalizeMemRefs.cpp (99%)
 create mode 100644 mlir/lib/Dialect/MemRef/Transforms/PassDetail.h

diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
index 186782c6efdb2..23d12508b65cb 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
@@ -55,6 +55,10 @@ void populateResolveShapedTypeResultDimsPatterns(RewritePatternSet &patterns);
 /// load/store ops into `patterns`.
 std::unique_ptr createFoldSubViewOpsPass();
 
+/// Creates an interprocedural pass to normalize memrefs to have a trivial
+/// (identity) layout map.
+std::unique_ptr> createNormalizeMemRefsPass();
+
 /// Creates an operation pass to resolve `memref.dim` operations with values
 /// that are defined by operations that implement the
 /// `ReifyRankedShapeTypeShapeOpInterface`, in terms of shapes of its input
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
index 29984c4fc385d..d67746b9c6033 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
@@ -23,6 +23,122 @@ def FoldSubViewOps : Pass<"fold-memref-subview-ops"> {
   ];
 }
 
+def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
+  let summary = "Normalize memrefs";
+   let description = [{
+    This pass transforms memref types with a non-trivial
+    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
+    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
+    pass is inter-procedural, in the sense that it can modify function
+    interfaces and call sites that pass memref types. In order to modify
+    memref types while preserving the original behavior, users of those
+    memref types are also modified to incorporate the resulting layout map.
+    For instance, an [AffineLoadOp]
+    (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
+    will be updated to compose the layout map with the affine expression
+    contained in the op. Operations marked with the [MemRefsNormalizable]
+    (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are
+    expected to be normalizable. Supported operations include affine
+    operations, memref.alloc, memref.dealloc, and std.return.
+
+    Given an appropriate layout map specified in the code, this transformation
+    can express tiled or linearized access to multi-dimensional data
+    structures, but will not modify memref types without an explicit layout
+    map.
+
+    Currently this pass is limited to only modify
+    functions where all memref types can be normalized. If a function
+    contains any operations that are not MemRefsNormalizable, then the function
+    and any functions that call it or are called by it will not be modified.
+
+    Input
+
+    ```mlir
+    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
+    func @matmul(%A: memref<16xf64, #tile>,
+                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
+      affine.for %arg3 = 0 to 16 {
+            %a = affine.load %A[%arg3] : memref<16xf64, #tile>
+            %p = arith.mulf %a, %a : f64
+            affine.store %p, %A[%arg3] : memref<16xf64, #tile>
+      }
+      %c = memref.alloc() : memref<16xf64, #tile>
+      %d = affine.load %c[0] : memref<16xf64, #tile>
+      return %A: memref<16xf64, #tile>
+    }
+    ```
+
+    Output
+
+    ```mlir
+    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
+      -> memref<4x4xf64> {
+      affine.for %arg3 = 0 to 16 {
+        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
+        %4 = arith.mulf %3, %3 : f64
+        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
+      }
+      %0 = memref.alloc() : memref<4x4xf64>
+      %1 = affine.apply #map1()
+      %2 = affine.load %0[0, 0] : memref<4x4xf64>
+      return %arg0 : memref<4x4xf64>
+    }
+    ```
+
+    Input
+
+    ```mlir
+    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
+    func @linearize(%arg0: memref<8x8xi32, #linear8>,
+                    %arg1: memref<8x8xi32, #linear8>,
+                    %arg2: memref<8x8xi32, #linear8>) {
+      %c8 = arith.constant 8 : index
+      %c0 = arith.constant 0 : index
+      %c1 = arith.constant 1 : index
+      affine.for %arg3 = %c0 to %c8  {
+      affine.for %arg4 = %c0 to %c8  {
+        affine.for %arg5 = %c0 to %c8 {
+          %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
+          %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
+          %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
+          %3 = arith.muli %0, %1 : i32
+          %4 = arith.addi %2, %3 : i32
+          affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
+        }
+      }
+      }
+      return
+    }
+    ```
+
+    Output
+
+    ```mlir
+    func @linearize(%arg0: memref<64xi32>,
+                    %arg1: memref<64xi32>,
+                    %arg2: memref<64xi32>) {
+      %c8 = arith.constant 8 : index
+      %c0 = arith.constant 0 : index
+      affine.for %arg3 = %c0 to %c8 {
+        affine.for %arg4 = %c0 to %c8 {
+          affine.for %arg5 = %c0 to %c8 {
+            %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
+            %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
+            %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
+            %3 = arith.muli %0, %1 : i32
+            %4 = arith.addi %2, %3 : i32
+            affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
+          }
+        }
+      }
+      return
+    }
+    ```
+  }];
+  let constructor = "mlir::memref::createNormalizeMemRefsPass()";
+  let dependentDialects = ["AffineDialect"];
+}
+
 def ResolveRankedShapeTypeResultDims :
     Pass<"resolve-ranked-shaped-type-result-dims"> {
   let summary = "Resolve memref.dim of result values of ranked shape type";
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 2224343244516..4876d705afcb9 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -113,10 +113,6 @@ std::unique_ptr createSCCPPass();
 /// pass may *only* be scheduled on an operation that defines a SymbolTable.
 std::unique_ptr createSymbolDCEPass();
 
-/// Creates an interprocedural pass to normalize memrefs to have a trivial
-/// (identity) layout map.
-std::unique_ptr> createNormalizeMemRefsPass();
-
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 9942c0fc88923..44bf475af24c9 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -351,122 +351,6 @@ def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
 
-def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
-  let summary = "Normalize memrefs";
-   let description = [{
-    This pass transforms memref types with a non-trivial
-    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
-    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
-    pass is inter-procedural, in the sense that it can modify function
-    interfaces and call sites that pass memref types. In order to modify
-    memref types while preserving the original behavior, users of those
-    memref types are also modified to incorporate the resulting layout map.
-    For instance, an [AffineLoadOp]
-    (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
-    will be updated to compose the layout map with with the affine expression
-    contained in the op. Operations marked with the [MemRefsNormalizable]
-    (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are
-    expected to be normalizable. Supported operations include affine
-    operations, memref.alloc, memref.dealloc, and std.return.
-
-    Given an appropriate layout map specified in the code, this transformation
-    can express tiled or linearized access to multi-dimensional data
-    structures, but will not modify memref types without an explicit layout
-    map.
-
-    Currently this pass is limited to only modify
-    functions where all memref types can be normalized. If a function
-    contains any operations that are not MemRefNormalizable, then the function
-    and any functions that call or call it will not be modified.
-
-    Input
-
-    ```mlir
-    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
-    func @matmul(%A: memref<16xf64, #tile>,
-                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
-      affine.for %arg3 = 0 to 16 {
-            %a = affine.load %A[%arg3] : memref<16xf64, #tile>
-            %p = arith.mulf %a, %a : f64
-            affine.store %p, %A[%arg3] : memref<16xf64, #tile>
-      }
-      %c = memref.alloc() : memref<16xf64, #tile>
-      %d = affine.load %c[0] : memref<16xf64, #tile>
-      return %A: memref<16xf64, #tile>
-    }
-    ```
-
-    Output
-
-    ```mlir
-    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
-      -> memref<4x4xf64> {
-      affine.for %arg3 = 0 to 16 {
-        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
-        %4 = arith.mulf %3, %3 : f64
-        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
-      }
-      %0 = memref.alloc() : memref<4x4xf64>
-      %1 = affine.apply #map1()
-      %2 = affine.load %0[0, 0] : memref<4x4xf64>
-      return %arg0 : memref<4x4xf64>
-    }
-    ```
-
-    Input
-
-    ```
-    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
-    func @linearize(%arg0: memref<8x8xi32, #linear8>,
-                    %arg1: memref<8x8xi32, #linear8>,
-                    %arg2: memref<8x8xi32, #linear8>) {
-      %c8 = arith.constant 8 : index
-      %c0 = arith.constant 0 : index
-      %c1 = arith.constant 1 : index
-      affine.for %arg3 = %c0 to %c8  {
-      affine.for %arg4 = %c0 to %c8  {
-        affine.for %arg5 = %c0 to %c8 {
-          %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
-          %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
-          %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
-          %3 = arith.muli %0, %1 : i32
-          %4 = arith.addi %2, %3 : i32
-          affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
-        }
-      }
-      }
-      return
-    }
-    ```
-
-    Output
-
-    ```mlir
-    func @linearize(%arg0: memref<64xi32>,
-                    %arg1: memref<64xi32>,
-                    %arg2: memref<64xi32>) {
-    %c8 = arith.constant 8 : index
-    %c0 = arith.constant 0 : index
-    affine.for %arg3 = %c0 to %c8 {
-      affine.for %arg4 = %c0 to %c8 {
-        affine.for %arg5 = %c0 to %c8 {
-          %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
-          %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
-          %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
-          %3 = arith.muli %0, %1 : i32
-          %4 = arith.addi %2, %3 : i32
-          affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
-        }
-      }
-    }
-    return
-  }
-  ```
-  }];
-  let constructor = "mlir::createNormalizeMemRefsPass()";
-  let dependentDialects = ["AffineDialect"];
-}
-
 def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
   let summary = "Collapse parallel loops to use less induction variables";
   let constructor = "mlir::createParallelLoopCollapsingPass()";
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
index 3eda2ded018fe..319f9bbb95a37 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIRMemRefTransforms
   FoldSubViewOps.cpp
+  NormalizeMemRefs.cpp
   ResolveShapedTypeResultDims.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
similarity index 99%
rename from mlir/lib/Transforms/NormalizeMemRefs.cpp
rename to mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
index 5119c4526364a..0b5e49b2df528 100644
--- a/mlir/lib/Transforms/NormalizeMemRefs.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
@@ -14,7 +14,7 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Debug.h"
@@ -43,7 +43,8 @@ struct NormalizeMemRefs : public NormalizeMemRefsBase {
 
 } // namespace
 
-std::unique_ptr> mlir::createNormalizeMemRefsPass() {
+std::unique_ptr>
+mlir::memref::createNormalizeMemRefsPass() {
   return std::make_unique();
 }
 
diff --git a/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
new file mode 100644
index 0000000000000..d15631526817f
--- /dev/null
+++ b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
@@ -0,0 +1,43 @@
+//===- PassDetail.h - MemRef Pass class details -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
+#define DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
+
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+
+class AffineDialect;
+
+// Forward declaration from Dialect.h
+template 
+void registerDialect(DialectRegistry &registry);
+
+namespace arith {
+class ArithmeticDialect;
+} // namespace arith
+
+namespace memref {
+class MemRefDialect;
+} // namespace memref
+
+namespace tensor {
+class TensorDialect;
+} // namespace tensor
+
+namespace vector {
+class VectorDialect;
+} // namespace vector
+
+#define GEN_PASS_CLASSES
+#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
+
+} // namespace mlir
+
+#endif // DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
index 60f82f3b9e4b2..3f6aeeb696414 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -107,9 +108,6 @@ struct DimOfReifyRankedShapedTypeOpInterface : public OpRewritePattern {
 //===----------------------------------------------------------------------===//
 
 namespace {
-#define GEN_PASS_CLASSES
-#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
-
 struct ResolveRankedShapeTypeResultDimsPass final
     : public ResolveRankedShapeTypeResultDimsBase<
           ResolveRankedShapeTypeResultDimsPass> {
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 1eba1ad94e5a5..7826650f57471 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -9,7 +9,6 @@ add_mlir_library(MLIRTransforms
   LoopCoalescing.cpp
   LoopFusion.cpp
   LoopInvariantCodeMotion.cpp
-  NormalizeMemRefs.cpp
   OpStats.cpp
   ParallelLoopCollapsing.cpp
   PipelineDataTransfer.cpp

From a70aa7bb0d9a6066831b339e0a09a2c1bc74fe2b Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Thu, 20 Jan 2022 17:32:31 -0800
Subject: [PATCH 491/946] [mlir:Transforms] Move out the remaining non-dialect
 independent transforms and utilities

This has been a major TODO for a very long time, and is necessary for establishing a proper
dialect-free dependency layering for the Transforms library. Code was moved to effectively
two main locations:

* Affine/
There was quite a bit of affine dialect related code in Transforms/ due to historical reasons
(of a time way into MLIR's past). The following headers were moved to:
Transforms/LoopFusionUtils.h -> Dialect/Affine/LoopFusionUtils.h
Transforms/LoopUtils.h -> Dialect/Affine/LoopUtils.h
Transforms/Utils.h -> Dialect/Affine/Utils.h

The following transforms were also moved:
AffineLoopFusion, AffinePipelineDataTransfer, LoopCoalescing

* SCF/
Only one SCF pass was in Transforms/ (likely accidentally placed here): ParallelLoopCollapsing
The SCF specific utilities in LoopUtils have been moved to SCF/Utils.h

* Misc:
mlir::moveLoopInvariantCode was also moved to LoopLikeInterface.h given
that it is a simple utility defined in terms of LoopLikeOpInterface.

Differential Revision: https://reviews.llvm.org/D117848
---
 .../Affine}/LoopFusionUtils.h                 |   6 +-
 .../Affine}/LoopUtils.h                       |  48 +-
 mlir/include/mlir/Dialect/Affine/Passes.h     |  21 +
 mlir/include/mlir/Dialect/Affine/Passes.td    | 208 +++++
 mlir/include/mlir/Dialect/Affine/Utils.h      | 119 +++
 mlir/include/mlir/Dialect/SCF/Passes.h        |   4 +
 mlir/include/mlir/Dialect/SCF/Passes.td       |  16 +
 mlir/include/mlir/Dialect/SCF/Utils.h         |  60 ++
 .../mlir/Interfaces/LoopLikeInterface.h       |  13 +
 .../mlir/Transforms/ControlFlowSinkUtils.h    |  70 ++
 mlir/include/mlir/Transforms/Passes.h         |  26 -
 mlir/include/mlir/Transforms/Passes.td        | 224 -----
 mlir/include/mlir/Transforms/Utils.h          | 200 -----
 mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp     |   1 -
 .../SCFToStandard/SCFToStandard.cpp           |   1 -
 .../StandardToLLVM/StandardToLLVM.cpp         |   1 -
 .../Conversion/VectorToSCF/VectorToSCF.cpp    |   1 -
 .../Transforms/AffineDataCopyGeneration.cpp   |   2 +-
 .../AffineLoopInvariantCodeMotion.cpp         |   4 +-
 .../Affine/Transforms/AffineParallelize.cpp   |   2 +-
 .../Dialect/Affine/Transforms/CMakeLists.txt  |   3 +
 .../Affine}/Transforms/LoopCoalescing.cpp     |   3 +-
 .../Affine}/Transforms/LoopFusion.cpp         |   6 +-
 .../Dialect/Affine/Transforms/LoopTiling.cpp  |   4 +-
 .../Dialect/Affine/Transforms/LoopUnroll.cpp  |   2 +-
 .../Affine/Transforms/LoopUnrollAndJam.cpp    |   2 +-
 .../Dialect/Affine/Transforms/PassDetail.h    |   5 +
 .../Transforms/PipelineDataTransfer.cpp       |   7 +-
 .../Transforms/SimplifyAffineStructures.cpp   |   2 +-
 mlir/lib/Dialect/Affine/Utils/CMakeLists.txt  |   3 +
 .../Affine}/Utils/LoopFusionUtils.cpp         |   5 +-
 .../Affine}/Utils/LoopUtils.cpp               | 663 +--------------
 mlir/lib/Dialect/Affine/Utils/Utils.cpp       | 741 ++++++++++++++++-
 .../GPU/Transforms/MemoryPromotion.cpp        |   2 +-
 .../Linalg/Transforms/CodegenStrategy.cpp     |   2 -
 .../Linalg/Transforms/HoistPadding.cpp        |   2 -
 .../Dialect/Linalg/Transforms/Hoisting.cpp    |   2 -
 .../Transforms/LinalgStrategyPasses.cpp       |  12 +-
 .../Dialect/Linalg/Transforms/Transforms.cpp  |   1 -
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp       |   2 +-
 .../MemRef/Transforms/NormalizeMemRefs.cpp    |   2 +-
 .../lib/Dialect/SCF/Transforms/CMakeLists.txt |   1 +
 .../Transforms/ParallelLoopCollapsing.cpp     |   6 +-
 mlir/lib/Dialect/SCF/Transforms/Utils.cpp     | 694 +++++++++++++++-
 .../VectorTransferSplitRewritePatterns.cpp    |   1 -
 mlir/lib/Dialect/Vector/VectorTransforms.cpp  |   1 -
 .../Dialect/Vector/VectorUnrollDistribute.cpp |   1 -
 mlir/lib/Interfaces/LoopLikeInterface.cpp     |  83 ++
 mlir/lib/Transforms/CMakeLists.txt            |   9 -
 mlir/lib/Transforms/CSE.cpp                   |   1 -
 mlir/lib/Transforms/ControlFlowSink.cpp       |   2 +-
 .../Transforms/LoopInvariantCodeMotion.cpp    |  79 +-
 mlir/lib/Transforms/PassDetail.h              |  15 -
 mlir/lib/Transforms/Utils/CMakeLists.txt      |  13 -
 .../Transforms/Utils/ControlFlowSinkUtils.cpp |   2 +-
 .../Transforms/Utils/DialectConversion.cpp    |   1 -
 mlir/lib/Transforms/Utils/Utils.cpp           | 767 ------------------
 mlir/test/lib/Dialect/Affine/CMakeLists.txt   |   2 +
 .../lib/Dialect/Affine/TestAffineDataCopy.cpp |   2 +-
 .../Affine/TestAffineLoopParametricTiling.cpp |   2 +-
 .../Affine}/TestLoopFusion.cpp                |   5 +-
 .../Affine}/TestLoopMapping.cpp               |   3 +-
 .../Dialect/Affine/TestLoopPermutation.cpp    |   3 +-
 .../Dialect/Affine/TestVectorizationUtils.cpp |   2 +-
 mlir/test/lib/Dialect/SCF/CMakeLists.txt      |   2 +
 .../SCF}/TestLoopParametricTiling.cpp         |   6 +-
 .../SCF}/TestLoopUnrolling.cpp                |   3 +-
 mlir/test/lib/Transforms/CMakeLists.txt       |   4 -
 mlir/test/lib/Transforms/TestConstantFold.cpp |   1 -
 mlir/unittests/Transforms/CMakeLists.txt      |   1 +
 70 files changed, 2129 insertions(+), 2081 deletions(-)
 rename mlir/include/mlir/{Transforms => Dialect/Affine}/LoopFusionUtils.h (98%)
 rename mlir/include/mlir/{Transforms => Dialect/Affine}/LoopUtils.h (85%)
 create mode 100644 mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
 delete mode 100644 mlir/include/mlir/Transforms/Utils.h
 rename mlir/lib/{ => Dialect/Affine}/Transforms/LoopCoalescing.cpp (97%)
 rename mlir/lib/{ => Dialect/Affine}/Transforms/LoopFusion.cpp (99%)
 rename mlir/lib/{ => Dialect/Affine}/Transforms/PipelineDataTransfer.cpp (99%)
 rename mlir/lib/{Transforms => Dialect/Affine}/Utils/LoopFusionUtils.cpp (99%)
 rename mlir/lib/{Transforms => Dialect/Affine}/Utils/LoopUtils.cpp (81%)
 rename mlir/lib/{ => Dialect/SCF}/Transforms/ParallelLoopCollapsing.cpp (91%)
 delete mode 100644 mlir/lib/Transforms/Utils/Utils.cpp
 rename mlir/test/lib/{Transforms => Dialect/Affine}/TestLoopFusion.cpp (98%)
 rename mlir/test/lib/{Transforms => Dialect/Affine}/TestLoopMapping.cpp (96%)
 rename mlir/test/lib/{Transforms => Dialect/SCF}/TestLoopParametricTiling.cpp (93%)
 rename mlir/test/lib/{Transforms => Dialect/SCF}/TestLoopUnrolling.cpp (97%)

diff --git a/mlir/include/mlir/Transforms/LoopFusionUtils.h b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
similarity index 98%
rename from mlir/include/mlir/Transforms/LoopFusionUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
index b26d023873b45..486d142e0a20e 100644
--- a/mlir/include/mlir/Transforms/LoopFusionUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
-#define MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
 
 #include "mlir/IR/Value.h"
 #include "mlir/Support/LLVM.h"
@@ -167,4 +167,4 @@ void gatherProducerConsumerMemrefs(ArrayRef srcOps,
                                    DenseSet &producerConsumerMemrefs);
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
similarity index 85%
rename from mlir/include/mlir/Transforms/LoopUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopUtils.h
index d4d0d14d73fb5..bcaf864a43331 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_LOOPUTILS_H
-#define MLIR_TRANSFORMS_LOOPUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPUTILS_H
 
 #include "mlir/IR/Block.h"
 #include "mlir/Support/LLVM.h"
@@ -45,9 +45,6 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
 LogicalResult loopUnrollByFactor(
     AffineForOp forOp, uint64_t unrollFactor,
     function_ref annotateFn = nullptr);
-LogicalResult loopUnrollByFactor(
-    scf::ForOp forOp, uint64_t unrollFactor,
-    function_ref annotateFn = nullptr);
 
 /// Unrolls this loop by the specified unroll factor or its trip count,
 /// whichever is lower.
@@ -63,8 +60,6 @@ bool LLVM_ATTRIBUTE_UNUSED isPerfectlyNested(ArrayRef loops);
 /// AffineForOp, and the second op is a terminator).
 void getPerfectlyNestedLoops(SmallVectorImpl &nestedLoops,
                              AffineForOp root);
-void getPerfectlyNestedLoops(SmallVectorImpl &nestedLoops,
-                             scf::ForOp root);
 
 /// Unrolls and jams this loop by the specified factor. `forOp` can be a loop
 /// with iteration arguments performing supported reductions and its inner loops
@@ -78,10 +73,9 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
 LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
                                       uint64_t unrollJamFactor);
 
-/// Promotes the loop body of a AffineForOp/scf::ForOp to its containing block
-/// if the loop was known to have a single iteration.
+/// Promotes the loop body of an AffineForOp to its containing block if the loop
+/// was known to have a single iteration.
 LogicalResult promoteIfSingleIteration(AffineForOp forOp);
-LogicalResult promoteIfSingleIteration(scf::ForOp forOp);
 
 /// Promotes all single iteration AffineForOp's in the Function, i.e., moves
 /// their body into the containing Block.
@@ -146,13 +140,9 @@ AffineForOp sinkSequentialLoops(AffineForOp forOp);
 /// occurrence in `forOps`, under each of the `targets`.
 /// Returns the new AffineForOps, one per each of (`forOps`, `targets`) pair,
 /// nested immediately under each of `targets`.
-using Loops = SmallVector;
-using TileLoops = std::pair;
 SmallVector, 8> tile(ArrayRef forOps,
                                                  ArrayRef sizes,
                                                  ArrayRef targets);
-SmallVector tile(ArrayRef forOps, ArrayRef sizes,
-                           ArrayRef targets);
 
 /// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
 /// and sinking them, in their order of occurrence in `forOps`, under `target`.
@@ -160,15 +150,6 @@ SmallVector tile(ArrayRef forOps, ArrayRef sizes,
 /// `target`.
 SmallVector tile(ArrayRef forOps,
                                  ArrayRef sizes, AffineForOp target);
-Loops tile(ArrayRef forOps, ArrayRef sizes,
-           scf::ForOp target);
-
-/// Tile a nest of scf::ForOp loops rooted at `rootForOp` with the given
-/// (parametric) sizes. Sizes are expected to be strictly positive values at
-/// runtime.  If more sizes than loops are provided, discard the trailing values
-/// in sizes.  Assumes the loop nest is permutable.
-/// Returns the newly created intra-tile loops.
-Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef sizes);
 
 /// Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
 struct AffineCopyOptions {
@@ -236,16 +217,6 @@ LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion,
                                        const AffineCopyOptions &copyOptions,
                                        CopyGenerateResult &result);
 
-/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
-/// parametric tile sizes that the outer loops have a fixed number of iterations
-/// as defined in `sizes`.
-TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef sizes);
-
-/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
-/// `loops` contains a list of perfectly nested loops with bounds and steps
-/// independent of any loop induction variable involved in the nest.
-void coalesceLoops(MutableArrayRef loops);
-
 /// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
 /// `loops` contains a list of perfectly nested loops outermost to innermost
 /// that are normalized (step one and lower bound of zero) and with bounds and
@@ -254,12 +225,6 @@ void coalesceLoops(MutableArrayRef loops);
 /// be representable using affine.for.
 LogicalResult coalesceLoops(MutableArrayRef loops);
 
-/// Take the ParallelLoop and for each set of dimension indices, combine them
-/// into a single dimension. combinedDimensions must contain each index into
-/// loops exactly once.
-void collapseParallelLoops(scf::ParallelOp loops,
-                           ArrayRef> combinedDimensions);
-
 /// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of
 /// size given by `numProcessors`. This is achieved by embedding the SSA values
 /// corresponding to `processorIds` and `numProcessors` into the bounds and step
@@ -321,9 +286,6 @@ LogicalResult
 separateFullTiles(MutableArrayRef nest,
                   SmallVectorImpl *fullTileNest = nullptr);
 
-/// Move loop invariant code out of `looplike`.
-LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
-
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_LOOPUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPUTILS_H
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index e4f6d2034902f..8a94262a298b2 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -21,6 +21,10 @@ namespace mlir {
 
 class AffineForOp;
 
+/// Fusion mode to attempt. The default mode `Greedy` does both
+/// producer-consumer and sibling fusion.
+enum FusionMode { Greedy, ProducerConsumer, Sibling };
+
 /// Creates a simplification pass for affine structures (maps and sets). In
 /// addition, this pass also normalizes memrefs to have the trivial (identity)
 /// layout map.
@@ -53,6 +57,19 @@ std::unique_ptr> createAffineDataCopyGenerationPass();
 /// dead allocs.
 std::unique_ptr> createAffineScalarReplacementPass();
 
+/// Creates a pass that transforms perfectly nested loops with independent
+/// bounds into a single loop.
+std::unique_ptr> createLoopCoalescingPass();
+
+/// Creates a loop fusion pass which fuses loops according to type of fusion
+/// specified in `fusionMode`. Buffers of size less than or equal to
+/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
+std::unique_ptr>
+createLoopFusionPass(unsigned fastMemorySpace = 0,
+                     uint64_t localBufSizeThreshold = 0,
+                     bool maximalFusion = false,
+                     enum FusionMode fusionMode = FusionMode::Greedy);
+
 /// Creates a pass to perform tiling on loop nests.
 std::unique_ptr>
 createLoopTilingPass(uint64_t cacheSizeBytes);
@@ -76,6 +93,10 @@ std::unique_ptr> createLoopUnrollPass(
 std::unique_ptr>
 createLoopUnrollAndJamPass(int unrollJamFactor = -1);
 
+/// Creates a pass to pipeline explicit movement of data across levels of the
+/// memory hierarchy.
+std::unique_ptr> createPipelineDataTransferPass();
+
 /// Creates a pass to vectorize loops, operations and data types using a
 /// target-independent, n-D super-vector abstraction.
 std::unique_ptr>
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 8ec4b84ea5ca0..d67bd33ef9d62 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -43,6 +43,138 @@ def AffineDataCopyGeneration : Pass<"affine-data-copy-generate", "FuncOp"> {
   ];
 }
 
+def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
+  let summary = "Fuse affine loop nests";
+  let description = [{
+    This pass performs fusion of loop nests using a slicing-based approach. It
+    combines two fusion strategies: producer-consumer fusion and sibling fusion.
+    Producer-consumer fusion is aimed at fusing pairs of loops where the first
+    one writes to a memref that the second reads. Sibling fusion targets pairs
+    of loops that share no dependences between them but that load from the same
+    memref. The fused loop nests, when possible, are rewritten to access
+    significantly smaller local buffers instead of the original memref's, and
+    the latter are often either completely optimized away or contracted. This
+    transformation leads to enhanced locality and lower memory footprint through
+    the elimination or contraction of temporaries/intermediate memref's. These
+    benefits are sometimes achieved at the expense of redundant computation
+    through a cost model that evaluates available choices such as the depth at
+    which a source slice should be materialized in the destination slice.
+
+    Example 1: Producer-consumer fusion.
+    Input:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<10xf32>
+      %1 = memref.alloc() : memref<10xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[%arg2] : memref<10xf32>
+        affine.store %cst, %1[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %0[%arg2] : memref<10xf32>
+        %3 = arith.addf %2, %2 : f32
+        affine.store %3, %arg0[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %1[%arg2] : memref<10xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<1xf32>
+      %1 = memref.alloc() : memref<1xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[0] : memref<1xf32>
+        affine.store %cst, %1[0] : memref<1xf32>
+        %2 = affine.load %1[0] : memref<1xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+        %4 = affine.load %0[0] : memref<1xf32>
+        %5 = arith.addf %4, %4 : f32
+        affine.store %5, %arg0[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+
+    Example 2: Sibling fusion.
+    Input:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.addf %0, %1 : f32
+          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %5 = arith.addf %3, %4 : f32
+          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+  }];
+  let constructor = "mlir::createLoopFusionPass()";
+  let options = [
+    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+           /*default=*/"0.30f", "Fractional increase in additional computation "
+                                "tolerated while fusing">,
+    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+           /*default=*/"0",
+           "Faster memory space number to promote fusion buffers to">,
+    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+                            "to fast memory space">,
+    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+           "Enables maximal loop fusion">,
+    Option<"affineFusionMode", "mode", "enum FusionMode",
+           "mlir::FusionMode::Greedy", "fusion mode to attempt",
+           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
+           " \"greedy\", \"Perform greedy (both producer-consumer and sibling)  fusion\"), "
+           "clEnumValN( mlir::FusionMode::ProducerConsumer, "
+           "\"producer\", \"Perform only producer-consumer fusion\"), "
+           "clEnumValN( mlir::FusionMode::Sibling, "
+           "\"sibling\", \"Perform only sibling fusion\"))">,
+    ];
+  let dependentDialects = ["memref::MemRefDialect"];
+}
+
 def AffineLoopInvariantCodeMotion
     : Pass<"affine-loop-invariant-code-motion", "FuncOp"> {
   let summary = "Hoist loop invariant instructions outside of affine loops";
@@ -94,6 +226,75 @@ def AffineLoopUnrollAndJam : Pass<"affine-loop-unroll-jam", "FuncOp"> {
   ];
 }
 
+def AffinePipelineDataTransfer
+    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
+  let summary = "Pipeline non-blocking data transfers between explicitly "
+                "managed levels of the memory hierarchy";
+  let description = [{
+    This pass performs a transformation to overlap non-blocking DMA operations
+    in a loop with computations through double buffering. This is achieved by
+    advancing dma_start operations with respect to other operations.
+
+    Input
+
+    ```mlir
+    func @pipelinedatatransfer() {
+      %0 = memref.alloc() : memref<256xf32>
+      %1 = memref.alloc() : memref<32xf32, 1>
+      %2 = memref.alloc() : memref<1xf32>
+      %c0 = arith.constant 0 : index
+      %c128 = arith.constant 128 : index
+      affine.for %i0 = 0 to 8 {
+        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
+        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
+        %3 = affine.load %1[%i0] : memref<32xf32, 1>
+        %4 = "compute"(%3) : (f32) -> f32
+        affine.store %4, %1[%i0] : memref<32xf32, 1>
+      }
+      return
+    }
+    ```
+
+    Output
+
+    ```mlir
+    module {
+      func @pipelinedatatransfer() {
+        %c8 = arith.constant 8 : index
+        %c0 = arith.constant 0 : index
+        %0 = memref.alloc() : memref<256xf32>
+        %c0_0 = arith.constant 0 : index
+        %c128 = arith.constant 128 : index
+        %1 = memref.alloc() : memref<2x32xf32, 1>
+        %2 = memref.alloc() : memref<2x1xf32>
+        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+        affine.for %arg0 = 1 to 8 {
+          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+          %8 = affine.apply #map3(%arg0)
+          %9 = affine.apply #map4(%8)
+          %10 = affine.apply #map4(%8)
+          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+          %12 = "compute"(%11) : (f32) -> f32
+          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+        }
+        %3 = affine.apply #map3(%c8)
+        %4 = affine.apply #map4(%3)
+        %5 = affine.apply #map4(%3)
+        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        %7 = "compute"(%6) : (f32) -> f32
+        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        memref.dealloc %2 : memref<2x1xf32>
+        memref.dealloc %1 : memref<2x32xf32, 1>
+        return
+      }
+    }
+    ```
+  }];
+  let constructor = "mlir::createPipelineDataTransferPass()";
+}
+
 def AffineScalarReplacement : Pass<"affine-scalrep", "FuncOp"> {
   let summary = "Replace affine memref acceses by scalars by forwarding stores "
                 "to loads and eliminating redundant loads";
@@ -184,6 +385,13 @@ def AffineLoopNormalize : Pass<"affine-loop-normalize", "FuncOp"> {
   let constructor = "mlir::createAffineLoopNormalizePass()";
 }
 
+def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
+  let summary = "Coalesce nested loops with independent bounds into a single "
+                "loop";
+  let constructor = "mlir::createLoopCoalescingPass()";
+  let dependentDialects = ["arith::ArithmeticDialect"];
+}
+
 def SimplifyAffineStructures : Pass<"simplify-affine-structures", "FuncOp"> {
   let summary = "Simplify affine expressions in maps/sets and normalize "
                 "memrefs";
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index a0cb63d7fb9e5..bcbb7446df548 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -24,6 +24,10 @@ class DominanceInfo;
 class Operation;
 class PostDominanceInfo;
 
+namespace memref {
+class AllocOp;
+} // namespace memref
+
 struct LogicalResult;
 
 using ReductionLoopMap = DenseMap>;
@@ -168,6 +172,121 @@ void normalizeAffineFor(AffineForOp op);
 AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                         AffineExpr max, bool positivePath = true);
 
+/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
+/// optionally remapping the old memref's indices using the supplied affine map,
+/// `indexRemap`. The new memref could be of a different shape or rank.
+/// `extraIndices` provides any additional access indices to be added to the
+/// start.
+///
+/// `indexRemap` remaps indices of the old memref access to a new set of indices
+/// that are used to index the memref. Additional input operands to indexRemap
+/// can be optionally provided in `extraOperands`, and they occupy the start
+/// of its input list. `indexRemap`'s dimensional inputs are expected to
+/// correspond to memref's indices, and its symbolic inputs if any should be
+/// provided in `symbolOperands`.
+///
+/// `domOpFilter`, if non-null, restricts the replacement to only those
+/// operations that are dominated by the former; similarly, `postDomOpFilter`
+/// restricts replacement to only those operations that are postdominated by it.
+///
+/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
+/// uses of a memref without any requirement for access index rewrites as long
+/// as the user operation has the MemRefsNormalizable trait. The default value
+/// of this flag is false.
+///
+/// 'replaceInDeallocOp', if set, allows DeallocOp, a non-dereferencing user, to
+/// also be a candidate for replacement. The default value of this flag is
+/// false.
+///
+/// Returns success, or failure if the replacement is not possible,
+/// whenever a memref is used as an operand in a non-dereferencing context and
+/// 'allowNonDereferencingOps' is false, except for dealloc's on the memref
+/// which are left untouched. See comments at function definition for an
+/// example.
+//
+//  Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
+//  The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
+//  index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
+//  d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
+//  extra operands, note that 'indexRemap' would just be applied to existing
+//  indices (%i, %j).
+//  TODO: allow extraIndices to be added at any position.
+LogicalResult replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef extraIndices = {},
+    AffineMap indexRemap = AffineMap(), ArrayRef extraOperands = {},
+    ArrayRef symbolOperands = {}, Operation *domOpFilter = nullptr,
+    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
+    bool replaceInDeallocOp = false);
+
+/// Performs the same replacement as the other version above but only for the
+/// dereferencing uses of `oldMemRef` in `op`, except in cases where
+/// 'allowNonDereferencingOps' is set to true where we replace the
+/// non-dereferencing uses as well.
+LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
+                                       Operation *op,
+                                       ArrayRef extraIndices = {},
+                                       AffineMap indexRemap = AffineMap(),
+                                       ArrayRef extraOperands = {},
+                                       ArrayRef symbolOperands = {},
+                                       bool allowNonDereferencingOps = false);
+
+/// Rewrites the memref defined by this alloc op to have an identity layout map
+/// and updates all its indexing uses. Returns failure if any of its uses
+/// escape (while leaving the IR in a valid state).
+LogicalResult normalizeMemRef(memref::AllocOp *op);
+
+/// Uses the old memref type map layout and computes the new memref type to have
+/// a new shape and a layout map, where the old layout map has been normalized
+/// to an identity layout map. It returns the old memref type in case no
+/// normalization was needed or a failure occurs while transforming the old map
+/// layout to an identity layout map.
+MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
+                               unsigned numSymbolicOperands);
+
+/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
+/// its results equal to the number of operands, as a composition
+/// of all other AffineApplyOps reachable from input parameter 'operands'. If
+/// different operands were drawing results from multiple affine apply ops,
+/// these will also be collected into a single (multi-result) affine apply op.
+/// The final results of the composed AffineApplyOp are returned in output
+/// parameter 'results'. Returns the affine apply op created.
+Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
+                                       ArrayRef operands,
+                                       ArrayRef affineApplyOps,
+                                       SmallVectorImpl *results);
+
+/// Given an operation, inserts one or more single result affine apply
+/// operations, results of which are exclusively used by this operation.
+/// The operands of these newly created affine apply ops are
+/// guaranteed to be loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %v = "compute"(%idx, ...)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   %v = "compute"(%idx_, ...)
+///
+/// This allows the application of different transformations on send and
+/// compute (e.g., different shifts/delays).
+///
+/// Fills `sliceOps` with the list of affine.apply operations.
+/// In the following cases, `sliceOps` remains empty:
+///   1. If none of opInst's operands were the result of an affine.apply
+///      (i.e., there was no affine computation slice to create).
+///   2. If all the affine.apply ops supplying operands to this opInst did not
+///      have any uses other than those in this opInst.
+void createAffineComputationSlice(Operation *opInst,
+                                  SmallVectorImpl *sliceOps);
+
 } // namespace mlir
 
 #endif // MLIR_DIALECT_AFFINE_UTILS_H
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h
index e6123617f656e..bc9ed4f5b5c68 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@@ -32,6 +32,10 @@ std::unique_ptr createForLoopPeelingPass();
 /// inside of scf.for loops with known lower and upper bounds.
 std::unique_ptr createSCFForLoopCanonicalizationPass();
 
+/// Creates a pass that transforms a single ParallelLoop over N induction
+/// variables into another ParallelLoop over fewer than N induction variables.
+std::unique_ptr createParallelLoopCollapsingPass();
+
 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr createParallelLoopFusionPass();
 
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
index 45b6f8c92ee72..9e151440792ef 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -52,6 +52,22 @@ def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> {
   let constructor = "mlir::createParallelLoopFusionPass()";
 }
 
+def SCFParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
+  let summary = "Collapse parallel loops to use less induction variables";
+  let constructor = "mlir::createParallelLoopCollapsingPass()";
+  let options = [
+    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
+               "Which loop indices to combine 0th loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
+               "Which loop indices to combine into the position 1 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
+               "Which loop indices to combine into the position 2 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+  ];
+}
+
 def SCFParallelLoopSpecialization
     : Pass<"parallel-loop-specialization", "FuncOp"> {
   let summary = "Specialize parallel loops for vectorization";
diff --git a/mlir/include/mlir/Dialect/SCF/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils.h
index a062783d0bf60..38d80a0826dc9 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils.h
@@ -98,5 +98,65 @@ getSCFMinMaxExpr(Value value, SmallVectorImpl &dims,
                  SmallVectorImpl &symbols,
                  llvm::function_ref loopFilter = nullptr);
 
+/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
+/// `loops` contains a list of perfectly nested loops with bounds and steps
+/// independent of any loop induction variable involved in the nest.
+void coalesceLoops(MutableArrayRef loops);
+
+/// Take the ParallelLoop and for each set of dimension indices, combine them
+/// into a single dimension. combinedDimensions must contain each index into
+/// loops exactly once.
+void collapseParallelLoops(scf::ParallelOp loops,
+                           ArrayRef> combinedDimensions);
+
+/// Promotes the loop body of a scf::ForOp to its containing block if the loop
+/// was known to have a single iteration.
+LogicalResult promoteIfSingleIteration(scf::ForOp forOp);
+
+/// Unrolls this for operation by the specified unroll factor. Returns failure
+/// if the loop cannot be unrolled either due to restrictions or due to invalid
+/// unroll factors. Requires positive loop bounds and step. If specified,
+/// annotates the Ops in each unrolled iteration by applying `annotateFn`.
+LogicalResult loopUnrollByFactor(
+    scf::ForOp forOp, uint64_t unrollFactor,
+    function_ref annotateFn = nullptr);
+
+/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
+/// parametric tile sizes that the outer loops have a fixed number of iterations
+/// as defined in `sizes`.
+using Loops = SmallVector;
+using TileLoops = std::pair;
+TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef sizes);
+
+/// Performs tiling of imperfectly nested loops (with interchange) by
+/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
+/// occurrence in `forOps`, under each of the `targets`.
+/// Returns the new scf::ForOps, one per (`forOps`, `targets`) pair,
+/// nested immediately under each of `targets`.
+SmallVector tile(ArrayRef forOps, ArrayRef sizes,
+                           ArrayRef targets);
+
+/// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
+/// and sinking them, in their order of occurrence in `forOps`, under `target`.
+/// Returns the new scf::ForOps, one per `forOps`, nested immediately under
+/// `target`.
+Loops tile(ArrayRef forOps, ArrayRef sizes,
+           scf::ForOp target);
+
+/// Tile a nest of scf::ForOp loops rooted at `rootForOp` with the given
+/// (parametric) sizes. Sizes are expected to be strictly positive values at
+/// runtime.  If more sizes than loops are provided, discard the trailing values
+/// in sizes.  Assumes the loop nest is permutable.
+/// Returns the newly created intra-tile loops.
+Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef sizes);
+
+/// Get perfectly nested sequence of loops starting at root of loop nest
+/// (the first op being another scf::ForOp, and the second op - a terminator).
+/// A loop is perfectly nested iff: the first op in the loop's body is another
+/// scf::ForOp, and the second op is a terminator.
+void getPerfectlyNestedLoops(SmallVectorImpl &nestedLoops,
+                             scf::ForOp root);
+
 } // namespace mlir
+
 #endif // MLIR_DIALECT_SCF_UTILS_H_
diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
index 48399ad0d53a8..df4631690a3f9 100644
--- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h
+++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
@@ -15,7 +15,20 @@
 
 #include "mlir/IR/OpDefinition.h"
 
+//===----------------------------------------------------------------------===//
+// LoopLike Interfaces
+//===----------------------------------------------------------------------===//
+
 /// Include the generated interface declarations.
 #include "mlir/Interfaces/LoopLikeInterface.h.inc"
 
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+/// Move loop invariant code out of a `looplike` operation.
+LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
+} // namespace mlir
+
 #endif // MLIR_INTERFACES_LOOPLIKEINTERFACE_H_
diff --git a/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h b/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
new file mode 100644
index 0000000000000..f45d753564f45
--- /dev/null
+++ b/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
@@ -0,0 +1,70 @@
+//===- ControlFlowSinkUtils.h - ControlFlow Sink Utils ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
+#define MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
+
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+
+class DominanceInfo;
+class Operation;
+class Region;
+class RegionBranchOpInterface;
+
+/// Given a list of regions, perform control flow sinking on them. For each
+/// region, control-flow sinking moves operations that dominate the region but
+/// whose only users are in the region into the regions so that they aren't
+/// executed on paths where their results are not needed.
+///
+/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
+/// duplicating of ops. It should be made to accept a cost model to determine
+/// whether duplicating a particular op is profitable.
+///
+/// Example:
+///
+/// ```mlir
+/// %0 = arith.addi %arg0, %arg1
+/// scf.if %cond {
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// After control-flow sink:
+///
+/// ```mlir
+/// scf.if %cond {
+///   %0 = arith.addi %arg0, %arg1
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
+/// the given operation that only has users in the given region should be
+/// moved into that region.
+///
+/// Returns the number of operations sunk.
+size_t
+controlFlowSink(ArrayRef regions, DominanceInfo &domInfo,
+                function_ref shouldMoveIntoRegion);
+
+/// Populates `regions` with regions of the provided region branch op that are
+/// executed at most once and that are reachable given the current operands of
+/// the op. These regions can be passed to `controlFlowSink` to perform sinking
+/// on the regions of the operation.
+void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                    SmallVectorImpl &regions);
+
+} // namespace mlir
+
+#endif // MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 4876d705afcb9..e2b5f14d34adf 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -22,13 +22,8 @@
 
 namespace mlir {
 
-class AffineForOp;
 class GreedyRewriteConfig;
 
-/// Fusion mode to attempt. The default mode `Greedy` does both
-/// producer-consumer and sibling fusion.
-enum FusionMode { Greedy, ProducerConsumer, Sibling };
-
 //===----------------------------------------------------------------------===//
 // Passes
 //===----------------------------------------------------------------------===//
@@ -56,31 +51,10 @@ std::unique_ptr createControlFlowSinkPass();
 /// Creates a pass to perform common sub expression elimination.
 std::unique_ptr createCSEPass();
 
-/// Creates a loop fusion pass which fuses loops according to type of fusion
-/// specified in `fusionMode`. Buffers of size less than or equal to
-/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
-std::unique_ptr>
-createLoopFusionPass(unsigned fastMemorySpace = 0,
-                     uint64_t localBufSizeThreshold = 0,
-                     bool maximalFusion = false,
-                     enum FusionMode fusionMode = FusionMode::Greedy);
-
 /// Creates a loop invariant code motion pass that hoists loop invariant
 /// instructions out of the loop.
 std::unique_ptr createLoopInvariantCodeMotionPass();
 
-/// Creates a pass to pipeline explicit movement of data across levels of the
-/// memory hierarchy.
-std::unique_ptr> createPipelineDataTransferPass();
-
-/// Creates a pass that transforms perfectly nested loops with independent
-/// bounds into a single loop.
-std::unique_ptr> createLoopCoalescingPass();
-
-/// Creates a pass that transforms a single ParallelLoop over N induction
-/// variables into another ParallelLoop over less than N induction variables.
-std::unique_ptr createParallelLoopCollapsingPass();
-
 /// Creates a pass to strip debug information from a function.
 std::unique_ptr createStripDebugInfoPass();
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 44bf475af24c9..3c3b43d75e440 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -16,207 +16,6 @@
 include "mlir/Pass/PassBase.td"
 include "mlir/Rewrite/PassUtil.td"
 
-def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
-  let summary = "Fuse affine loop nests";
-  let description = [{
-    This pass performs fusion of loop nests using a slicing-based approach. It
-    combines two fusion strategies: producer-consumer fusion and sibling fusion.
-    Producer-consumer fusion is aimed at fusing pairs of loops where the first
-    one writes to a memref that the second reads. Sibling fusion targets pairs
-    of loops that share no dependences between them but that load from the same
-    memref. The fused loop nests, when possible, are rewritten to access
-    significantly smaller local buffers instead of the original memref's, and
-    the latter are often either completely optimized away or contracted. This
-    transformation leads to enhanced locality and lower memory footprint through
-    the elimination or contraction of temporaries/intermediate memref's. These
-    benefits are sometimes achieved at the expense of redundant computation
-    through a cost model that evaluates available choices such as the depth at
-    which a source slice should be materialized in the designation slice.
-
-    Example 1: Producer-consumer fusion.
-    Input:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<10xf32>
-      %1 = memref.alloc() : memref<10xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[%arg2] : memref<10xf32>
-        affine.store %cst, %1[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %0[%arg2] : memref<10xf32>
-        %3 = arith.addf %2, %2 : f32
-        affine.store %3, %arg0[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %1[%arg2] : memref<10xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<1xf32>
-      %1 = memref.alloc() : memref<1xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[0] : memref<1xf32>
-        affine.store %cst, %1[0] : memref<1xf32>
-        %2 = affine.load %1[0] : memref<1xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-        %4 = affine.load %0[0] : memref<1xf32>
-        %5 = arith.addf %4, %4 : f32
-        affine.store %5, %arg0[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-
-    Example 2: Sibling fusion.
-    Input:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.addf %0, %1 : f32
-          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %5 = arith.addf %3, %4 : f32
-          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-  }];
-  let constructor = "mlir::createLoopFusionPass()";
-  let options = [
-    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
-           /*default=*/"0.30f", "Fractional increase in additional computation "
-                                "tolerated while fusing">,
-    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
-           /*default=*/"0",
-           "Faster memory space number to promote fusion buffers to">,
-    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
-           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
-                            "to fast memory space">,
-    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
-           "Enables maximal loop fusion">,
-    Option<"affineFusionMode", "mode", "enum FusionMode",
-           "mlir::FusionMode::Greedy", "fusion mode to attempt",
-           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
-           " \"greedy\", \"Perform greedy (both producer-consumer and sibling)  fusion\"), "
-           "clEnumValN( mlir::FusionMode::ProducerConsumer, "
-           "\"producer\", \"Perform only producer-consumer fusion\"), "
-           "clEnumValN( mlir::FusionMode::Sibling, "
-           "\"sibling\", \"Perform only sibling fusion\"))">,
-    ];
-  let dependentDialects = ["memref::MemRefDialect"];
-}
-
-def AffinePipelineDataTransfer
-    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
-  let summary = "Pipeline non-blocking data transfers between explicitly "
-                "managed levels of the memory hierarchy";
-  let description = [{
-    This pass performs a transformation to overlap non-blocking DMA operations
-    in a loop with computations through double buffering. This is achieved by
-    advancing dma_start operations with respect to other operations.
-
-    Input
-
-    ```mlir
-    func @pipelinedatatransfer() {
-      %0 = memref.alloc() : memref<256xf32>
-      %1 = memref.alloc() : memref<32xf32, 1>
-      %2 = memref.alloc() : memref<1xf32>
-      %c0 = arith.constant 0 : index
-      %c128 = arith.constant 128 : index
-      affine.for %i0 = 0 to 8 {
-        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
-        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
-        %3 = affine.load %1[%i0] : memref<32xf32, 1>
-        %4 = "compute"(%3) : (f32) -> f32
-        affine.store %4, %1[%i0] : memref<32xf32, 1>
-      }
-      return
-    }
-    ```
-
-    Output
-
-    ```mlir
-    module {
-      func @pipelinedatatransfer() {
-        %c8 = arith.constant 8 : index
-        %c0 = arith.constant 0 : index
-        %0 = memref.alloc() : memref<256xf32>
-        %c0_0 = arith.constant 0 : index
-        %c128 = arith.constant 128 : index
-        %1 = memref.alloc() : memref<2x32xf32, 1>
-        %2 = memref.alloc() : memref<2x1xf32>
-        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-        affine.for %arg0 = 1 to 8 {
-          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-          %8 = affine.apply #map3(%arg0)
-          %9 = affine.apply #map4(%8)
-          %10 = affine.apply #map4(%8)
-          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-          %12 = "compute"(%11) : (f32) -> f32
-          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-        }
-        %3 = affine.apply #map3(%c8)
-        %4 = affine.apply #map4(%3)
-        %5 = affine.apply #map4(%3)
-        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        %7 = "compute"(%6) : (f32) -> f32
-        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        memref.dealloc %2 : memref<2x1xf32>
-        memref.dealloc %1 : memref<2x32xf32, 1>
-        return
-      }
-    }
-    ```
-  }];
-  let constructor = "mlir::createPipelineDataTransferPass()";
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let description = [{
@@ -339,34 +138,11 @@ def LocationSnapshot : Pass<"snapshot-op-locations"> {
   ];
 }
 
-def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
-  let summary = "Coalesce nested loops with independent bounds into a single "
-                "loop";
-  let constructor = "mlir::createLoopCoalescingPass()";
-  let dependentDialects = ["arith::ArithmeticDialect"];
-}
-
 def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let summary = "Hoist loop invariant instructions outside of the loop";
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
 
-def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
-  let summary = "Collapse parallel loops to use less induction variables";
-  let constructor = "mlir::createParallelLoopCollapsingPass()";
-  let options = [
-    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
-               "Which loop indices to combine 0th loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
-               "Which loop indices to combine into the position 1 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
-               "Which loop indices to combine into the position 2 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-  ];
-}
-
 def PrintOpStats : Pass<"print-op-stats"> {
   let summary = "Print statistics of operations";
   let constructor = "mlir::createPrintOpStatsPass()";
diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h
deleted file mode 100644
index 5efbb19b08d25..0000000000000
--- a/mlir/include/mlir/Transforms/Utils.h
+++ /dev/null
@@ -1,200 +0,0 @@
-//===- Utils.h - General transformation utilities ---------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file defines prototypes for various transformation utilities for
-// memref's and non-loop IR structures. These are not passes by themselves but
-// are used either by passes, optimization sequences, or in turn by other
-// transformation utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_TRANSFORMS_UTILS_H
-#define MLIR_TRANSFORMS_UTILS_H
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/AffineMap.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-
-namespace mlir {
-
-class AffineApplyOp;
-class AffineForOp;
-class DominanceInfo;
-class Location;
-class OpBuilder;
-
-namespace memref {
-class AllocOp;
-} // namespace memref
-
-/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
-/// optionally remapping the old memref's indices using the supplied affine map,
-/// `indexRemap`. The new memref could be of a different shape or rank.
-/// `extraIndices` provides any additional access indices to be added to the
-/// start.
-///
-/// `indexRemap` remaps indices of the old memref access to a new set of indices
-/// that are used to index the memref. Additional input operands to indexRemap
-/// can be optionally provided in `extraOperands`, and they occupy the start
-/// of its input list. `indexRemap`'s dimensional inputs are expected to
-/// correspond to memref's indices, and its symbolic inputs if any should be
-/// provided in `symbolOperands`.
-///
-/// `domOpFilter`, if non-null, restricts the replacement to only those
-/// operations that are dominated by the former; similarly, `postDomOpFilter`
-/// restricts replacement to only those operations that are postdominated by it.
-///
-/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
-/// uses of a memref without any requirement for access index rewrites as long
-/// as the user operation has the MemRefsNormalizable trait. The default value
-/// of this flag is false.
-///
-/// 'replaceInDeallocOp', if set, lets DeallocOp, a non-dereferencing user, to
-/// also be a candidate for replacement. The default value of this flag is
-/// false.
-///
-/// Returns true on success and false if the replacement is not possible,
-/// whenever a memref is used as an operand in a non-dereferencing context and
-/// 'allowNonDereferencingOps' is false, except for dealloc's on the memref
-/// which are left untouched. See comments at function definition for an
-/// example.
-//
-//  Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
-//  The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
-//  index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
-//  d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
-//  extra operands, note that 'indexRemap' would just be applied to existing
-//  indices (%i, %j).
-//  TODO: allow extraIndices to be added at any position.
-LogicalResult replaceAllMemRefUsesWith(
-    Value oldMemRef, Value newMemRef, ArrayRef extraIndices = {},
-    AffineMap indexRemap = AffineMap(), ArrayRef extraOperands = {},
-    ArrayRef symbolOperands = {}, Operation *domOpFilter = nullptr,
-    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
-    bool replaceInDeallocOp = false);
-
-/// Performs the same replacement as the other version above but only for the
-/// dereferencing uses of `oldMemRef` in `op`, except in cases where
-/// 'allowNonDereferencingOps' is set to true where we replace the
-/// non-dereferencing uses as well.
-LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
-                                       Operation *op,
-                                       ArrayRef extraIndices = {},
-                                       AffineMap indexRemap = AffineMap(),
-                                       ArrayRef extraOperands = {},
-                                       ArrayRef symbolOperands = {},
-                                       bool allowNonDereferencingOps = false);
-
-/// Rewrites the memref defined by this alloc op to have an identity layout map
-/// and updates all its indexing uses. Returns failure if any of its uses
-/// escape (while leaving the IR in a valid state).
-LogicalResult normalizeMemRef(memref::AllocOp *op);
-
-/// Uses the old memref type map layout and computes the new memref type to have
-/// a new shape and a layout map, where the old layout map has been normalized
-/// to an identity layout map. It returns the old memref in case no
-/// normalization was needed or a failure occurs while transforming the old map
-/// layout to an identity layout map.
-MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
-                               unsigned numSymbolicOperands);
-
-/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
-/// its results equal to the number of operands, as a composition
-/// of all other AffineApplyOps reachable from input parameter 'operands'. If
-/// different operands were drawing results from multiple affine apply ops,
-/// these will also be collected into a single (multi-result) affine apply op.
-/// The final results of the composed AffineApplyOp are returned in output
-/// parameter 'results'. Returns the affine apply op created.
-Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
-                                       ArrayRef operands,
-                                       ArrayRef affineApplyOps,
-                                       SmallVectorImpl *results);
-
-/// Given an operation, inserts one or more single result affine apply
-/// operations, results of which are exclusively used by this operation.
-/// The operands of these newly created affine apply ops are
-/// guaranteed to be loop iterators or terminal symbols of a function.
-///
-/// Before
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %v = "compute"(%idx, ...)
-///
-/// After
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
-///   %v = "compute"(%idx_, ...)
-
-/// This allows the application of different transformations on send and
-/// compute (for eg. different shifts/delays)
-///
-/// Fills `sliceOps` with the list of affine.apply operations.
-/// In the following cases, `sliceOps` remains empty:
-///   1. If none of opInst's operands were the result of an affine.apply
-///      (i.e., there was no affine computation slice to create).
-///   2. If all the affine.apply op's supplying operands to this opInst did not
-///      have any uses other than those in this opInst.
-void createAffineComputationSlice(Operation *opInst,
-                                  SmallVectorImpl *sliceOps);
-
-/// Given a list of regions, perform control flow sinking on them. For each
-/// region, control-flow sinking moves operations that dominate the region but
-/// whose only users are in the region into the regions so that they aren't
-/// executed on paths where their results are not needed.
-///
-/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
-/// duplicating of ops. It should be made to accept a cost model to determine
-/// whether duplicating a particular op is profitable.
-///
-/// Example:
-///
-/// ```mlir
-/// %0 = arith.addi %arg0, %arg1
-/// scf.if %cond {
-///   scf.yield %0
-/// } else {
-///   scf.yield %arg2
-/// }
-/// ```
-///
-/// After control-flow sink:
-///
-/// ```mlir
-/// scf.if %cond {
-///   %0 = arith.addi %arg0, %arg1
-///   scf.yield %0
-/// } else {
-///   scf.yield %arg2
-/// }
-/// ```
-///
-/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
-/// the given operation that only has users in the given operation should be
-/// moved into that region.
-///
-/// Returns the number of operations sunk.
-size_t
-controlFlowSink(ArrayRef regions, DominanceInfo &domInfo,
-                function_ref shouldMoveIntoRegion);
-
-/// Populates `regions` with regions of the provided region branch op that are
-/// executed at most once at that are reachable given the current operands of
-/// the op. These regions can be passed to `controlFlowSink` to perform sinking
-/// on the regions of the operation.
-void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
-                                    SmallVectorImpl<Region *> &regions);
-
-} // namespace mlir
-
-#endif // MLIR_TRANSFORMS_UTILS_H
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index d2faff9d32389..99a8b83882970 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -27,7 +27,6 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/Sequence.h"
diff --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
index 31e4cca72e90b..27808a0a72c61 100644
--- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
+++ b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
@@ -23,7 +23,6 @@
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 using namespace mlir::scf;
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index f914bad4c6eaf..97a3be46b7ee8 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -33,7 +33,6 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 98968eb20d0a7..551c3ab5b2ccf 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -16,7 +16,6 @@
 
 #include "../PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index f902909a7c3b2..587a0d2d67fa4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -22,12 +22,12 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index 19c16cf9ce2a7..bd331525b01c0 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -17,13 +17,13 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
index 2fed4921c2a4b..61bae249bff98 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -18,10 +18,10 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h.inc"
 #include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/Support/Debug.h"
 #include <deque>
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index 784c5d04f1a44..a99bb5789dac4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -4,9 +4,12 @@ add_mlir_dialect_library(MLIRAffineTransforms
   AffineLoopNormalize.cpp
   AffineParallelize.cpp
   AffineScalarReplacement.cpp
+  LoopCoalescing.cpp
+  LoopFusion.cpp
   LoopTiling.cpp
   LoopUnroll.cpp
   LoopUnrollAndJam.cpp
+  PipelineDataTransfer.cpp
   SuperVectorize.cpp
   SimplifyAffineStructures.cpp
 
diff --git a/mlir/lib/Transforms/LoopCoalescing.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
similarity index 97%
rename from mlir/lib/Transforms/LoopCoalescing.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
index 4b6780398103a..75f57c0d947da 100644
--- a/mlir/lib/Transforms/LoopCoalescing.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
@@ -8,9 +8,10 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
similarity index 99%
rename from mlir/lib/Transforms/LoopFusion.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
index 3ab98e0ee20cb..588dc63fbbff0 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -16,14 +16,14 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
index f15571660b4d8..a66fcdae2ecd4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -17,11 +17,11 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index fb7cd11ebe6da..702319f8ffd38 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -12,11 +12,11 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
index b763017d0837f..273ffdb579d4b 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -37,12 +37,12 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
index c3d55a60e5634..a7262d76c88d3 100644
--- a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -16,6 +17,10 @@ namespace mlir {
 template <typename ConcreteDialect>
 void registerDialect(DialectRegistry &registry);
 
+namespace arith {
+class ArithmeticDialect;
+} // namespace arith
+
 namespace linalg {
 class LinalgDialect;
 } // namespace linalg
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
similarity index 99%
rename from mlir/lib/Transforms/PipelineDataTransfer.cpp
rename to mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
index 87ac14e770249..7429ecef39f38 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
@@ -11,17 +11,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Transforms/Passes.h"
-
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
+#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
index 08ccac678113e..d90b02c6f14c4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
@@ -14,9 +14,9 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/Utils.h"
 
 #define DEBUG_TYPE "simplify-affine-structure"
 
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index 3bc37cfa3ba23..4393e2971a031 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_mlir_dialect_library(MLIRAffineUtils
+  LoopFusionUtils.cpp
+  LoopUtils.cpp
   Utils.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -7,5 +9,6 @@ add_mlir_dialect_library(MLIRAffineUtils
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAnalysis
+  MLIRMemRef
   MLIRTransformUtils
   )
diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
similarity index 99%
rename from mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
index 54adec2c09b1a..dcaa88a6dca19 100644
--- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
@@ -10,21 +10,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/LoopFusionUtils.h"
-
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
similarity index 81%
rename from mlir/lib/Transforms/Utils/LoopUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index b9352ac2c2b75..f0e784aeb81dc 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -10,14 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/LoopUtils.h"
-
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -25,7 +25,6 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/RegionUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
@@ -108,42 +107,9 @@ getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
     lb.erase();
 }
 
-// Build the IR that performs ceil division of a positive value by a constant:
-//    ceildiv(a, B) = divis(a + (B-1), B)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             int64_t divisor) {
-  assert(divisor > 0 && "expected positive divisor");
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value divisorMinusOneCst =
-      builder.create(loc, divisor - 1);
-  Value divisorCst = builder.create(loc, divisor);
-  Value sum = builder.create(loc, dividend, divisorMinusOneCst);
-  return builder.create(loc, sum, divisorCst);
-}
-
-// Build the IR that performs ceil division of a positive value by another
-// positive value:
-//    ceildiv(a, b) = divis(a + (b - 1), b)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             Value divisor) {
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value cstOne = builder.create(loc, 1);
-  Value divisorMinusOne = builder.create(loc, divisor, cstOne);
-  Value sum = builder.create(loc, dividend, divisorMinusOne);
-  return builder.create(loc, sum, divisor);
-}
-
 /// Helper to replace uses of loop carried values (iter_args) and loop
-/// yield values while promoting single iteration affine.for and scf.for ops.
-template <typename AffineOrSCFForOp>
-static void replaceIterArgsAndYieldResults(AffineOrSCFForOp forOp) {
-  static_assert(
-      llvm::is_one_of<AffineOrSCFForOp, AffineForOp, scf::ForOp>::value,
-      "only for affine.for and scf.for ops");
+/// yield values while promoting single iteration affine.for ops.
+static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
   // Replace uses of iter arguments with iter operands (initial values).
   auto iterOperands = forOp.getIterOperands();
   auto iterArgs = forOp.getRegionIterArgs();
@@ -203,46 +169,6 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
   return success();
 }
 
-/// Promotes the loop body of a forOp to its containing block if the forOp
-/// it can be determined that the loop has a single iteration.
-LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) {
-  auto lbCstOp = forOp.getLowerBound().getDefiningOp();
-  auto ubCstOp = forOp.getUpperBound().getDefiningOp();
-  auto stepCstOp = forOp.getStep().getDefiningOp();
-  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
-      ubCstOp.value() < 0 || stepCstOp.value() < 0)
-    return failure();
-  int64_t tripCount =
-      mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
-  if (tripCount != 1)
-    return failure();
-  auto iv = forOp.getInductionVar();
-  iv.replaceAllUsesWith(lbCstOp);
-
-  replaceIterArgsAndYieldResults(forOp);
-
-  // Move the loop body operations, except for its terminator, to the loop's
-  // containing block.
-  auto *parentBlock = forOp->getBlock();
-  forOp.getBody()->getTerminator()->erase();
-  parentBlock->getOperations().splice(Block::iterator(forOp),
-                                      forOp.getBody()->getOperations());
-  forOp.erase();
-  return success();
-}
-
-/// Promotes all single iteration 'for' ops in `f`, i.e., moves
-/// their body into the containing Block.
-void mlir::promoteSingleIterationLoops(FuncOp f) {
-  // Gathers all innermost loops through a post order pruned walk.
-  f.walk([](Operation *op) {
-    if (auto forOp = dyn_cast(op))
-      (void)promoteIfSingleIteration(forOp);
-    else if (auto forOp = dyn_cast(op))
-      (void)promoteIfSingleIteration(forOp);
-  });
-}
-
 /// Generates an affine.for op with the specified lower and upper bounds
 /// while generating the right IV remappings to realize shifts for operations in
 /// its body. The operations that go into the loop body are specified in
@@ -1011,38 +937,22 @@ mlir::tilePerfectlyNestedParametric(MutableArrayRef input,
   return success();
 }
 
-/// Collect perfectly nested loops starting from `rootForOps`.  Loops are
-/// perfectly nested if each loop is the first and only non-terminator operation
-/// in the parent loop.  Collect at most `maxLoops` loops and append them to
-/// `forOps`.
-template <typename T>
-static void getPerfectlyNestedLoopsImpl(
-    SmallVectorImpl<T> &forOps, T rootForOp,
-    unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
-  for (unsigned i = 0; i < maxLoops; ++i) {
-    forOps.push_back(rootForOp);
-    Block &body = rootForOp.getRegion().front();
-    if (body.begin() != std::prev(body.end(), 2))
-      return;
-
-    rootForOp = dyn_cast<T>(&body.front());
-    if (!rootForOp)
-      return;
-  }
-}
-
 /// Get perfectly nested sequence of loops starting at root of loop nest
 /// (the first op being another AffineFor, and the second op - a terminator).
 /// A loop is perfectly nested iff: the first op in the loop's body is another
 /// AffineForOp, and the second op is a terminator).
 void mlir::getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                                    AffineForOp root) {
-  getPerfectlyNestedLoopsImpl(nestedLoops, root);
-}
+  for (unsigned i = 0; i < std::numeric_limits<unsigned>::max(); ++i) {
+    nestedLoops.push_back(root);
+    Block &body = root.getRegion().front();
+    if (body.begin() != std::prev(body.end(), 2))
+      return;
 
-void mlir::getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
-                                   scf::ForOp root) {
-  getPerfectlyNestedLoopsImpl(nestedLoops, root);
+    root = dyn_cast<AffineForOp>(&body.front());
+    if (!root)
+      return;
+  }
 }
 
 /// Identify valid and profitable bands of loops to tile. This is currently just
@@ -1084,10 +994,10 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
   return loopUnrollByFactor(forOp, unrollFactor);
 }
 
-/// Generates unrolled copies of AffineForOp or scf::ForOp 'loopBodyBlock', with
-/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
-/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
-/// unrolled iteration using annotateFn.
+/// Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated
+/// 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap 'forOpIV' for each
+/// unrolled body. If specified, annotates the Ops in each unrolled iteration
+/// using annotateFn.
 static void generateUnrolledLoop(
     Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
     function_ref ivRemapFn,
@@ -1237,127 +1147,6 @@ LogicalResult mlir::loopUnrollByFactor(
   return success();
 }
 
-/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
-LogicalResult mlir::loopUnrollByFactor(
-    scf::ForOp forOp, uint64_t unrollFactor,
-    function_ref annotateFn) {
-  assert(unrollFactor > 0 && "expected positive unroll factor");
-
-  // Return if the loop body is empty.
-  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
-    return success();
-
-  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
-  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
-  OpBuilder boundsBuilder(forOp);
-  auto loc = forOp.getLoc();
-  auto step = forOp.getStep();
-  Value upperBoundUnrolled;
-  Value stepUnrolled;
-  bool generateEpilogueLoop = true;
-
-  auto lbCstOp = forOp.getLowerBound().getDefiningOp();
-  auto ubCstOp = forOp.getUpperBound().getDefiningOp();
-  auto stepCstOp = forOp.getStep().getDefiningOp();
-  if (lbCstOp && ubCstOp && stepCstOp) {
-    // Constant loop bounds computation.
-    int64_t lbCst = lbCstOp.value();
-    int64_t ubCst = ubCstOp.value();
-    int64_t stepCst = stepCstOp.value();
-    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
-           "expected positive loop bounds and step");
-    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
-
-    if (unrollFactor == 1) {
-      if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
-        return failure();
-      return success();
-    }
-
-    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
-    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
-    assert(upperBoundUnrolledCst <= ubCst);
-    int64_t stepUnrolledCst = stepCst * unrollFactor;
-
-    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
-    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
-    if (generateEpilogueLoop)
-      upperBoundUnrolled = boundsBuilder.create(
-          loc, upperBoundUnrolledCst);
-    else
-      upperBoundUnrolled = ubCstOp;
-
-    // Create constant for 'stepUnrolled'.
-    stepUnrolled = stepCst == stepUnrolledCst
-                       ? step
-                       : boundsBuilder.create(
-                             loc, stepUnrolledCst);
-  } else {
-    // Dynamic loop bounds computation.
-    // TODO: Add dynamic asserts for negative lb/ub/step, or
-    // consider using ceilDiv from AffineApplyExpander.
-    auto lowerBound = forOp.getLowerBound();
-    auto upperBound = forOp.getUpperBound();
-    Value diff =
-        boundsBuilder.create(loc, upperBound, lowerBound);
-    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
-    Value unrollFactorCst =
-        boundsBuilder.create(loc, unrollFactor);
-    Value tripCountRem =
-        boundsBuilder.create(loc, tripCount, unrollFactorCst);
-    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
-    Value tripCountEvenMultiple =
-        boundsBuilder.create(loc, tripCount, tripCountRem);
-    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
-    upperBoundUnrolled = boundsBuilder.create(
-        loc, lowerBound,
-        boundsBuilder.create(loc, tripCountEvenMultiple, step));
-    // Scale 'step' by 'unrollFactor'.
-    stepUnrolled =
-        boundsBuilder.create(loc, step, unrollFactorCst);
-  }
-
-  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
-  if (generateEpilogueLoop) {
-    OpBuilder epilogueBuilder(forOp->getContext());
-    epilogueBuilder.setInsertionPoint(forOp->getBlock(),
-                                      std::next(Block::iterator(forOp)));
-    auto epilogueForOp = cast(epilogueBuilder.clone(*forOp));
-    epilogueForOp.setLowerBound(upperBoundUnrolled);
-
-    // Update uses of loop results.
-    auto results = forOp.getResults();
-    auto epilogueResults = epilogueForOp.getResults();
-    auto epilogueIterOperands = epilogueForOp.getIterOperands();
-
-    for (auto e : llvm::zip(results, epilogueResults, epilogueIterOperands)) {
-      std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
-      epilogueForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
-    }
-    (void)promoteIfSingleIteration(epilogueForOp);
-  }
-
-  // Create unrolled loop.
-  forOp.setUpperBound(upperBoundUnrolled);
-  forOp.setStep(stepUnrolled);
-
-  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
-  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
-
-  generateUnrolledLoop(
-      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
-      [&](unsigned i, Value iv, OpBuilder b) {
-        // iv' = iv + step * i;
-        auto stride = b.create(
-            loc, step, b.create(loc, i));
-        return b.create(loc, iv, stride);
-      },
-      annotateFn, iterArgs, yieldedValues);
-  // Promote the loop body up if this has turned into a single iteration loop.
-  (void)promoteIfSingleIteration(forOp);
-  return success();
-}
-
 LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
                                             uint64_t unrollJamFactor) {
   Optional mayBeConstantTripCount = getConstantTripCount(forOp);
@@ -1888,61 +1677,25 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
   return innerLoops;
 }
 
-static Loops stripmineSink(scf::ForOp forOp, Value factor,
-                           ArrayRef targets) {
-  auto originalStep = forOp.getStep();
-  auto iv = forOp.getInductionVar();
-
-  OpBuilder b(forOp);
-  forOp.setStep(b.create(forOp.getLoc(), originalStep, factor));
-
-  Loops innerLoops;
-  for (auto t : targets) {
-    // Save information for splicing ops out of t when done
-    auto begin = t.getBody()->begin();
-    auto nOps = t.getBody()->getOperations().size();
-
-    // Insert newForOp before the terminator of `t`.
-    auto b = OpBuilder::atBlockTerminator((t.getBody()));
-    Value stepped = b.create(t.getLoc(), iv, forOp.getStep());
-    Value less = b.create(t.getLoc(), arith::CmpIPredicate::slt,
-                                         forOp.getUpperBound(), stepped);
-    Value ub =
-        b.create(t.getLoc(), less, forOp.getUpperBound(), stepped);
-
-    // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
-    auto newForOp = b.create(t.getLoc(), iv, ub, originalStep);
-    newForOp.getBody()->getOperations().splice(
-        newForOp.getBody()->getOperations().begin(),
-        t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
-    replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
-                               newForOp.getRegion());
-
-    innerLoops.push_back(newForOp);
-  }
-
-  return innerLoops;
-}
-
 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
 // Returns the new AffineForOps, nested immediately under `target`.
-template 
-static ForType stripmineSink(ForType forOp, SizeType factor, ForType target) {
+template 
+static AffineForOp stripmineSink(AffineForOp forOp, SizeType factor,
+                                 AffineForOp target) {
   // TODO: Use cheap structural assertions that targets are nested under
   // forOp and that targets are not nested under each other when DominanceInfo
   // exposes the capability. It seems overkill to construct a whole function
   // dominance tree at this point.
-  auto res = stripmineSink(forOp, factor, ArrayRef{target});
+  auto res = stripmineSink(forOp, factor, ArrayRef(target));
   assert(res.size() == 1 && "Expected 1 inner forOp");
   return res[0];
 }
 
-template 
-static SmallVector, 8>
-tileImpl(ArrayRef forOps, ArrayRef sizes,
-         ArrayRef targets) {
-  SmallVector, 8> res;
-  SmallVector currentTargets(targets.begin(), targets.end());
+SmallVector, 8>
+mlir::tile(ArrayRef forOps, ArrayRef sizes,
+           ArrayRef targets) {
+  SmallVector, 8> res;
+  SmallVector currentTargets(targets.begin(), targets.end());
   for (auto it : llvm::zip(forOps, sizes)) {
     auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
     res.push_back(step);
@@ -1951,286 +1704,15 @@ tileImpl(ArrayRef forOps, ArrayRef sizes,
   return res;
 }
 
-SmallVector, 8>
-mlir::tile(ArrayRef forOps, ArrayRef sizes,
-           ArrayRef targets) {
-  return tileImpl(forOps, sizes, targets);
-}
-
-SmallVector mlir::tile(ArrayRef forOps,
-                                 ArrayRef sizes,
-                                 ArrayRef targets) {
-  return tileImpl(forOps, sizes, targets);
-}
-
-template 
-static SmallVector
-tileImpl(ArrayRef forOps, ArrayRef sizes, ForType target) {
-  SmallVector res;
-  for (auto loops : tile(forOps, sizes, ArrayRef{target})) {
-    assert(loops.size() == 1);
-    res.push_back(loops[0]);
-  }
-  return res;
-}
-
 SmallVector mlir::tile(ArrayRef forOps,
                                        ArrayRef sizes,
                                        AffineForOp target) {
-  return tileImpl(forOps, sizes, target);
-}
-
-Loops mlir::tile(ArrayRef forOps, ArrayRef sizes,
-                 scf::ForOp target) {
-  return tileImpl(forOps, sizes, target);
-}
-
-Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef sizes) {
-  // Collect perfectly nested loops.  If more size values provided than nested
-  // loops available, truncate `sizes`.
-  SmallVector forOps;
-  forOps.reserve(sizes.size());
-  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
-  if (forOps.size() < sizes.size())
-    sizes = sizes.take_front(forOps.size());
-
-  return ::tile(forOps, sizes, forOps.back());
-}
-
-// Hoist the ops within `outer` that appear before `inner`.
-// Such ops include the ops that have been introduced by parametric tiling.
-// Ops that come from triangular loops (i.e. that belong to the program slice
-// rooted at `outer`) and ops that have side effects cannot be hoisted.
-// Return failure when any op fails to hoist.
-static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
-  SetVector forwardSlice;
-  getForwardSlice(
-      outer.getInductionVar(), &forwardSlice,
-      [&inner](Operation *op) { return op != inner.getOperation(); });
-  LogicalResult status = success();
-  SmallVector toHoist;
-  for (auto &op : outer.getBody()->without_terminator()) {
-    // Stop when encountering the inner loop.
-    if (&op == inner.getOperation())
-      break;
-    // Skip over non-hoistable ops.
-    if (forwardSlice.count(&op) > 0) {
-      status = failure();
-      continue;
-    }
-    // Skip intermediate scf::ForOp, these are not considered a failure.
-    if (isa(op))
-      continue;
-    // Skip other ops with regions.
-    if (op.getNumRegions() > 0) {
-      status = failure();
-      continue;
-    }
-    // Skip if op has side effects.
-    // TODO: loads to immutable memory regions are ok.
-    if (!MemoryEffectOpInterface::hasNoEffect(&op)) {
-      status = failure();
-      continue;
-    }
-    toHoist.push_back(&op);
-  }
-  auto *outerForOp = outer.getOperation();
-  for (auto *op : toHoist)
-    op->moveBefore(outerForOp);
-  return status;
-}
-
-// Traverse the interTile and intraTile loops and try to hoist ops such that
-// bands of perfectly nested loops are isolated.
-// Return failure if either perfect interTile or perfect intraTile bands cannot
-// be formed.
-static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
-  LogicalResult status = success();
-  const Loops &interTile = tileLoops.first;
-  const Loops &intraTile = tileLoops.second;
-  auto size = interTile.size();
-  assert(size == intraTile.size());
-  if (size <= 1)
-    return success();
-  for (unsigned s = 1; s < size; ++s)
-    status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
-                               : failure();
-  for (unsigned s = 1; s < size; ++s)
-    status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
-                               : failure();
-  return status;
-}
-
-TileLoops mlir::extractFixedOuterLoops(scf::ForOp rootForOp,
-                                       ArrayRef sizes) {
-  // Collect perfectly nested loops.  If more size values provided than nested
-  // loops available, truncate `sizes`.
-  SmallVector forOps;
-  forOps.reserve(sizes.size());
-  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
-  if (forOps.size() < sizes.size())
-    sizes = sizes.take_front(forOps.size());
-
-  // Compute the tile sizes such that i-th outer loop executes size[i]
-  // iterations.  Given that the loop current executes
-  //   numIterations = ceildiv((upperBound - lowerBound), step)
-  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
-  SmallVector tileSizes;
-  tileSizes.reserve(sizes.size());
-  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
-    assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
-
-    auto forOp = forOps[i];
-    OpBuilder builder(forOp);
-    auto loc = forOp.getLoc();
-    Value diff = builder.create(loc, forOp.getUpperBound(),
-                                               forOp.getLowerBound());
-    Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
-    Value iterationsPerBlock =
-        ceilDivPositive(builder, loc, numIterations, sizes[i]);
-    tileSizes.push_back(iterationsPerBlock);
-  }
-
-  // Call parametric tiling with the given sizes.
-  auto intraTile = tile(forOps, tileSizes, forOps.back());
-  TileLoops tileLoops = std::make_pair(forOps, intraTile);
-
-  // TODO: for now we just ignore the result of band isolation.
-  // In the future, mapping decisions may be impacted by the ability to
-  // isolate perfectly nested bands.
-  (void)tryIsolateBands(tileLoops);
-
-  return tileLoops;
-}
-
-/// Return the new lower bound, upper bound, and step in that order. Insert any
-/// additional bounds calculations before the given builder and any additional
-/// conversion back to the original loop induction value inside the given Block.
-static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
-                                OpBuilder &insideLoopBuilder, Location loc,
-                                Value lowerBound, Value upperBound, Value step,
-                                Value inductionVar) {
-  // Check if the loop is already known to have a constant zero lower bound or
-  // a constant one step.
-  bool isZeroBased = false;
-  if (auto ubCst = lowerBound.getDefiningOp())
-    isZeroBased = ubCst.value() == 0;
-
-  bool isStepOne = false;
-  if (auto stepCst = step.getDefiningOp())
-    isStepOne = stepCst.value() == 1;
-
-  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
-  // assuming the step is strictly positive.  Update the bounds and the step
-  // of the loop to go from 0 to the number of iterations, if necessary.
-  // TODO: introduce support for negative steps or emit dynamic asserts
-  // on step positivity, whatever gets implemented first.
-  if (isZeroBased && isStepOne)
-    return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
-            /*step=*/step};
-
-  Value diff = boundsBuilder.create(loc, upperBound, lowerBound);
-  Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
-
-  Value newLowerBound =
-      isZeroBased ? lowerBound
-                  : boundsBuilder.create(loc, 0);
-  Value newStep =
-      isStepOne ? step : boundsBuilder.create(loc, 1);
-
-  // Insert code computing the value of the original loop induction variable
-  // from the "normalized" one.
-  Value scaled =
-      isStepOne
-          ? inductionVar
-          : insideLoopBuilder.create(loc, inductionVar, step);
-  Value shifted =
-      isZeroBased
-          ? scaled
-          : insideLoopBuilder.create(loc, scaled, lowerBound);
-
-  SmallPtrSet preserve{scaled.getDefiningOp(),
-                                       shifted.getDefiningOp()};
-  inductionVar.replaceAllUsesExcept(shifted, preserve);
-  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
-          /*step=*/newStep};
-}
-
-/// Transform a loop with a strictly positive step
-///   for %i = %lb to %ub step %s
-/// into a 0-based loop with step 1
-///   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
-///     %i = %ii * %s + %lb
-/// Insert the induction variable remapping in the body of `inner`, which is
-/// expected to be either `loop` or another loop perfectly nested under `loop`.
-/// Insert the definition of new bounds immediate before `outer`, which is
-/// expected to be either `loop` or its parent in the loop nest.
-static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
-  OpBuilder builder(outer);
-  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
-  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
-                                  loop.getLowerBound(), loop.getUpperBound(),
-                                  loop.getStep(), loop.getInductionVar());
-
-  loop.setLowerBound(loopPieces.lowerBound);
-  loop.setUpperBound(loopPieces.upperBound);
-  loop.setStep(loopPieces.step);
-}
-
-void mlir::coalesceLoops(MutableArrayRef loops) {
-  if (loops.size() < 2)
-    return;
-
-  scf::ForOp innermost = loops.back();
-  scf::ForOp outermost = loops.front();
-
-  // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This
-  // allows the following code to assume upperBound is the number of iterations.
-  for (auto loop : loops)
-    normalizeLoop(loop, outermost, innermost);
-
-  // 2. Emit code computing the upper bound of the coalesced loop as product
-  // of the number of iterations of all loops.
-  OpBuilder builder(outermost);
-  Location loc = outermost.getLoc();
-  Value upperBound = outermost.getUpperBound();
-  for (auto loop : loops.drop_front())
-    upperBound =
-        builder.create(loc, upperBound, loop.getUpperBound());
-  outermost.setUpperBound(upperBound);
-
-  builder.setInsertionPointToStart(outermost.getBody());
-
-  // 3. Remap induction variables. For each original loop, the value of the
-  // induction variable can be obtained by dividing the induction variable of
-  // the linearized loop by the total number of iterations of the loops nested
-  // in it modulo the number of iterations in this loop (remove the values
-  // related to the outer loops):
-  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
-  // Compute these iteratively from the innermost loop by creating a "running
-  // quotient" of division by the range.
-  Value previous = outermost.getInductionVar();
-  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
-    unsigned idx = loops.size() - i - 1;
-    if (i != 0)
-      previous = builder.create(loc, previous,
-                                                loops[idx + 1].getUpperBound());
-
-    Value iv = (i == e - 1) ? previous
-                            : builder.create(
-                                  loc, previous, loops[idx].getUpperBound());
-    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
-                               loops.back().getRegion());
+  SmallVector res;
+  for (auto loops : tile(forOps, sizes, ArrayRef(target))) {
+    assert(loops.size() == 1);
+    res.push_back(loops[0]);
   }
-
-  // 4. Move the operations from the innermost just above the second-outermost
-  // loop, delete the extra terminator and the second-outermost loop.
-  scf::ForOp second = loops[1];
-  innermost.getBody()->back().erase();
-  outermost.getBody()->getOperations().splice(
-      Block::iterator(second.getOperation()),
-      innermost.getBody()->getOperations());
-  second.erase();
+  return res;
 }
 
 LogicalResult mlir::coalesceLoops(MutableArrayRef loops) {
@@ -2347,89 +1829,6 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef loops) {
   return success();
 }
 
-void mlir::collapseParallelLoops(
-    scf::ParallelOp loops, ArrayRef> combinedDimensions) {
-  OpBuilder outsideBuilder(loops);
-  Location loc = loops.getLoc();
-
-  // Presort combined dimensions.
-  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
-  for (auto &dims : sortedDimensions)
-    std::sort(dims.begin(), dims.end());
-
-  // Normalize ParallelOp's iteration pattern.
-  SmallVector normalizedLowerBounds, normalizedSteps,
-      normalizedUpperBounds;
-  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
-    OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
-    auto resultBounds =
-        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
-                      loops.getLowerBound()[i], loops.getUpperBound()[i],
-                      loops.getStep()[i], loops.getBody()->getArgument(i));
-
-    normalizedLowerBounds.push_back(resultBounds.lowerBound);
-    normalizedUpperBounds.push_back(resultBounds.upperBound);
-    normalizedSteps.push_back(resultBounds.step);
-  }
-
-  // Combine iteration spaces.
-  SmallVector lowerBounds, upperBounds, steps;
-  auto cst0 = outsideBuilder.create(loc, 0);
-  auto cst1 = outsideBuilder.create(loc, 1);
-  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
-    Value newUpperBound = outsideBuilder.create(loc, 1);
-    for (auto idx : sortedDimensions[i]) {
-      newUpperBound = outsideBuilder.create(
-          loc, newUpperBound, normalizedUpperBounds[idx]);
-    }
-    lowerBounds.push_back(cst0);
-    steps.push_back(cst1);
-    upperBounds.push_back(newUpperBound);
-  }
-
-  // Create new ParallelLoop with conversions to the original induction values.
-  // The loop below uses divisions to get the relevant range of values in the
-  // new induction value that represent each range of the original induction
-  // value. The remainders then determine based on that range, which iteration
-  // of the original induction value this represents. This is a normalized value
-  // that is un-normalized already by the previous logic.
-  auto newPloop = outsideBuilder.create(
-      loc, lowerBounds, upperBounds, steps,
-      [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
-        for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
-          Value previous = ploopIVs[i];
-          unsigned numberCombinedDimensions = combinedDimensions[i].size();
-          // Iterate over all except the last induction value.
-          for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
-            unsigned idx = combinedDimensions[i][j];
-
-            // Determine the current induction value's current loop iteration
-            Value iv = insideBuilder.create(
-                loc, previous, normalizedUpperBounds[idx]);
-            replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
-                                       loops.getRegion());
-
-            // Remove the effect of the current induction value to prepare for
-            // the next value.
-            previous = insideBuilder.create(
-                loc, previous, normalizedUpperBounds[idx]);
-          }
-
-          // The final induction value is just the remaining value.
-          unsigned idx = combinedDimensions[i][0];
-          replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
-                                     previous, loops.getRegion());
-        }
-      });
-
-  // Replace the old loop with the new loop.
-  loops.getBody()->back().erase();
-  newPloop.getBody()->getOperations().splice(
-      Block::iterator(newPloop.getBody()->back()),
-      loops.getBody()->getOperations());
-  loops.erase();
-}
-
 void mlir::mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef processorId,
                                  ArrayRef numProcessors) {
   assert(processorId.size() == numProcessors.size());
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 941ff3c88778c..1e3a38f51cb61 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -16,12 +16,14 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
+
+#define DEBUG_TYPE "affine-utils"
 
 using namespace mlir;
 
@@ -856,3 +858,740 @@ void mlir::affineScalarReplace(FuncOp f, DominanceInfo &domInfo,
     defOp->erase();
   }
 }
+
+// Perform the replacement in `op`.
+LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
+                                             Operation *op,
+                                             ArrayRef extraIndices,
+                                             AffineMap indexRemap,
+                                             ArrayRef extraOperands,
+                                             ArrayRef symbolOperands,
+                                             bool allowNonDereferencingOps) {
+  unsigned newMemRefRank = newMemRef.getType().cast().getRank();
+  (void)newMemRefRank; // unused in opt mode
+  unsigned oldMemRefRank = oldMemRef.getType().cast().getRank();
+  (void)oldMemRefRank; // unused in opt mode
+  if (indexRemap) {
+    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
+           "symbolic operand count mismatch");
+    assert(indexRemap.getNumInputs() ==
+           extraOperands.size() + oldMemRefRank + symbolOperands.size());
+    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
+  } else {
+    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
+  }
+
+  // Assert same elemental type.
+  assert(oldMemRef.getType().cast().getElementType() ==
+         newMemRef.getType().cast().getElementType());
+
+  SmallVector usePositions;
+  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
+    if (opEntry.value() == oldMemRef)
+      usePositions.push_back(opEntry.index());
+  }
+
+  // If memref doesn't appear, nothing to do.
+  if (usePositions.empty())
+    return success();
+
+  if (usePositions.size() > 1) {
+    // TODO: extend it for this case when needed (rare).
+    assert(false && "multiple dereferencing uses in a single op not supported");
+    return failure();
+  }
+
+  unsigned memRefOperandPos = usePositions.front();
+
+  OpBuilder builder(op);
+  // The following checks if op is dereferencing memref and performs the access
+  // index rewrites.
+  auto affMapAccInterface = dyn_cast(op);
+  if (!affMapAccInterface) {
+    if (!allowNonDereferencingOps) {
+      // Failure: memref used in a non-dereferencing context (potentially
+      // escapes); no replacement in these cases unless allowNonDereferencingOps
+      // is set.
+      return failure();
+    }
+    op->setOperand(memRefOperandPos, newMemRef);
+    return success();
+  }
+  // Perform index rewrites for the dereferencing op and then replace the op
+  NamedAttribute oldMapAttrPair =
+      affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
+  AffineMap oldMap = oldMapAttrPair.getValue().cast().getValue();
+  unsigned oldMapNumInputs = oldMap.getNumInputs();
+  SmallVector oldMapOperands(
+      op->operand_begin() + memRefOperandPos + 1,
+      op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
+
+  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
+  SmallVector oldMemRefOperands;
+  SmallVector affineApplyOps;
+  oldMemRefOperands.reserve(oldMemRefRank);
+  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
+    for (auto resultExpr : oldMap.getResults()) {
+      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
+                                         oldMap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create(op->getLoc(), singleResMap,
+                                                oldMapOperands);
+      oldMemRefOperands.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
+  }
+
+  // Construct new indices as a remap of the old ones if a remapping has been
+  // provided. The indices of a memref come right after it, i.e.,
+  // at position memRefOperandPos + 1.
+  SmallVector remapOperands;
+  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
+                        symbolOperands.size());
+  remapOperands.append(extraOperands.begin(), extraOperands.end());
+  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
+  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
+
+  SmallVector remapOutputs;
+  remapOutputs.reserve(oldMemRefRank);
+
+  if (indexRemap &&
+      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
+    // Remapped indices.
+    for (auto resultExpr : indexRemap.getResults()) {
+      auto singleResMap = AffineMap::get(
+          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create(op->getLoc(), singleResMap,
+                                                remapOperands);
+      remapOutputs.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    // No remapping specified.
+    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
+  }
+
+  SmallVector newMapOperands;
+  newMapOperands.reserve(newMemRefRank);
+
+  // Prepend 'extraIndices' in 'newMapOperands'.
+  for (Value extraIndex : extraIndices) {
+    assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
+           "single result op's expected to generate these indices");
+    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
+           "invalid memory op index");
+    newMapOperands.push_back(extraIndex);
+  }
+
+  // Append 'remapOutputs' to 'newMapOperands'.
+  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
+
+  // Create new fully composed AffineMap for new op to be created.
+  assert(newMapOperands.size() == newMemRefRank);
+  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
+  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
+  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
+  newMap = simplifyAffineMap(newMap);
+  canonicalizeMapAndOperands(&newMap, &newMapOperands);
+  // Remove any affine.apply's that became dead as a result of composition.
+  for (Value value : affineApplyOps)
+    if (value.use_empty())
+      value.getDefiningOp()->erase();
+
+  OperationState state(op->getLoc(), op->getName());
+  // Construct the new operation using this memref.
+  state.operands.reserve(op->getNumOperands() + extraIndices.size());
+  // Insert the non-memref operands.
+  state.operands.append(op->operand_begin(),
+                        op->operand_begin() + memRefOperandPos);
+  // Insert the new memref value.
+  state.operands.push_back(newMemRef);
+
+  // Insert the new memref map operands.
+  state.operands.append(newMapOperands.begin(), newMapOperands.end());
+
+  // Insert the remaining operands unmodified.
+  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
+                            oldMapNumInputs,
+                        op->operand_end());
+
+  // Result types don't change. Both memrefs are of the same elemental type.
+  state.types.reserve(op->getNumResults());
+  for (auto result : op->getResults())
+    state.types.push_back(result.getType());
+
+  // Add attribute for 'newMap', other Attributes do not change.
+  auto newMapAttr = AffineMapAttr::get(newMap);
+  for (auto namedAttr : op->getAttrs()) {
+    if (namedAttr.getName() == oldMapAttrPair.getName())
+      state.attributes.push_back({namedAttr.getName(), newMapAttr});
+    else
+      state.attributes.push_back(namedAttr);
+  }
+
+  // Create the new operation.
+  auto *repOp = builder.createOperation(state);
+  op->replaceAllUsesWith(repOp);
+  op->erase();
+
+  return success();
+}
+
+LogicalResult mlir::replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef extraIndices,
+    AffineMap indexRemap, ArrayRef extraOperands,
+    ArrayRef symbolOperands, Operation *domOpFilter,
+    Operation *postDomOpFilter, bool allowNonDereferencingOps,
+    bool replaceInDeallocOp) {
+  unsigned newMemRefRank = newMemRef.getType().cast().getRank();
+  (void)newMemRefRank; // unused in opt mode
+  unsigned oldMemRefRank = oldMemRef.getType().cast().getRank();
+  (void)oldMemRefRank;
+  if (indexRemap) {
+    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
+           "symbol operand count mismatch");
+    assert(indexRemap.getNumInputs() ==
+           extraOperands.size() + oldMemRefRank + symbolOperands.size());
+    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
+  } else {
+    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
+  }
+
+  // Assert same elemental type.
+  assert(oldMemRef.getType().cast().getElementType() ==
+         newMemRef.getType().cast().getElementType());
+
+  std::unique_ptr domInfo;
+  std::unique_ptr postDomInfo;
+  if (domOpFilter)
+    domInfo =
+        std::make_unique(domOpFilter->getParentOfType());
+
+  if (postDomOpFilter)
+    postDomInfo = std::make_unique(
+        postDomOpFilter->getParentOfType());
+
+  // Walk all uses of old memref; collect ops to perform replacement. We use a
+  // DenseSet since an operation could potentially have multiple uses of a
+  // memref (although rare), and the replacement later is going to erase ops.
+  DenseSet opsToReplace;
+  for (auto *op : oldMemRef.getUsers()) {
+    // Skip this use if it's not dominated by domOpFilter.
+    if (domOpFilter && !domInfo->dominates(domOpFilter, op))
+      continue;
+
+    // Skip this use if it's not post-dominated by postDomOpFilter.
+    if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
+      continue;
+
+    // Unless replaceInDeallocOp is set, skip deallocs: no replacement is
+    // necessary, and replacing the memref at other uses doesn't hurt them.
+    if (isa(op) && !replaceInDeallocOp)
+      continue;
+
+    // Check if the memref was used in a non-dereferencing context. It is fine
+    // for the memref to be used in a non-dereferencing way outside of the
+    // region where this replacement is happening.
+    if (!isa(*op)) {
+      if (!allowNonDereferencingOps) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Memref replacement failed: non-deferencing memref op: \n"
+                   << *op << '\n');
+        return failure();
+      }
+      // Non-dereferencing ops with the MemRefsNormalizable trait are
+      // supported for replacement.
+      if (!op->hasTrait()) {
+        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
+                                   "memrefs normalizable trait: \n"
+                                << *op << '\n');
+        return failure();
+      }
+    }
+
+    // Collect first and replace afterwards, since replacement erases the op
+    // holding the use, and that op could be postDomOpFilter or domOpFilter.
+    opsToReplace.insert(op);
+  }
+
+  for (auto *op : opsToReplace) {
+    if (failed(replaceAllMemRefUsesWith(
+            oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
+            symbolOperands, allowNonDereferencingOps)))
+      llvm_unreachable("memref replacement guaranteed to succeed here");
+  }
+
+  return success();
+}
+
+/// Given an operation, inserts one or more single result affine
+/// apply operations, results of which are exclusively used by this
+/// operation. The operands of these newly created affine apply ops are
+/// guaranteed to be loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   "compute"(%idx)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "compute"(%idx_)
+///
+/// This allows applying different transformations on send and compute (e.g.,
+/// different shifts/delays).
+///
+/// Returns without adding to `sliceOps` either if none of opInst's operands
+/// were the result of an affine.apply and thus there was no affine computation
+/// slice to create, or if all the affine.apply ops supplying operands to this
+/// opInst did not have any uses besides this opInst; otherwise appends the
+/// affine.apply operations created to the output argument `sliceOps`.
+void mlir::createAffineComputationSlice(
+    Operation *opInst, SmallVectorImpl *sliceOps) {
+  // Collect all operands that are results of affine apply ops.
+  SmallVector subOperands;
+  subOperands.reserve(opInst->getNumOperands());
+  for (auto operand : opInst->getOperands())
+    if (isa_and_nonnull(operand.getDefiningOp()))
+      subOperands.push_back(operand);
+
+  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
+  SmallVector affineApplyOps;
+  getReachableAffineApplyOps(subOperands, affineApplyOps);
+  // Skip transforming if there are no affine maps to compose.
+  if (affineApplyOps.empty())
+    return;
+
+  // Check if all uses of the affine apply op's lie only in this op op, in
+  // which case there would be nothing to do.
+  bool localized = true;
+  for (auto *op : affineApplyOps) {
+    for (auto result : op->getResults()) {
+      for (auto *user : result.getUsers()) {
+        if (user != opInst) {
+          localized = false;
+          break;
+        }
+      }
+    }
+  }
+  if (localized)
+    return;
+
+  OpBuilder builder(opInst);
+  SmallVector composedOpOperands(subOperands);
+  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
+  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
+
+  // Create an affine.apply for each of the map results.
+  sliceOps->reserve(composedMap.getNumResults());
+  for (auto resultExpr : composedMap.getResults()) {
+    auto singleResMap = AffineMap::get(composedMap.getNumDims(),
+                                       composedMap.getNumSymbols(), resultExpr);
+    sliceOps->push_back(builder.create(
+        opInst->getLoc(), singleResMap, composedOpOperands));
+  }
+
+  // Construct the new operands that include the results from the composed
+  // affine apply op above instead of existing ones (subOperands). So, they
+  // differ from opInst's operands only for those operands in 'subOperands', for
+  // which they will be replaced by the corresponding one from 'sliceOps'.
+  SmallVector newOperands(opInst->getOperands());
+  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
+    // Replace the subOperands from among the new operands.
+    unsigned j, f;
+    for (j = 0, f = subOperands.size(); j < f; j++) {
+      if (newOperands[i] == subOperands[j])
+        break;
+    }
+    if (j < subOperands.size()) {
+      newOperands[i] = (*sliceOps)[j];
+    }
+  }
+  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
+    opInst->setOperand(idx, newOperands[idx]);
+  }
+}
+
+/// Enum to set patterns of affine expr in tiled-layout map.
+/// TileFloorDiv: <dim expr> div <tile size>
+/// TileMod: <dim expr> mod <tile size>
+/// TileNone: None of the above
+/// Example:
+/// #tiled_2d_128x256 = affine_map<(d0, d1)
+///            -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
+/// "d0 mod 128" and "d1 mod 256" ==> TileMod
+enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
+
+/// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
+/// being floordiv'ed by respective tile sizes appear in a mod with the same
+/// tile sizes, and no other expression involves those k dimensions. This
+/// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
+/// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
+/// tiled layout, an empty vector is returned.
+static LogicalResult getTileSizePos(
+    AffineMap map,
+    SmallVectorImpl> &tileSizePos) {
+  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
+  // `floordiv` and its position in `map` output.
+  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
+  //                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
+  SmallVector, 4> floordivExprs;
+  unsigned pos = 0;
+  for (AffineExpr expr : map.getResults()) {
+    if (expr.getKind() == AffineExprKind::FloorDiv) {
+      AffineBinaryOpExpr binaryExpr = expr.cast();
+      if (binaryExpr.getRHS().isa())
+        floordivExprs.emplace_back(
+            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
+    }
+    pos++;
+  }
+  // Not tiled layout if `floordivExprs` is empty.
+  if (floordivExprs.empty()) {
+    tileSizePos = SmallVector>{};
+    return success();
+  }
+
+  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
+  // not tiled layout.
+  for (std::tuple fexpr : floordivExprs) {
+    AffineExpr floordivExprLHS = std::get<0>(fexpr);
+    AffineExpr floordivExprRHS = std::get<1>(fexpr);
+    unsigned floordivPos = std::get<2>(fexpr);
+
+    // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
+    // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
+    // in other expr, the map is not tiled layout. Example of non tiled layout:
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
+    //   256)>
+    bool found = false;
+    pos = 0;
+    for (AffineExpr expr : map.getResults()) {
+      bool notTiled = false;
+      if (pos != floordivPos) {
+        expr.walk([&](AffineExpr e) {
+          if (e == floordivExprLHS) {
+            if (expr.getKind() == AffineExprKind::Mod) {
+              AffineBinaryOpExpr binaryExpr = expr.cast();
+              // If LHS and RHS of `mod` are the same with those of floordiv.
+              if (floordivExprLHS == binaryExpr.getLHS() &&
+                  floordivExprRHS == binaryExpr.getRHS()) {
+                // Save tile size (RHS of `mod`), and position of `floordiv` and
+                // `mod` if same expr with `mod` is not found yet.
+                if (!found) {
+                  tileSizePos.emplace_back(
+                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
+                  found = true;
+                } else {
+                  // Non tiled layout: Have multiple `mod` with the same LHS.
+                  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+                  // mod 256, d2 mod 256)>
+                  notTiled = true;
+                }
+              } else {
+                // Non tiled layout: RHS of `mod` is different from `floordiv`.
+                // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+                // mod 128)>
+                notTiled = true;
+              }
+            } else {
+              // Non tiled layout: LHS is the same, but not `mod`.
+              // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+              // floordiv 256)>
+              notTiled = true;
+            }
+          }
+        });
+      }
+      if (notTiled) {
+        tileSizePos = SmallVector>{};
+        return success();
+      }
+      pos++;
+    }
+  }
+  return success();
+}
+
+/// Check if the `dim`-th result of `layoutMap` becomes a dynamic dimension
+/// after normalization. A map result that references any input dimension
+/// listed in `inMemrefTypeDynDims` becomes dynamic. Return true if `dim` is
+/// such a dynamic dimension.
+///
+/// Example:
+/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///
+/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
+/// memref<4x?xf32, #map0>  ==>  memref<4x?x?xf32>
+static bool
+isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
+                             SmallVectorImpl &inMemrefTypeDynDims,
+                             MLIRContext *context) {
+  bool isDynamicDim = false;
+  AffineExpr expr = layoutMap.getResults()[dim];
+  // Check if affine expr of the dimension includes dynamic dimension of input
+  // memrefType.
+  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
+    if (e.isa()) {
+      for (unsigned dm : inMemrefTypeDynDims) {
+        if (e == getAffineDimExpr(dm, context)) {
+          isDynamicDim = true;
+        }
+      }
+    }
+  });
+  return isDynamicDim;
+}
+
+/// Create affine expr to calculate dimension size for a tiled-layout map.
+static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
+                                                  TileExprPattern pat) {
+  // Create map output for the patterns.
+  // "floordiv <tile size>" ==> "ceildiv <tile size>"
+  // "mod <tile size>" ==> "<tile size>"
+  AffineExpr newMapOutput;
+  AffineBinaryOpExpr binaryExpr = nullptr;
+  switch (pat) {
+  case TileExprPattern::TileMod:
+    binaryExpr = oldMapOutput.cast();
+    newMapOutput = binaryExpr.getRHS();
+    break;
+  case TileExprPattern::TileFloorDiv:
+    binaryExpr = oldMapOutput.cast();
+    newMapOutput = getAffineBinaryOpExpr(
+        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
+    break;
+  default:
+    newMapOutput = oldMapOutput;
+  }
+  return newMapOutput;
+}
+
+/// Create new maps to calculate each dimension size of `newMemRefType`, and
+/// create `newDynamicSizes` from them by using AffineApplyOp.
+///
+/// Steps for normalizing dynamic memrefs for a tiled layout map
+/// Example:
+///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///    %0 = dim %arg0, %c1 :memref<4x?xf32>
+///    %1 = alloc(%0) : memref<4x?xf32, #map0>
+///
+/// (Before this function)
+/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
+/// single layout map is supported.
+///
+/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
+/// is memref<4x?x?xf32> in the above example.
+///
+/// (In this function)
+/// 3. Create new maps to calculate each dimension of the normalized memrefType
+/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
+/// dimension size can be calculated by replacing "floordiv <tile size>" with
+/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
+/// - New map in the above example
+///   #map0 = affine_map<(d0, d1) -> (d0)>
+///   #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
+///   #map2 = affine_map<(d0, d1) -> (32)>
+///
+/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
+/// is used in dynamicSizes of new AllocOp.
+///   %0 = dim %arg0, %c1 : memref<4x?xf32>
+///   %c4 = arith.constant 4 : index
+///   %1 = affine.apply #map1(%c4, %0)
+///   %2 = affine.apply #map2(%c4, %0)
+static void createNewDynamicSizes(MemRefType oldMemRefType,
+                                  MemRefType newMemRefType, AffineMap map,
+                                  memref::AllocOp *allocOp, OpBuilder b,
+                                  SmallVectorImpl &newDynamicSizes) {
+  // Create new input for AffineApplyOp.
+  SmallVector inAffineApply;
+  ArrayRef oldMemRefShape = oldMemRefType.getShape();
+  unsigned dynIdx = 0;
+  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
+    if (oldMemRefShape[d] < 0) {
+      // Use dynamicSizes of allocOp for dynamic dimension.
+      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
+      dynIdx++;
+    } else {
+      // Create ConstantOp for static dimension.
+      Attribute constantAttr =
+          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
+      inAffineApply.emplace_back(
+          b.create(allocOp->getLoc(), constantAttr));
+    }
+  }
+
+  // Create new map to calculate each dimension size of new memref for each
+  // original map output. Only for dynamic dimensions of `newMemRefType`.
+  unsigned newDimIdx = 0;
+  ArrayRef newMemRefShape = newMemRefType.getShape();
+  SmallVector> tileSizePos;
+  (void)getTileSizePos(map, tileSizePos);
+  for (AffineExpr expr : map.getResults()) {
+    if (newMemRefShape[newDimIdx] < 0) {
+      // Create new maps to calculate each dimension size of new memref.
+      enum TileExprPattern pat = TileExprPattern::TileNone;
+      for (auto pos : tileSizePos) {
+        if (newDimIdx == std::get<1>(pos))
+          pat = TileExprPattern::TileFloorDiv;
+        else if (newDimIdx == std::get<2>(pos))
+          pat = TileExprPattern::TileMod;
+      }
+      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
+      AffineMap newMap =
+          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
+      Value affineApp =
+          b.create(allocOp->getLoc(), newMap, inAffineApply);
+      newDynamicSizes.emplace_back(affineApp);
+    }
+    newDimIdx++;
+  }
+}
+
+// TODO: Handles only static memrefs and dynamic memrefs with a tiled layout.
+LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
+  MemRefType memrefType = allocOp->getType();
+  OpBuilder b(*allocOp);
+
+  // Fetch a new memref type after normalizing the old memref to have an
+  // identity map layout.
+  MemRefType newMemRefType =
+      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
+  if (newMemRefType == memrefType)
+    // Either memrefType already had an identity map or the map couldn't be
+    // transformed to an identity map.
+    return failure();
+
+  Value oldMemRef = allocOp->getResult();
+
+  SmallVector symbolOperands(allocOp->symbolOperands());
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+  memref::AllocOp newAlloc;
+  // Check if `layoutMap` is a tiled layout. Only single layout map is
+  // supported for normalizing dynamic memrefs.
+  SmallVector> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
+    MemRefType oldMemRefType = oldMemRef.getType().cast();
+    SmallVector newDynamicSizes;
+    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
+                          newDynamicSizes);
+    // Add the new dynamic sizes in new AllocOp.
+    newAlloc =
+        b.create(allocOp->getLoc(), newMemRefType,
+                                  newDynamicSizes, allocOp->alignmentAttr());
+  } else {
+    newAlloc = b.create(allocOp->getLoc(), newMemRefType,
+                                         allocOp->alignmentAttr());
+  }
+  // Replace all uses of the old memref.
+  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
+                                      /*extraIndices=*/{},
+                                      /*indexRemap=*/layoutMap,
+                                      /*extraOperands=*/{},
+                                      /*symbolOperands=*/symbolOperands,
+                                      /*domOpFilter=*/nullptr,
+                                      /*postDomOpFilter=*/nullptr,
+                                      /*allowNonDereferencingOps=*/true))) {
+    // If it failed (due to escapes for example), bail out.
+    newAlloc.erase();
+    return failure();
+  }
+  // Replace any remaining uses of the original alloc op and erase it. They all
+  // have to be dealloc's; replaceAllMemRefUsesWith would've failed otherwise.
+  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
+    return isa(op);
+  }));
+  oldMemRef.replaceAllUsesWith(newAlloc);
+  allocOp->erase();
+  return success();
+}
+
+MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
+                                     unsigned numSymbolicOperands) {
+  unsigned rank = memrefType.getRank();
+  if (rank == 0)
+    return memrefType;
+
+  if (memrefType.getLayout().isIdentity()) {
+    // Either no map is associated with this memref or this memref has
+    // a trivial (identity) map.
+    return memrefType;
+  }
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+
+  // We don't do any checks for one-to-one'ness; we assume that it is
+  // one-to-one.
+
+  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
+  // for now.
+  // TODO: Normalize the other types of dynamic memrefs.
+  SmallVector> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
+    return memrefType;
+
+  // We have a single map that is not an identity map. Create a new memref
+  // with the right shape and an identity layout map.
+  ArrayRef shape = memrefType.getShape();
+  // FlatAffineConstraint may later on use symbolicOperands.
+  FlatAffineConstraints fac(rank, numSymbolicOperands);
+  SmallVector memrefTypeDynDims;
+  for (unsigned d = 0; d < rank; ++d) {
+    // Bound only positive extents; all others are recorded as dynamic.
+    if (shape[d] > 0) {
+      fac.addBound(FlatAffineConstraints::LB, d, 0);
+      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
+    } else {
+      memrefTypeDynDims.emplace_back(d);
+    }
+  }
+  // We compose this map with the original index (logical) space to derive
+  // the upper bounds for the new index space.
+  unsigned newRank = layoutMap.getNumResults();
+  if (failed(fac.composeMatchingMap(layoutMap)))
+    return memrefType;
+  // TODO: Handle semi-affine maps.
+  // Project out the old data dimensions.
+  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
+  SmallVector newShape(newRank);
+  for (unsigned d = 0; d < newRank; ++d) {
+    // Check if each dimension of normalized memrefType is dynamic.
+    bool isDynDim = isNormalizedMemRefDynamicDim(
+        d, layoutMap, memrefTypeDynDims, b.getContext());
+    if (isDynDim) {
+      newShape[d] = -1;
+    } else {
+      // The lower bound for the shape is always zero.
+      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
+      // For a static memref and an affine map with no symbols, this is
+      // always bounded.
+      assert(ubConst.hasValue() && "should always have an upper bound");
+      if (ubConst.getValue() < 0)
+        // This is due to an invalid map that maps to a negative space.
+        return memrefType;
+      // The static extent is the constant upper bound plus one.
+      newShape[d] = ubConst.getValue() + 1;
+    }
+  }
+
+  // Create the new memref type after trivializing the old layout map.
+  MemRefType newMemRefType =
+      MemRefType::Builder(memrefType)
+          .setShape(newShape)
+          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
+
+  return newMemRefType;
+}
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index 182a24cd04444..62c11f12b3f89 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -19,7 +20,6 @@
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 
 using namespace mlir;
 using namespace mlir::gpu;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
index db5da26cc9a8c..3606d9b832d1a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h"
-
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
 #include "mlir/Dialect/SCF/Transforms.h"
@@ -20,7 +19,6 @@
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 83c9aa4a54a01..8e863ab0c80e8 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
 #include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -24,7 +23,6 @@
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 097500f7190c4..33b70c0dc95fd 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -15,7 +15,6 @@
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -27,7 +26,6 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index 025adc2c56b2b..74d097ca5b766 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -16,6 +16,8 @@
 #include "PassDetail.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
@@ -29,9 +31,7 @@
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 using namespace mlir::vector;
@@ -348,7 +348,13 @@ struct LinalgStrategyEnablePass
         return signalPassFailure();
     }
 
-    promoteSingleIterationLoops(funcOp);
+    // Promote the body of every loop that can be shown to run exactly once.
+    funcOp.walk([](Operation *op) {
+      if (auto forOp = dyn_cast(op))
+        (void)promoteIfSingleIteration(forOp);
+      else if (auto forOp = dyn_cast(op))
+        (void)promoteIfSingleIteration(forOp);
+    });
     if (options.hoistRedundantVectorTransfers)
       hoistRedundantVectorTransfers(funcOp);
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 486a069a60ed0..ca4308851daed 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 1b0a6d7f2ba87..a26f623619212 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -31,7 +32,6 @@
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
index 0b5e49b2df528..89936111fce55 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
@@ -13,9 +13,9 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index f517079990015..2c723625038bf 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRSCFTransforms
   LoopPipelining.cpp
   LoopRangeFolding.cpp
   LoopSpecialization.cpp
+  ParallelLoopCollapsing.cpp
   ParallelLoopFusion.cpp
   ParallelLoopTiling.cpp
   StructuralTypeConversions.cpp
diff --git a/mlir/lib/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
similarity index 91%
rename from mlir/lib/Transforms/ParallelLoopCollapsing.cpp
rename to mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 2c3329cde2218..0e5d4da310bdb 100644
--- a/mlir/lib/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -20,7 +20,7 @@ using namespace mlir;
 
 namespace {
 struct ParallelLoopCollapsing
-    : public ParallelLoopCollapsingBase {
+    : public SCFParallelLoopCollapsingBase {
   void runOnOperation() override {
     Operation *module = getOperation();
 
diff --git a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
index 66be70bf1a19d..04b41306c6da6 100644
--- a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
@@ -11,20 +11,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SCF/Utils.h"
-
+#include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/RegionUtils.h"
-
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 using namespace mlir;
 
+namespace {
+// This structure is to pass and return sets of loop parameters without
+// confusing the order.
+struct LoopParams {
+  Value lowerBound;
+  Value upperBound;
+  Value step;
+};
+} // namespace
+
 scf::ForOp mlir::cloneWithNewYields(OpBuilder &b, scf::ForOp loop,
                                     ValueRange newIterOperands,
                                     ValueRange newYieldedValues,
@@ -230,3 +241,682 @@ bool mlir::getInnermostParallelLoops(Operation *rootOp,
   }
   return rootEnclosesPloops;
 }
+
+// Build the IR that performs ceil division of a positive value by a constant:
+//    ceildiv(a, B) = divis(a + (B-1), B)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             int64_t divisor) {
+  assert(divisor > 0 && "expected positive divisor");
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value divisorMinusOneCst =
+      builder.create(loc, divisor - 1);
+  Value divisorCst = builder.create(loc, divisor);
+  Value sum = builder.create(loc, dividend, divisorMinusOneCst);
+  return builder.create(loc, sum, divisorCst);
+}
+
+// Build the IR that performs ceil division of a positive value by another
+// positive value:
+//    ceildiv(a, b) = divis(a + (b - 1), b)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             Value divisor) {
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value cstOne = builder.create(loc, 1);
+  Value divisorMinusOne = builder.create(loc, divisor, cstOne);
+  Value sum = builder.create(loc, dividend, divisorMinusOne);
+  return builder.create(loc, sum, divisor);
+}
+
+/// Helper to replace uses of loop carried values (iter_args) and loop
+/// yield values while promoting single iteration scf.for ops.
+static void replaceIterArgsAndYieldResults(scf::ForOp forOp) {
+  // Replace uses of iter arguments with iter operands (initial values).
+  auto iterOperands = forOp.getIterOperands();
+  auto iterArgs = forOp.getRegionIterArgs();
+  for (auto e : llvm::zip(iterOperands, iterArgs))
+    std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
+
+  // Replace uses of loop results with the values yielded by the loop.
+  auto outerResults = forOp.getResults();
+  auto innerResults = forOp.getBody()->getTerminator()->getOperands();
+  for (auto e : llvm::zip(outerResults, innerResults))
+    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
+}
+
+/// Promotes the loop body of a forOp to its containing block if it can be
+/// determined that the loop has a single iteration.
+LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) {
+  auto lbCstOp = forOp.getLowerBound().getDefiningOp();
+  auto ubCstOp = forOp.getUpperBound().getDefiningOp();
+  auto stepCstOp = forOp.getStep().getDefiningOp();
+  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
+      ubCstOp.value() < 0 || stepCstOp.value() < 0)
+    return failure();
+  int64_t tripCount =
+      mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
+  if (tripCount != 1)
+    return failure();
+  auto iv = forOp.getInductionVar();
+  iv.replaceAllUsesWith(lbCstOp);
+
+  replaceIterArgsAndYieldResults(forOp);
+
+  // Move the loop body operations, except for its terminator, to the loop's
+  // containing block.
+  auto *parentBlock = forOp->getBlock();
+  forOp.getBody()->getTerminator()->erase();
+  parentBlock->getOperations().splice(Block::iterator(forOp),
+                                      forOp.getBody()->getOperations());
+  forOp.erase();
+  return success();
+}
+
+/// Appends 'unrollFactor' - 1 clones of the non-terminator ops of
+/// 'loopBodyBlock' before its terminator. Each clone maps 'iterArgs' to the
+/// previous copy's yielded values and 'forOpIV' to 'ivRemapFn(i, ...)'. Ops of
+/// copy i (0 = original body) are passed to 'annotateFn' when it is provided.
+static void generateUnrolledLoop(
+    Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+    function_ref ivRemapFn,
+    function_ref annotateFn,
+    ValueRange iterArgs, ValueRange yieldedValues) {
+  // Builder to insert unrolled bodies just before the terminator of the body of
+  // 'forOp'.
+  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
+
+  if (!annotateFn)
+    annotateFn = [](unsigned, Operation *, OpBuilder) {};
+
+  // Keep a pointer to the last non-terminator operation in the original block
+  // so that we know what to clone (since we are doing this in-place).
+  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
+
+  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+  SmallVector lastYielded(yieldedValues);
+
+  for (unsigned i = 1; i < unrollFactor; i++) {
+    BlockAndValueMapping operandMap;
+
+    // Prepare operand map.
+    operandMap.map(iterArgs, lastYielded);
+
+    // If the induction variable is used, create a remapping to the value for
+    // this unrolled instance.
+    if (!forOpIV.use_empty()) {
+      Value ivUnroll = ivRemapFn(i, forOpIV, builder);
+      operandMap.map(forOpIV, ivUnroll);
+    }
+
+    // Clone the original body of 'forOp'.
+    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
+      Operation *clonedOp = builder.clone(*it, operandMap);
+      annotateFn(i, clonedOp, builder);
+    }
+
+    // Remap the yielded values through this copy for the next one to use.
+    for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
+      lastYielded[i] = operandMap.lookup(yieldedValues[i]);
+  }
+
+  // Make sure we annotate the Ops in the original body. We do this last so that
+  // any annotations are not copied into the cloned Ops above.
+  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
+    annotateFn(0, &*it, builder);
+
+  // Update operands of the yield statement.
+  loopBodyBlock->getTerminator()->setOperands(lastYielded);
+}
+
+/// Unrolls 'forOp' by 'unrollFactor'. Also succeeds on an empty loop body.
+LogicalResult mlir::loopUnrollByFactor(
+    scf::ForOp forOp, uint64_t unrollFactor,
+    function_ref annotateFn) {
+  assert(unrollFactor > 0 && "expected positive unroll factor");
+
+  // Nothing to unroll if the body holds a single op (its terminator).
+  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
+    return success();
+
+  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
+  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
+  OpBuilder boundsBuilder(forOp);
+  auto loc = forOp.getLoc();
+  auto step = forOp.getStep();
+  Value upperBoundUnrolled;
+  Value stepUnrolled;
+  bool generateEpilogueLoop = true;
+
+  auto lbCstOp = forOp.getLowerBound().getDefiningOp();
+  auto ubCstOp = forOp.getUpperBound().getDefiningOp();
+  auto stepCstOp = forOp.getStep().getDefiningOp();
+  if (lbCstOp && ubCstOp && stepCstOp) {
+    // Constant loop bounds computation.
+    int64_t lbCst = lbCstOp.value();
+    int64_t ubCst = ubCstOp.value();
+    int64_t stepCst = stepCstOp.value();
+    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
+           "expected positive loop bounds and step");
+    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
+
+    if (unrollFactor == 1) {
+      if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
+        return failure();
+      return success();
+    }
+
+    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
+    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
+    assert(upperBoundUnrolledCst <= ubCst);
+    int64_t stepUnrolledCst = stepCst * unrollFactor;
+
+    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
+    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
+    if (generateEpilogueLoop)
+      upperBoundUnrolled = boundsBuilder.create(
+          loc, upperBoundUnrolledCst);
+    else
+      upperBoundUnrolled = ubCstOp;
+
+    // Create constant for 'stepUnrolled'.
+    stepUnrolled = stepCst == stepUnrolledCst
+                       ? step
+                       : boundsBuilder.create(
+                             loc, stepUnrolledCst);
+  } else {
+    // Dynamic loop bounds computation.
+    // TODO: Add dynamic asserts for negative lb/ub/step, or
+    // consider using ceilDiv from AffineApplyExpander.
+    auto lowerBound = forOp.getLowerBound();
+    auto upperBound = forOp.getUpperBound();
+    Value diff =
+        boundsBuilder.create(loc, upperBound, lowerBound);
+    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
+    Value unrollFactorCst =
+        boundsBuilder.create(loc, unrollFactor);
+    Value tripCountRem =
+        boundsBuilder.create(loc, tripCount, unrollFactorCst);
+    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
+    Value tripCountEvenMultiple =
+        boundsBuilder.create(loc, tripCount, tripCountRem);
+    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
+    upperBoundUnrolled = boundsBuilder.create(
+        loc, lowerBound,
+        boundsBuilder.create(loc, tripCountEvenMultiple, step));
+    // Scale 'step' by 'unrollFactor'.
+    stepUnrolled =
+        boundsBuilder.create(loc, step, unrollFactorCst);
+  }
+
+  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
+  if (generateEpilogueLoop) {
+    OpBuilder epilogueBuilder(forOp->getContext());
+    epilogueBuilder.setInsertionPoint(forOp->getBlock(),
+                                      std::next(Block::iterator(forOp)));
+    auto epilogueForOp = cast(epilogueBuilder.clone(*forOp));
+    epilogueForOp.setLowerBound(upperBoundUnrolled);
+
+    // Update uses of loop results.
+    auto results = forOp.getResults();
+    auto epilogueResults = epilogueForOp.getResults();
+    auto epilogueIterOperands = epilogueForOp.getIterOperands();
+
+    for (auto e : llvm::zip(results, epilogueResults, epilogueIterOperands)) {
+      std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
+      epilogueForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
+    }
+    (void)promoteIfSingleIteration(epilogueForOp);
+  }
+
+  // Create unrolled loop.
+  forOp.setUpperBound(upperBoundUnrolled);
+  forOp.setStep(stepUnrolled);
+
+  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
+  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
+
+  generateUnrolledLoop(
+      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+      [&](unsigned i, Value iv, OpBuilder b) {
+        // iv' = iv + step * i;
+        auto stride = b.create(
+            loc, step, b.create(loc, i));
+        return b.create(loc, iv, stride);
+      },
+      annotateFn, iterArgs, yieldedValues);
+  // Promote the loop body up if this has turned into a single iteration loop.
+  (void)promoteIfSingleIteration(forOp);
+  return success();
+}
+
+/// Returns the normalized lower bound, upper bound, and step, in that order.
+/// New bound computations are emitted with `boundsBuilder`; ops recovering the
+/// original induction value from `inductionVar` use `insideLoopBuilder`.
+static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
+                                OpBuilder &insideLoopBuilder, Location loc,
+                                Value lowerBound, Value upperBound, Value step,
+                                Value inductionVar) {
+  // Check if the loop is already known to have a constant zero lower bound
+  // (held in `ubCst`, despite that name) or a constant step of one.
+  bool isZeroBased = false;
+  if (auto ubCst = lowerBound.getDefiningOp())
+    isZeroBased = ubCst.value() == 0;
+
+  bool isStepOne = false;
+  if (auto stepCst = step.getDefiningOp())
+    isStepOne = stepCst.value() == 1;
+
+  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
+  // assuming the step is strictly positive.  Update the bounds and the step
+  // of the loop to go from 0 to the number of iterations, if necessary.
+  // TODO: introduce support for negative steps or emit dynamic asserts
+  // on step positivity, whatever gets implemented first.
+  if (isZeroBased && isStepOne)
+    return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
+            /*step=*/step};
+
+  Value diff = boundsBuilder.create(loc, upperBound, lowerBound);
+  Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
+
+  Value newLowerBound =
+      isZeroBased ? lowerBound
+                  : boundsBuilder.create(loc, 0);
+  Value newStep =
+      isStepOne ? step : boundsBuilder.create(loc, 1);
+
+  // Insert code computing the value of the original loop induction variable
+  // from the "normalized" one.
+  Value scaled =
+      isStepOne
+          ? inductionVar
+          : insideLoopBuilder.create(loc, inductionVar, step);
+  Value shifted =
+      isZeroBased
+          ? scaled
+          : insideLoopBuilder.create(loc, scaled, lowerBound);
+
+  SmallPtrSet preserve{scaled.getDefiningOp(),
+                                       shifted.getDefiningOp()};
+  inductionVar.replaceAllUsesExcept(shifted, preserve);
+  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
+          /*step=*/newStep};
+}
+
+/// Transform a loop with a strictly positive step
+///   for %i = %lb to %ub step %s
+/// into a 0-based loop with step 1
+///   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
+///     %i = %ii * %s + %lb
+/// Insert the induction variable remapping in the body of `inner`, which is
+/// expected to be either `loop` or another loop perfectly nested under `loop`.
+/// Insert the definition of new bounds immediately before `outer`, which is
+/// expected to be either `loop` or its parent in the loop nest.
+static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
+  OpBuilder builder(outer);
+  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
+  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
+                                  loop.getLowerBound(), loop.getUpperBound(),
+                                  loop.getStep(), loop.getInductionVar());
+
+  loop.setLowerBound(loopPieces.lowerBound);
+  loop.setUpperBound(loopPieces.upperBound);
+  loop.setStep(loopPieces.step);
+}
+
+void mlir::coalesceLoops(MutableArrayRef loops) {
+  if (loops.size() < 2)
+    return; // Fewer than two loops: nothing to coalesce.
+
+  scf::ForOp innermost = loops.back();
+  scf::ForOp outermost = loops.front();
+
+  // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This
+  // allows the following code to assume upperBound is the number of iterations.
+  for (auto loop : loops)
+    normalizeLoop(loop, outermost, innermost);
+
+  // 2. Emit code computing the upper bound of the coalesced loop as product
+  // of the number of iterations of all loops.
+  OpBuilder builder(outermost);
+  Location loc = outermost.getLoc();
+  Value upperBound = outermost.getUpperBound();
+  for (auto loop : loops.drop_front())
+    upperBound =
+        builder.create(loc, upperBound, loop.getUpperBound());
+  outermost.setUpperBound(upperBound);
+
+  builder.setInsertionPointToStart(outermost.getBody());
+
+  // 3. Remap induction variables. For each original loop, the value of the
+  // induction variable can be obtained by dividing the induction variable of
+  // the linearized loop by the total number of iterations of the loops nested
+  // in it modulo the number of iterations in this loop (remove the values
+  // related to the outer loops):
+  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
+  // Compute these iteratively from the innermost loop by creating a "running
+  // quotient" of division by the range.
+  Value previous = outermost.getInductionVar();
+  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
+    unsigned idx = loops.size() - i - 1;
+    if (i != 0)
+      previous = builder.create(loc, previous,
+                                                loops[idx + 1].getUpperBound());
+
+    Value iv = (i == e - 1) ? previous
+                            : builder.create(
+                                  loc, previous, loops[idx].getUpperBound());
+    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
+                               loops.back().getRegion());
+  }
+
+  // 4. Erase the innermost terminator, move the remaining innermost ops just
+  // above the second-outermost loop, and erase the second-outermost loop.
+  scf::ForOp second = loops[1];
+  innermost.getBody()->back().erase();
+  outermost.getBody()->getOperations().splice(
+      Block::iterator(second.getOperation()),
+      innermost.getBody()->getOperations());
+  second.erase();
+}
+
+void mlir::collapseParallelLoops(
+    scf::ParallelOp loops, ArrayRef> combinedDimensions) {
+  OpBuilder outsideBuilder(loops);
+  Location loc = loops.getLoc();
+
+  // Sort each group; only the upper-bound products below use the sorted copy.
+  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
+  for (auto &dims : sortedDimensions)
+    std::sort(dims.begin(), dims.end());
+
+  // Normalize ParallelOp's iteration pattern.
+  SmallVector normalizedLowerBounds, normalizedSteps,
+      normalizedUpperBounds;
+  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
+    OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
+    auto resultBounds =
+        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
+                      loops.getLowerBound()[i], loops.getUpperBound()[i],
+                      loops.getStep()[i], loops.getBody()->getArgument(i));
+
+    normalizedLowerBounds.push_back(resultBounds.lowerBound);
+    normalizedUpperBounds.push_back(resultBounds.upperBound);
+    normalizedSteps.push_back(resultBounds.step);
+  }
+
+  // Combine iteration spaces.
+  SmallVector lowerBounds, upperBounds, steps;
+  auto cst0 = outsideBuilder.create(loc, 0);
+  auto cst1 = outsideBuilder.create(loc, 1);
+  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
+    Value newUpperBound = outsideBuilder.create(loc, 1);
+    for (auto idx : sortedDimensions[i]) {
+      newUpperBound = outsideBuilder.create(
+          loc, newUpperBound, normalizedUpperBounds[idx]);
+    }
+    lowerBounds.push_back(cst0);
+    steps.push_back(cst1);
+    upperBounds.push_back(newUpperBound);
+  }
+
+  // Create new ParallelLoop with conversions to the original induction values.
+  // The loop below uses divisions to get the relevant range of values in the
+  // new induction value that represent each range of the original induction
+  // value. The remainders then determine based on that range, which iteration
+  // of the original induction value this represents. This is a normalized value
+  // that is un-normalized already by the previous logic.
+  auto newPloop = outsideBuilder.create(
+      loc, lowerBounds, upperBounds, steps,
+      [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
+        for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
+          Value previous = ploopIVs[i];
+          unsigned numberCombinedDimensions = combinedDimensions[i].size();
+          // Iterate over all except the last induction value.
+          for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
+            unsigned idx = combinedDimensions[i][j];
+
+            // Determine the current induction value's current loop iteration
+            Value iv = insideBuilder.create(
+                loc, previous, normalizedUpperBounds[idx]);
+            replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
+                                       loops.getRegion());
+
+            // Remove the effect of the current induction value to prepare for
+            // the next value.
+            previous = insideBuilder.create(
+                loc, previous, normalizedUpperBounds[idx]);
+          }
+
+          // The final induction value is just the remaining value.
+          unsigned idx = combinedDimensions[i][0];
+          replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
+                                     previous, loops.getRegion());
+        }
+      });
+
+  // Replace the old loop with the new loop.
+  loops.getBody()->back().erase();
+  newPloop.getBody()->getOperations().splice(
+      Block::iterator(newPloop.getBody()->back()),
+      loops.getBody()->getOperations());
+  loops.erase();
+}
+
+// Hoist the ops within `outer` that appear before `inner`.
+// Such ops include the ops that have been introduced by parametric tiling.
+// Ops in the forward slice of `outer`'s induction variable (e.g. from
+// triangular loops), ops with regions and ops with side effects stay put.
+// Return failure if any such op is seen; skipping a nested loop is no failure.
+static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
+  SetVector forwardSlice;
+  getForwardSlice(
+      outer.getInductionVar(), &forwardSlice,
+      [&inner](Operation *op) { return op != inner.getOperation(); });
+  LogicalResult status = success();
+  SmallVector toHoist;
+  for (auto &op : outer.getBody()->without_terminator()) {
+    // Stop when encountering the inner loop.
+    if (&op == inner.getOperation())
+      break;
+    // Skip over non-hoistable ops.
+    if (forwardSlice.count(&op) > 0) {
+      status = failure();
+      continue;
+    }
+    // Skip intermediate scf::ForOp, these are not considered a failure.
+    if (isa(op))
+      continue;
+    // Skip other ops with regions.
+    if (op.getNumRegions() > 0) {
+      status = failure();
+      continue;
+    }
+    // Skip if op has side effects.
+    // TODO: loads to immutable memory regions are ok.
+    if (!MemoryEffectOpInterface::hasNoEffect(&op)) {
+      status = failure();
+      continue;
+    }
+    toHoist.push_back(&op);
+  }
+  auto *outerForOp = outer.getOperation();
+  for (auto *op : toHoist)
+    op->moveBefore(outerForOp);
+  return status;
+}
+
+// Traverse the interTile and intraTile loops and try to hoist ops such that
+// bands of perfectly nested loops are isolated.
+// Return failure if either perfect interTile or perfect intraTile bands cannot
+// be formed.
+static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
+  LogicalResult status = success();
+  const Loops &interTile = tileLoops.first;
+  const Loops &intraTile = tileLoops.second;
+  auto size = interTile.size();
+  assert(size == intraTile.size());
+  if (size <= 1)
+    return success(); // Fewer than two loops per band: nothing to hoist.
+  for (unsigned s = 1; s < size; ++s)
+    status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
+                               : failure();
+  for (unsigned s = 1; s < size; ++s)
+    status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
+                               : failure();
+  return status;
+}
+
+/// Collect perfectly nested loops starting from `rootForOp`.  Loops are
+/// perfectly nested if each loop is the first and only non-terminator operation
+/// in the parent loop.  Collect at most `maxLoops` loops and append them to
+/// `forOps`.
+template 
+static void getPerfectlyNestedLoopsImpl(
+    SmallVectorImpl &forOps, T rootForOp,
+    unsigned maxLoops = std::numeric_limits::max()) {
+  for (unsigned i = 0; i < maxLoops; ++i) {
+    forOps.push_back(rootForOp);
+    Block &body = rootForOp.getRegion().front();
+    if (body.begin() != std::prev(body.end(), 2))
+      return;
+
+    rootForOp = dyn_cast(&body.front());
+    if (!rootForOp)
+      return;
+  }
+}
+
+static Loops stripmineSink(scf::ForOp forOp, Value factor,
+                           ArrayRef targets) {
+  auto originalStep = forOp.getStep();
+  auto iv = forOp.getInductionVar();
+
+  OpBuilder b(forOp);
+  forOp.setStep(b.create(forOp.getLoc(), originalStep, factor));
+
+  Loops innerLoops;
+  for (auto t : targets) {
+    // Save information for splicing ops out of t when done
+    auto begin = t.getBody()->begin();
+    auto nOps = t.getBody()->getOperations().size();
+
+    // Insert newForOp before the terminator of `t`.
+    auto b = OpBuilder::atBlockTerminator((t.getBody()));
+    Value stepped = b.create(t.getLoc(), iv, forOp.getStep());
+    Value less = b.create(t.getLoc(), arith::CmpIPredicate::slt,
+                                         forOp.getUpperBound(), stepped);
+    Value ub =
+        b.create(t.getLoc(), less, forOp.getUpperBound(), stepped);
+
+    // Move the original non-terminator ops of `t` into `newForOp`; remap `iv`.
+    auto newForOp = b.create(t.getLoc(), iv, ub, originalStep);
+    newForOp.getBody()->getOperations().splice(
+        newForOp.getBody()->getOperations().begin(),
+        t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
+    replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
+                               newForOp.getRegion());
+
+    innerLoops.push_back(newForOp);
+  }
+
+  return innerLoops;
+}
+
+// Stripmines `forOp` by `factor` and sinks it under a single `target`.
+// Returns the one new scf::ForOp that is created inside `target`'s body.
+template 
+static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
+                                scf::ForOp target) {
+  // TODO: Use cheap structural assertions that targets are nested under
+  // forOp and that targets are not nested under each other when DominanceInfo
+  // exposes the capability. It seems overkill to construct a whole function
+  // dominance tree at this point.
+  auto res = stripmineSink(forOp, factor, ArrayRef(target));
+  assert(res.size() == 1 && "Expected 1 inner forOp");
+  return res[0];
+}
+
+SmallVector mlir::tile(ArrayRef forOps,
+                                 ArrayRef sizes,
+                                 ArrayRef targets) {
+  SmallVector, 8> res;
+  SmallVector currentTargets(targets.begin(), targets.end());
+  for (auto it : llvm::zip(forOps, sizes)) {
+    auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
+    res.push_back(step);
+    currentTargets = step; // Sink the next loop under the loops just made.
+  }
+  return res;
+}
+
+Loops mlir::tile(ArrayRef forOps, ArrayRef sizes,
+                 scf::ForOp target) {
+  SmallVector res;
+  for (auto loops : tile(forOps, sizes, ArrayRef(target))) {
+    assert(loops.size() == 1); // A single target gives one loop per forOp.
+    res.push_back(loops[0]);
+  }
+  return res;
+}
+
+Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef sizes) {
+  // Collect perfectly nested loops.  If more size values provided than nested
+  // loops available, truncate `sizes`.
+  SmallVector forOps;
+  forOps.reserve(sizes.size());
+  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
+  if (forOps.size() < sizes.size())
+    sizes = sizes.take_front(forOps.size());
+
+  return ::tile(forOps, sizes, forOps.back()); // Target: last collected loop.
+}
+
+void mlir::getPerfectlyNestedLoops(SmallVectorImpl &nestedLoops,
+                                   scf::ForOp root) {
+  getPerfectlyNestedLoopsImpl(nestedLoops, root); // Uses default maxLoops.
+}
+
+TileLoops mlir::extractFixedOuterLoops(scf::ForOp rootForOp,
+                                       ArrayRef sizes) {
+  // Collect perfectly nested loops.  If more size values provided than nested
+  // loops available, truncate `sizes`.
+  SmallVector forOps;
+  forOps.reserve(sizes.size());
+  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
+  if (forOps.size() < sizes.size())
+    sizes = sizes.take_front(forOps.size());
+
+  // Compute the tile sizes such that i-th outer loop executes size[i]
+  // iterations.  Given that the loop currently executes
+  //   numIterations = ceildiv((upperBound - lowerBound), step)
+  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
+  SmallVector tileSizes;
+  tileSizes.reserve(sizes.size());
+  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
+    assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
+
+    auto forOp = forOps[i];
+    OpBuilder builder(forOp);
+    auto loc = forOp.getLoc();
+    Value diff = builder.create(loc, forOp.getUpperBound(),
+                                               forOp.getLowerBound());
+    Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
+    Value iterationsPerBlock =
+        ceilDivPositive(builder, loc, numIterations, sizes[i]);
+    tileSizes.push_back(iterationsPerBlock);
+  }
+
+  // Call parametric tiling with the given sizes.
+  auto intraTile = tile(forOps, tileSizes, forOps.back());
+  TileLoops tileLoops = std::make_pair(forOps, intraTile);
+
+  // TODO: for now we just ignore the result of band isolation.
+  // In the future, mapping decisions may be impacted by the ability to
+  // isolate perfectly nested bands.
+  (void)tryIsolateBands(tileLoops);
+
+  return tileLoops;
+}
diff --git a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
index cda4472695b2d..73d8d261b4d57 100644
--- a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
@@ -14,7 +14,6 @@
 #include 
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
index cd26750c9eb7a..3ead467bc5181 100644
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -13,7 +13,6 @@
 #include 
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
index 319a6ab6b710e..ec502702d093b 100644
--- a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Interfaces/VectorInterfaces.h"
diff --git a/mlir/lib/Interfaces/LoopLikeInterface.cpp b/mlir/lib/Interfaces/LoopLikeInterface.cpp
index 4a0c5d3a18761..8ec4b51aa757b 100644
--- a/mlir/lib/Interfaces/LoopLikeInterface.cpp
+++ b/mlir/lib/Interfaces/LoopLikeInterface.cpp
@@ -7,12 +7,95 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
 
 using namespace mlir;
 
+#define DEBUG_TYPE "loop-like"
+
 //===----------------------------------------------------------------------===//
 // LoopLike Interfaces
 //===----------------------------------------------------------------------===//
 
 /// Include the definitions of the loop-like interfaces.
 #include "mlir/Interfaces/LoopLikeInterface.cpp.inc"
+
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+// Checks whether the given op can be hoisted by checking that
+// - the op and any of its contained operations do not depend on SSA values
+//   defined inside of the loop (by means of calling definedOutside).
+// - the op has no side-effects. Ops that may have recursive side effects are
+//   checked by recursing into the ops nested in their regions.
+static bool canBeHoisted(Operation *op,
+                         function_ref definedOutside) {
+  // Check that dependencies are defined outside of loop.
+  if (!llvm::all_of(op->getOperands(), definedOutside))
+    return false;
+  // Check whether this op is side-effect free. If we already know that there
+  // can be no side-effects because the surrounding op has claimed so, we can
+  // (and have to) skip this step.
+  if (auto memInterface = dyn_cast(op)) {
+    if (!memInterface.hasNoEffect())
+      return false;
+    // If the operation doesn't have side effects and it doesn't recursively
+    // have side effects, it can always be hoisted.
+    if (!op->hasTrait())
+      return true;
+
+    // Otherwise, if the operation doesn't provide the memory effect interface
+    // and it doesn't have recursive side effects we treat it conservatively as
+    // side-effecting.
+  } else if (!op->hasTrait()) {
+    return false;
+  }
+
+  // Recurse into the regions for this op and check whether the contained ops
+  // can be hoisted.
+  for (auto &region : op->getRegions()) {
+    for (auto &block : region) {
+      for (auto &innerOp : block)
+        if (!canBeHoisted(&innerOp, definedOutside))
+          return false;
+    }
+  }
+  return true;
+}
+
+LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) {
+  auto &loopBody = looplike.getLoopBody();
+
+  // We use two collections here as we need to preserve the order for insertion
+  // and this is easiest.
+  SmallPtrSet willBeMovedSet;
+  SmallVector opsToMove;
+
+  // A value is invariant if its def is already being hoisted or is outside.
+  auto isDefinedOutsideOfBody = [&](Value value) {
+    auto *definingOp = value.getDefiningOp();
+    return (definingOp && !!willBeMovedSet.count(definingOp)) ||
+           looplike.isDefinedOutsideOfLoop(value);
+  };
+
+  // Do not use walk here, as we do not want to go into nested regions and hoist
+  // operations from there. These regions might have semantics unknown to this
+  // rewriting. If the nested regions are loops, they will have been processed.
+  for (auto &block : loopBody) {
+    for (auto &op : block.without_terminator()) {
+      if (canBeHoisted(&op, isDefinedOutsideOfBody)) {
+        opsToMove.push_back(&op);
+        willBeMovedSet.insert(&op);
+      }
+    }
+  }
+
+  // For all instructions that we found to be invariant, move outside of the
+  // loop.
+  LogicalResult result = looplike.moveOutOfLoop(opsToMove);
+  LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n"));
+  return result;
+}
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 7826650f57471..6e59ef2d696ac 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -6,12 +6,8 @@ add_mlir_library(MLIRTransforms
   CSE.cpp
   Inliner.cpp
   LocationSnapshot.cpp
-  LoopCoalescing.cpp
-  LoopFusion.cpp
   LoopInvariantCodeMotion.cpp
   OpStats.cpp
-  ParallelLoopCollapsing.cpp
-  PipelineDataTransfer.cpp
   SCCP.cpp
   StripDebugInfo.cpp
   SymbolDCE.cpp
@@ -21,18 +17,13 @@ add_mlir_library(MLIRTransforms
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
 
   DEPENDS
-  MLIRStandardOpsIncGen
   MLIRTransformsPassIncGen
 
   LINK_LIBS PUBLIC
-  MLIRAffine
   MLIRAnalysis
   MLIRCopyOpInterface
   MLIRLoopLikeInterface
-  MLIRMemRef
-  MLIRSCF
   MLIRPass
   MLIRSupport
   MLIRTransformUtils
-  MLIRVector
   )
diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp
index bc265a4a233ee..235abda14f0e8 100644
--- a/mlir/lib/Transforms/CSE.cpp
+++ b/mlir/lib/Transforms/CSE.cpp
@@ -15,7 +15,6 @@
 #include "mlir/IR/Dominance.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/ScopedHashTable.h"
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
index 10d41b0f0013f..6643158ac5de0 100644
--- a/mlir/lib/Transforms/ControlFlowSink.cpp
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -16,8 +16,8 @@
 #include "PassDetail.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/ControlFlowSinkUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 
diff --git a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
index 3c8e14aa66bbd..e4e3c16a14656 100644
--- a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
@@ -11,13 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Transforms/Passes.h"
-
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -34,80 +31,6 @@ struct LoopInvariantCodeMotion
 };
 } // namespace
 
-// Checks whether the given op can be hoisted by checking that
-// - the op and any of its contained operations do not depend on SSA values
-//   defined inside of the loop (by means of calling definedOutside).
-// - the op has no side-effects. If sideEffecting is Never, sideeffects of this
-//   op and its nested ops are ignored.
-static bool canBeHoisted(Operation *op,
-                         function_ref definedOutside) {
-  // Check that dependencies are defined outside of loop.
-  if (!llvm::all_of(op->getOperands(), definedOutside))
-    return false;
-  // Check whether this op is side-effect free. If we already know that there
-  // can be no side-effects because the surrounding op has claimed so, we can
-  // (and have to) skip this step.
-  if (auto memInterface = dyn_cast(op)) {
-    if (!memInterface.hasNoEffect())
-      return false;
-    // If the operation doesn't have side effects and it doesn't recursively
-    // have side effects, it can always be hoisted.
-    if (!op->hasTrait())
-      return true;
-
-    // Otherwise, if the operation doesn't provide the memory effect interface
-    // and it doesn't have recursive side effects we treat it conservatively as
-    // side-effecting.
-  } else if (!op->hasTrait()) {
-    return false;
-  }
-
-  // Recurse into the regions for this op and check whether the contained ops
-  // can be hoisted.
-  for (auto ®ion : op->getRegions()) {
-    for (auto &block : region) {
-      for (auto &innerOp : block)
-        if (!canBeHoisted(&innerOp, definedOutside))
-          return false;
-    }
-  }
-  return true;
-}
-
-LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) {
-  auto &loopBody = looplike.getLoopBody();
-
-  // We use two collections here as we need to preserve the order for insertion
-  // and this is easiest.
-  SmallPtrSet willBeMovedSet;
-  SmallVector opsToMove;
-
-  // Helper to check whether an operation is loop invariant wrt. SSA properties.
-  auto isDefinedOutsideOfBody = [&](Value value) {
-    auto *definingOp = value.getDefiningOp();
-    return (definingOp && !!willBeMovedSet.count(definingOp)) ||
-           looplike.isDefinedOutsideOfLoop(value);
-  };
-
-  // Do not use walk here, as we do not want to go into nested regions and hoist
-  // operations from there. These regions might have semantics unknown to this
-  // rewriting. If the nested regions are loops, they will have been processed.
-  for (auto &block : loopBody) {
-    for (auto &op : block.without_terminator()) {
-      if (canBeHoisted(&op, isDefinedOutsideOfBody)) {
-        opsToMove.push_back(&op);
-        willBeMovedSet.insert(&op);
-      }
-    }
-  }
-
-  // For all instructions that we found to be invariant, move outside of the
-  // loop.
-  auto result = looplike.moveOutOfLoop(opsToMove);
-  LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n"));
-  return result;
-}
-
 void LoopInvariantCodeMotion::runOnOperation() {
   // Walk through all loops in a function in innermost-loop-first order. This
   // way, we first LICM from the inner loop, and place the ops in
diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h
index 4496c4223cbec..7e1fedc136c9e 100644
--- a/mlir/lib/Transforms/PassDetail.h
+++ b/mlir/lib/Transforms/PassDetail.h
@@ -13,23 +13,8 @@
 #include "mlir/Transforms/Passes.h"
 
 namespace mlir {
-class AffineDialect;
-
-// Forward declaration from Dialect.h
-template 
-void registerDialect(DialectRegistry ®istry);
-
-namespace arith {
-class ArithmeticDialect;
-} // namespace arith
-
-namespace memref {
-class MemRefDialect;
-} // namespace memref
-
 #define GEN_PASS_CLASSES
 #include "mlir/Transforms/Passes.h.inc"
-
 } // namespace mlir
 
 #endif // TRANSFORMS_PASSDETAIL_H_
diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt
index c42d45325e1b2..8f16426e11b1f 100644
--- a/mlir/lib/Transforms/Utils/CMakeLists.txt
+++ b/mlir/lib/Transforms/Utils/CMakeLists.txt
@@ -4,25 +4,12 @@ add_mlir_library(MLIRTransformUtils
   FoldUtils.cpp
   GreedyPatternRewriteDriver.cpp
   InliningUtils.cpp
-  LoopFusionUtils.cpp
-  LoopUtils.cpp
   RegionUtils.cpp
-  Utils.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
 
-  DEPENDS
-  MLIRStandardOpsIncGen
-
   LINK_LIBS PUBLIC
-  MLIRAffine
-  MLIRArithmetic
   MLIRAnalysis
-  MLIRAffineAnalysis
-  MLIRMemRef
-  MLIRSCF
-  MLIRPass
   MLIRRewrite
-  MLIRStandard
   )
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
index 868174b261c4a..0101f9026dc71 100644
--- a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -18,10 +18,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Transforms/ControlFlowSinkUtils.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
-#include "mlir/Transforms/Utils.h"
 #include 
 
 #define DEBUG_TYPE "cf-sink"
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index cbc8532606281..de15a23b906df 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -13,7 +13,6 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/FunctionInterfaces.h"
 #include "mlir/Rewrite/PatternApplicator.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
deleted file mode 100644
index b2936bbca9620..0000000000000
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ /dev/null
@@ -1,767 +0,0 @@
-//===- Utils.cpp ---- Misc utilities for code and data transformation -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements miscellaneous transformation routines for non-loop IR
-// structures.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Transforms/Utils.h"
-#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
-#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
-#include "mlir/Dialect/Affine/Analysis/Utils.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Dominance.h"
-#include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/TypeSwitch.h"
-
-#define DEBUG_TYPE "transforms-utils"
-
-using namespace mlir;
-
-// Perform the replacement in `op`.
-LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
-                                             Operation *op,
-                                             ArrayRef extraIndices,
-                                             AffineMap indexRemap,
-                                             ArrayRef extraOperands,
-                                             ArrayRef symbolOperands,
-                                             bool allowNonDereferencingOps) {
-  unsigned newMemRefRank = newMemRef.getType().cast().getRank();
-  (void)newMemRefRank; // unused in opt mode
-  unsigned oldMemRefRank = oldMemRef.getType().cast().getRank();
-  (void)oldMemRefRank; // unused in opt mode
-  if (indexRemap) {
-    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
-           "symbolic operand count mismatch");
-    assert(indexRemap.getNumInputs() ==
-           extraOperands.size() + oldMemRefRank + symbolOperands.size());
-    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
-  } else {
-    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
-  }
-
-  // Assert same elemental type.
-  assert(oldMemRef.getType().cast().getElementType() ==
-         newMemRef.getType().cast().getElementType());
-
-  SmallVector usePositions;
-  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
-    if (opEntry.value() == oldMemRef)
-      usePositions.push_back(opEntry.index());
-  }
-
-  // If memref doesn't appear, nothing to do.
-  if (usePositions.empty())
-    return success();
-
-  if (usePositions.size() > 1) {
-    // TODO: extend it for this case when needed (rare).
-    assert(false && "multiple dereferencing uses in a single op not supported");
-    return failure();
-  }
-
-  unsigned memRefOperandPos = usePositions.front();
-
-  OpBuilder builder(op);
-  // The following checks if op is dereferencing memref and performs the access
-  // index rewrites.
-  auto affMapAccInterface = dyn_cast(op);
-  if (!affMapAccInterface) {
-    if (!allowNonDereferencingOps) {
-      // Failure: memref used in a non-dereferencing context (potentially
-      // escapes); no replacement in these cases unless allowNonDereferencingOps
-      // is set.
-      return failure();
-    }
-    op->setOperand(memRefOperandPos, newMemRef);
-    return success();
-  }
-  // Perform index rewrites for the dereferencing op and then replace the op
-  NamedAttribute oldMapAttrPair =
-      affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
-  AffineMap oldMap = oldMapAttrPair.getValue().cast().getValue();
-  unsigned oldMapNumInputs = oldMap.getNumInputs();
-  SmallVector oldMapOperands(
-      op->operand_begin() + memRefOperandPos + 1,
-      op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
-
-  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
-  SmallVector oldMemRefOperands;
-  SmallVector affineApplyOps;
-  oldMemRefOperands.reserve(oldMemRefRank);
-  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
-    for (auto resultExpr : oldMap.getResults()) {
-      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
-                                         oldMap.getNumSymbols(), resultExpr);
-      auto afOp = builder.create(op->getLoc(), singleResMap,
-                                                oldMapOperands);
-      oldMemRefOperands.push_back(afOp);
-      affineApplyOps.push_back(afOp);
-    }
-  } else {
-    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
-  }
-
-  // Construct new indices as a remap of the old ones if a remapping has been
-  // provided. The indices of a memref come right after it, i.e.,
-  // at position memRefOperandPos + 1.
-  SmallVector remapOperands;
-  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
-                        symbolOperands.size());
-  remapOperands.append(extraOperands.begin(), extraOperands.end());
-  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
-  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
-
-  SmallVector remapOutputs;
-  remapOutputs.reserve(oldMemRefRank);
-
-  if (indexRemap &&
-      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
-    // Remapped indices.
-    for (auto resultExpr : indexRemap.getResults()) {
-      auto singleResMap = AffineMap::get(
-          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
-      auto afOp = builder.create(op->getLoc(), singleResMap,
-                                                remapOperands);
-      remapOutputs.push_back(afOp);
-      affineApplyOps.push_back(afOp);
-    }
-  } else {
-    // No remapping specified.
-    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
-  }
-
-  SmallVector newMapOperands;
-  newMapOperands.reserve(newMemRefRank);
-
-  // Prepend 'extraIndices' in 'newMapOperands'.
-  for (Value extraIndex : extraIndices) {
-    assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
-           "single result op's expected to generate these indices");
-    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
-           "invalid memory op index");
-    newMapOperands.push_back(extraIndex);
-  }
-
-  // Append 'remapOutputs' to 'newMapOperands'.
-  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
-
-  // Create new fully composed AffineMap for new op to be created.
-  assert(newMapOperands.size() == newMemRefRank);
-  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
-  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
-  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
-  newMap = simplifyAffineMap(newMap);
-  canonicalizeMapAndOperands(&newMap, &newMapOperands);
-  // Remove any affine.apply's that became dead as a result of composition.
-  for (Value value : affineApplyOps)
-    if (value.use_empty())
-      value.getDefiningOp()->erase();
-
-  OperationState state(op->getLoc(), op->getName());
-  // Construct the new operation using this memref.
-  state.operands.reserve(op->getNumOperands() + extraIndices.size());
-  // Insert the non-memref operands.
-  state.operands.append(op->operand_begin(),
-                        op->operand_begin() + memRefOperandPos);
-  // Insert the new memref value.
-  state.operands.push_back(newMemRef);
-
-  // Insert the new memref map operands.
-  state.operands.append(newMapOperands.begin(), newMapOperands.end());
-
-  // Insert the remaining operands unmodified.
-  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
-                            oldMapNumInputs,
-                        op->operand_end());
-
-  // Result types don't change. Both memref's are of the same elemental type.
-  state.types.reserve(op->getNumResults());
-  for (auto result : op->getResults())
-    state.types.push_back(result.getType());
-
-  // Add attribute for 'newMap', other Attributes do not change.
-  auto newMapAttr = AffineMapAttr::get(newMap);
-  for (auto namedAttr : op->getAttrs()) {
-    if (namedAttr.getName() == oldMapAttrPair.getName())
-      state.attributes.push_back({namedAttr.getName(), newMapAttr});
-    else
-      state.attributes.push_back(namedAttr);
-  }
-
-  // Create the new operation.
-  auto *repOp = builder.createOperation(state);
-  op->replaceAllUsesWith(repOp);
-  op->erase();
-
-  return success();
-}
-
-LogicalResult mlir::replaceAllMemRefUsesWith(
-    Value oldMemRef, Value newMemRef, ArrayRef extraIndices,
-    AffineMap indexRemap, ArrayRef extraOperands,
-    ArrayRef symbolOperands, Operation *domOpFilter,
-    Operation *postDomOpFilter, bool allowNonDereferencingOps,
-    bool replaceInDeallocOp) {
-  unsigned newMemRefRank = newMemRef.getType().cast().getRank();
-  (void)newMemRefRank; // unused in opt mode
-  unsigned oldMemRefRank = oldMemRef.getType().cast().getRank();
-  (void)oldMemRefRank;
-  if (indexRemap) {
-    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
-           "symbol operand count mismatch");
-    assert(indexRemap.getNumInputs() ==
-           extraOperands.size() + oldMemRefRank + symbolOperands.size());
-    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
-  } else {
-    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
-  }
-
-  // Assert same elemental type.
-  assert(oldMemRef.getType().cast().getElementType() ==
-         newMemRef.getType().cast().getElementType());
-
-  std::unique_ptr domInfo;
-  std::unique_ptr postDomInfo;
-  if (domOpFilter)
-    domInfo =
-        std::make_unique(domOpFilter->getParentOfType());
-
-  if (postDomOpFilter)
-    postDomInfo = std::make_unique(
-        postDomOpFilter->getParentOfType());
-
-  // Walk all uses of old memref; collect ops to perform replacement. We use a
-  // DenseSet since an operation could potentially have multiple uses of a
-  // memref (although rare), and the replacement later is going to erase ops.
-  DenseSet opsToReplace;
-  for (auto *op : oldMemRef.getUsers()) {
-    // Skip this use if it's not dominated by domOpFilter.
-    if (domOpFilter && !domInfo->dominates(domOpFilter, op))
-      continue;
-
-    // Skip this use if it's not post-dominated by postDomOpFilter.
-    if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
-      continue;
-
-    // Skip dealloc's - no replacement is necessary, and a memref replacement
-    // at other uses doesn't hurt these dealloc's.
-    if (isa(op) && !replaceInDeallocOp)
-      continue;
-
-    // Check if the memref was used in a non-dereferencing context. It is fine
-    // for the memref to be used in a non-dereferencing way outside of the
-    // region where this replacement is happening.
-    if (!isa(*op)) {
-      if (!allowNonDereferencingOps) {
-        LLVM_DEBUG(llvm::dbgs()
-                   << "Memref replacement failed: non-deferencing memref op: \n"
-                   << *op << '\n');
-        return failure();
-      }
-      // Non-dereferencing ops with the MemRefsNormalizable trait are
-      // supported for replacement.
-      if (!op->hasTrait()) {
-        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
-                                   "memrefs normalizable trait: \n"
-                                << *op << '\n');
-        return failure();
-      }
-    }
-
-    // We'll first collect and then replace --- since replacement erases the op
-    // that has the use, and that op could be postDomFilter or domFilter itself!
-    opsToReplace.insert(op);
-  }
-
-  for (auto *op : opsToReplace) {
-    if (failed(replaceAllMemRefUsesWith(
-            oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
-            symbolOperands, allowNonDereferencingOps)))
-      llvm_unreachable("memref replacement guaranteed to succeed here");
-  }
-
-  return success();
-}
-
-/// Given an operation, inserts one or more single result affine
-/// apply operations, results of which are exclusively used by this operation
-/// operation. The operands of these newly created affine apply ops are
-/// guaranteed to be loop iterators or terminal symbols of a function.
-///
-/// Before
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "send"(%idx, %A, ...)
-///   "compute"(%idx)
-///
-/// After
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "send"(%idx, %A, ...)
-///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "compute"(%idx_)
-///
-/// This allows applying different transformations on send and compute (for eg.
-/// different shifts/delays).
-///
-/// Returns nullptr either if none of opInst's operands were the result of an
-/// affine.apply and thus there was no affine computation slice to create, or if
-/// all the affine.apply op's supplying operands to this opInst did not have any
-/// uses besides this opInst; otherwise returns the list of affine.apply
-/// operations created in output argument `sliceOps`.
-void mlir::createAffineComputationSlice(
-    Operation *opInst, SmallVectorImpl *sliceOps) {
-  // Collect all operands that are results of affine apply ops.
-  SmallVector subOperands;
-  subOperands.reserve(opInst->getNumOperands());
-  for (auto operand : opInst->getOperands())
-    if (isa_and_nonnull(operand.getDefiningOp()))
-      subOperands.push_back(operand);
-
-  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
-  SmallVector affineApplyOps;
-  getReachableAffineApplyOps(subOperands, affineApplyOps);
-  // Skip transforming if there are no affine maps to compose.
-  if (affineApplyOps.empty())
-    return;
-
-  // Check if all uses of the affine apply op's lie only in this op op, in
-  // which case there would be nothing to do.
-  bool localized = true;
-  for (auto *op : affineApplyOps) {
-    for (auto result : op->getResults()) {
-      for (auto *user : result.getUsers()) {
-        if (user != opInst) {
-          localized = false;
-          break;
-        }
-      }
-    }
-  }
-  if (localized)
-    return;
-
-  OpBuilder builder(opInst);
-  SmallVector composedOpOperands(subOperands);
-  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
-  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
-
-  // Create an affine.apply for each of the map results.
-  sliceOps->reserve(composedMap.getNumResults());
-  for (auto resultExpr : composedMap.getResults()) {
-    auto singleResMap = AffineMap::get(composedMap.getNumDims(),
-                                       composedMap.getNumSymbols(), resultExpr);
-    sliceOps->push_back(builder.create(
-        opInst->getLoc(), singleResMap, composedOpOperands));
-  }
-
-  // Construct the new operands that include the results from the composed
-  // affine apply op above instead of existing ones (subOperands). So, they
-  // differ from opInst's operands only for those operands in 'subOperands', for
-  // which they will be replaced by the corresponding one from 'sliceOps'.
-  SmallVector newOperands(opInst->getOperands());
-  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
-    // Replace the subOperands from among the new operands.
-    unsigned j, f;
-    for (j = 0, f = subOperands.size(); j < f; j++) {
-      if (newOperands[i] == subOperands[j])
-        break;
-    }
-    if (j < subOperands.size()) {
-      newOperands[i] = (*sliceOps)[j];
-    }
-  }
-  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
-    opInst->setOperand(idx, newOperands[idx]);
-  }
-}
-
-/// Enum to set patterns of affine expr in tiled-layout map.
-/// TileFloorDiv:  div 
-/// TileMod:  mod 
-/// TileNone: None of the above
-/// Example:
-/// #tiled_2d_128x256 = affine_map<(d0, d1)
-///            -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
-/// "d0 mod 128" and "d1 mod 256" ==> TileMod
-enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
-
-/// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
-/// being floordiv'ed by respective tile sizes appeare in a mod with the same
-/// tile sizes, and no other expression involves those k dimensions. This
-/// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
-/// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
-/// tiled layout, an empty vector is returned.
-static LogicalResult getTileSizePos(
-    AffineMap map,
-    SmallVectorImpl> &tileSizePos) {
-  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
-  // `floordiv` and its position in `map` output.
-  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
-  //                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
-  SmallVector, 4> floordivExprs;
-  unsigned pos = 0;
-  for (AffineExpr expr : map.getResults()) {
-    if (expr.getKind() == AffineExprKind::FloorDiv) {
-      AffineBinaryOpExpr binaryExpr = expr.cast();
-      if (binaryExpr.getRHS().isa())
-        floordivExprs.emplace_back(
-            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
-    }
-    pos++;
-  }
-  // Not tiled layout if `floordivExprs` is empty.
-  if (floordivExprs.empty()) {
-    tileSizePos = SmallVector>{};
-    return success();
-  }
-
-  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
-  // not tiled layout.
-  for (std::tuple fexpr : floordivExprs) {
-    AffineExpr floordivExprLHS = std::get<0>(fexpr);
-    AffineExpr floordivExprRHS = std::get<1>(fexpr);
-    unsigned floordivPos = std::get<2>(fexpr);
-
-    // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
-    // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
-    // other expr, the map is not tiled layout. Example of non tiled layout:
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
-    //   256)>
-    bool found = false;
-    pos = 0;
-    for (AffineExpr expr : map.getResults()) {
-      bool notTiled = false;
-      if (pos != floordivPos) {
-        expr.walk([&](AffineExpr e) {
-          if (e == floordivExprLHS) {
-            if (expr.getKind() == AffineExprKind::Mod) {
-              AffineBinaryOpExpr binaryExpr = expr.cast();
-              // If LHS and RHS of `mod` are the same with those of floordiv.
-              if (floordivExprLHS == binaryExpr.getLHS() &&
-                  floordivExprRHS == binaryExpr.getRHS()) {
-                // Save tile size (RHS of `mod`), and position of `floordiv` and
-                // `mod` if same expr with `mod` is not found yet.
-                if (!found) {
-                  tileSizePos.emplace_back(
-                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
-                  found = true;
-                } else {
-                  // Non tiled layout: Have multilpe `mod` with the same LHS.
-                  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-                  // mod 256, d2 mod 256)>
-                  notTiled = true;
-                }
-              } else {
-                // Non tiled layout: RHS of `mod` is different from `floordiv`.
-                // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-                // mod 128)>
-                notTiled = true;
-              }
-            } else {
-              // Non tiled layout: LHS is the same, but not `mod`.
-              // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-              // floordiv 256)>
-              notTiled = true;
-            }
-          }
-        });
-      }
-      if (notTiled) {
-        tileSizePos = SmallVector>{};
-        return success();
-      }
-      pos++;
-    }
-  }
-  return success();
-}
-
-/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
-/// after normalization. Dimensions that include dynamic dimensions in the map
-/// output will become dynamic dimensions. Return true if `dim` is dynamic
-/// dimension.
-///
-/// Example:
-/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///
-/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
-/// memref<4x?xf32, #map0>  ==>  memref<4x?x?xf32>
-static bool
-isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
-                             SmallVectorImpl &inMemrefTypeDynDims,
-                             MLIRContext *context) {
-  bool isDynamicDim = false;
-  AffineExpr expr = layoutMap.getResults()[dim];
-  // Check if affine expr of the dimension includes dynamic dimension of input
-  // memrefType.
-  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
-    if (e.isa()) {
-      for (unsigned dm : inMemrefTypeDynDims) {
-        if (e == getAffineDimExpr(dm, context)) {
-          isDynamicDim = true;
-        }
-      }
-    }
-  });
-  return isDynamicDim;
-}
-
-/// Create affine expr to calculate dimension size for a tiled-layout map.
-static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
-                                                  TileExprPattern pat) {
-  // Create map output for the patterns.
-  // "floordiv " ==> "ceildiv "
-  // "mod " ==> ""
-  AffineExpr newMapOutput;
-  AffineBinaryOpExpr binaryExpr = nullptr;
-  switch (pat) {
-  case TileExprPattern::TileMod:
-    binaryExpr = oldMapOutput.cast();
-    newMapOutput = binaryExpr.getRHS();
-    break;
-  case TileExprPattern::TileFloorDiv:
-    binaryExpr = oldMapOutput.cast();
-    newMapOutput = getAffineBinaryOpExpr(
-        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
-    break;
-  default:
-    newMapOutput = oldMapOutput;
-  }
-  return newMapOutput;
-}
-
-/// Create new maps to calculate each dimension size of `newMemRefType`, and
-/// create `newDynamicSizes` from them by using AffineApplyOp.
-///
-/// Steps for normalizing dynamic memrefs for a tiled layout map
-/// Example:
-///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///    %0 = dim %arg0, %c1 :memref<4x?xf32>
-///    %1 = alloc(%0) : memref<4x?xf32, #map0>
-///
-/// (Before this function)
-/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
-/// single layout map is supported.
-///
-/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
-/// is memref<4x?x?xf32> in the above example.
-///
-/// (In this function)
-/// 3. Create new maps to calculate each dimension of the normalized memrefType
-/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
-/// dimension size can be calculated by replacing "floordiv " with
-/// "ceildiv " and "mod " with "".
-/// - New map in the above example
-///   #map0 = affine_map<(d0, d1) -> (d0)>
-///   #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
-///   #map2 = affine_map<(d0, d1) -> (32)>
-///
-/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
-/// is used in dynamicSizes of new AllocOp.
-///   %0 = dim %arg0, %c1 : memref<4x?xf32>
-///   %c4 = arith.constant 4 : index
-///   %1 = affine.apply #map1(%c4, %0)
-///   %2 = affine.apply #map2(%c4, %0)
-static void createNewDynamicSizes(MemRefType oldMemRefType,
-                                  MemRefType newMemRefType, AffineMap map,
-                                  memref::AllocOp *allocOp, OpBuilder b,
-                                  SmallVectorImpl &newDynamicSizes) {
-  // Create new input for AffineApplyOp.
-  SmallVector inAffineApply;
-  ArrayRef oldMemRefShape = oldMemRefType.getShape();
-  unsigned dynIdx = 0;
-  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
-    if (oldMemRefShape[d] < 0) {
-      // Use dynamicSizes of allocOp for dynamic dimension.
-      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
-      dynIdx++;
-    } else {
-      // Create ConstantOp for static dimension.
-      Attribute constantAttr =
-          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
-      inAffineApply.emplace_back(
-          b.create(allocOp->getLoc(), constantAttr));
-    }
-  }
-
-  // Create new map to calculate each dimension size of new memref for each
-  // original map output. Only for dynamic dimesion of `newMemRefType`.
-  unsigned newDimIdx = 0;
-  ArrayRef newMemRefShape = newMemRefType.getShape();
-  SmallVector> tileSizePos;
-  (void)getTileSizePos(map, tileSizePos);
-  for (AffineExpr expr : map.getResults()) {
-    if (newMemRefShape[newDimIdx] < 0) {
-      // Create new maps to calculate each dimension size of new memref.
-      enum TileExprPattern pat = TileExprPattern::TileNone;
-      for (auto pos : tileSizePos) {
-        if (newDimIdx == std::get<1>(pos))
-          pat = TileExprPattern::TileFloorDiv;
-        else if (newDimIdx == std::get<2>(pos))
-          pat = TileExprPattern::TileMod;
-      }
-      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
-      AffineMap newMap =
-          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
-      Value affineApp =
-          b.create(allocOp->getLoc(), newMap, inAffineApply);
-      newDynamicSizes.emplace_back(affineApp);
-    }
-    newDimIdx++;
-  }
-}
-
-// TODO: Currently works for static memrefs with a single layout map.
-LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
-  MemRefType memrefType = allocOp->getType();
-  OpBuilder b(*allocOp);
-
-  // Fetch a new memref type after normalizing the old memref to have an
-  // identity map layout.
-  MemRefType newMemRefType =
-      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
-  if (newMemRefType == memrefType)
-    // Either memrefType already had an identity map or the map couldn't be
-    // transformed to an identity map.
-    return failure();
-
-  Value oldMemRef = allocOp->getResult();
-
-  SmallVector symbolOperands(allocOp->symbolOperands());
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-  memref::AllocOp newAlloc;
-  // Check if `layoutMap` is a tiled layout. Only single layout map is
-  // supported for normalizing dynamic memrefs.
-  SmallVector> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
-    MemRefType oldMemRefType = oldMemRef.getType().cast();
-    SmallVector newDynamicSizes;
-    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
-                          newDynamicSizes);
-    // Add the new dynamic sizes in new AllocOp.
-    newAlloc =
-        b.create(allocOp->getLoc(), newMemRefType,
-                                  newDynamicSizes, allocOp->alignmentAttr());
-  } else {
-    newAlloc = b.create(allocOp->getLoc(), newMemRefType,
-                                         allocOp->alignmentAttr());
-  }
-  // Replace all uses of the old memref.
-  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
-                                      /*extraIndices=*/{},
-                                      /*indexRemap=*/layoutMap,
-                                      /*extraOperands=*/{},
-                                      /*symbolOperands=*/symbolOperands,
-                                      /*domOpFilter=*/nullptr,
-                                      /*postDomOpFilter=*/nullptr,
-                                      /*allowNonDereferencingOps=*/true))) {
-    // If it failed (due to escapes for example), bail out.
-    newAlloc.erase();
-    return failure();
-  }
-  // Replace any uses of the original alloc op and erase it. All remaining uses
-  // have to be dealloc's; RAMUW above would've failed otherwise.
-  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
-    return isa(op);
-  }));
-  oldMemRef.replaceAllUsesWith(newAlloc);
-  allocOp->erase();
-  return success();
-}
-
-MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
-                                     unsigned numSymbolicOperands) {
-  unsigned rank = memrefType.getRank();
-  if (rank == 0)
-    return memrefType;
-
-  if (memrefType.getLayout().isIdentity()) {
-    // Either no maps is associated with this memref or this memref has
-    // a trivial (identity) map.
-    return memrefType;
-  }
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-
-  // We don't do any checks for one-to-one'ness; we assume that it is
-  // one-to-one.
-
-  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
-  // for now.
-  // TODO: Normalize the other types of dynamic memrefs.
-  SmallVector> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
-    return memrefType;
-
-  // We have a single map that is not an identity map. Create a new memref
-  // with the right shape and an identity layout map.
-  ArrayRef shape = memrefType.getShape();
-  // FlatAffineConstraint may later on use symbolicOperands.
-  FlatAffineConstraints fac(rank, numSymbolicOperands);
-  SmallVector memrefTypeDynDims;
-  for (unsigned d = 0; d < rank; ++d) {
-    // Use constraint system only in static dimensions.
-    if (shape[d] > 0) {
-      fac.addBound(FlatAffineConstraints::LB, d, 0);
-      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
-    } else {
-      memrefTypeDynDims.emplace_back(d);
-    }
-  }
-  // We compose this map with the original index (logical) space to derive
-  // the upper bounds for the new index space.
-  unsigned newRank = layoutMap.getNumResults();
-  if (failed(fac.composeMatchingMap(layoutMap)))
-    return memrefType;
-  // TODO: Handle semi-affine maps.
-  // Project out the old data dimensions.
-  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
-  SmallVector newShape(newRank);
-  for (unsigned d = 0; d < newRank; ++d) {
-    // Check if each dimension of normalized memrefType is dynamic.
-    bool isDynDim = isNormalizedMemRefDynamicDim(
-        d, layoutMap, memrefTypeDynDims, b.getContext());
-    if (isDynDim) {
-      newShape[d] = -1;
-    } else {
-      // The lower bound for the shape is always zero.
-      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
-      // For a static memref and an affine map with no symbols, this is
-      // always bounded.
-      assert(ubConst.hasValue() && "should always have an upper bound");
-      if (ubConst.getValue() < 0)
-        // This is due to an invalid map that maps to a negative space.
-        return memrefType;
-      // If dimension of new memrefType is dynamic, the value is -1.
-      newShape[d] = ubConst.getValue() + 1;
-    }
-  }
-
-  // Create the new memref type after trivializing the old layout map.
-  MemRefType newMemRefType =
-      MemRefType::Builder(memrefType)
-          .setShape(newShape)
-          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
-
-  return newMemRefType;
-}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index bad8e06ef35df..6ac033edb7f3f 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,6 +3,8 @@ add_mlir_library(MLIRAffineTransformsTestPasses
   TestAffineDataCopy.cpp
   TestAffineLoopUnswitching.cpp
   TestAffineLoopParametricTiling.cpp
+  TestLoopFusion.cpp
+  TestLoopMapping.cpp
   TestLoopPermutation.cpp
   TestVectorizationUtils.cpp
 
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 400af2a66ba7c..b7490935b1294 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -13,10 +13,10 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 #define PASS_NAME "test-affine-data-copy"
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
index d215a3a792dd6..7096e11d1ef65 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
-#include "mlir/Transforms/LoopUtils.h"
 
 using namespace mlir;
 
diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
similarity index 98%
rename from mlir/test/lib/Transforms/TestLoopFusion.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
index 70fcdeb9f5517..592f41ee547ce 100644
--- a/mlir/test/lib/Transforms/TestLoopFusion.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
@@ -12,11 +12,10 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #define DEBUG_TYPE "test-loop-fusion"
 
diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
similarity index 96%
rename from mlir/test/lib/Transforms/TestLoopMapping.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
index 09ea69d68dcbe..78c2fb50d5745 100644
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
@@ -12,11 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #include "llvm/ADT/SetVector.h"
 
diff --git a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
index 1db0981485e84..ee8d371d33693 100644
--- a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
@@ -12,9 +12,8 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #define PASS_NAME "test-loop-permutation"
 
diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
index 958d3621a054a..9174d67c527d4 100644
--- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -21,7 +22,6 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 #include "llvm/ADT/STLExtras.h"
diff --git a/mlir/test/lib/Dialect/SCF/CMakeLists.txt b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
index 31b04b1f3a98a..f2cd9db3d3fec 100644
--- a/mlir/test/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
@@ -1,5 +1,7 @@
 # Exclude tests from libMLIR.so
 add_mlir_library(MLIRSCFTestPasses
+  TestLoopParametricTiling.cpp
+  TestLoopUnrolling.cpp
   TestSCFUtils.cpp
 
   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp b/mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
similarity index 93%
rename from mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
rename to mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
index 184a7331bc559..9a6bbad3bc521 100644
--- a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
@@ -11,10 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
@@ -31,8 +30,7 @@ class SimpleParametricLoopTilingPass
   }
   StringRef getDescription() const final {
     return "test application of parametric tiling to the outer loops so that "
-           "the "
-           "ranges of outer loops become static";
+           "the ranges of outer loops become static";
   }
   SimpleParametricLoopTilingPass() = default;
   SimpleParametricLoopTilingPass(const SimpleParametricLoopTilingPass &) {}
diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
similarity index 97%
rename from mlir/test/lib/Transforms/TestLoopUnrolling.cpp
rename to mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
index 1063853e434ba..5749bd99074c2 100644
--- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
+++ b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
@@ -12,11 +12,10 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 8d5dc53ad8d13..b8e65bf9a37c0 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -2,10 +2,6 @@
 add_mlir_library(MLIRTestTransforms
   TestConstantFold.cpp
   TestInlining.cpp
-  TestLoopFusion.cpp
-  TestLoopMapping.cpp
-  TestLoopParametricTiling.cpp
-  TestLoopUnrolling.cpp
 
   EXCLUDE_FROM_LIBMLIR
 
diff --git a/mlir/test/lib/Transforms/TestConstantFold.cpp b/mlir/test/lib/Transforms/TestConstantFold.cpp
index f0ea5fa888265..23daca4f26ad9 100644
--- a/mlir/test/lib/Transforms/TestConstantFold.cpp
+++ b/mlir/test/lib/Transforms/TestConstantFold.cpp
@@ -9,7 +9,6 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/FoldUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 
diff --git a/mlir/unittests/Transforms/CMakeLists.txt b/mlir/unittests/Transforms/CMakeLists.txt
index b78f3cd8cf223..3b08c8ecffbe0 100644
--- a/mlir/unittests/Transforms/CMakeLists.txt
+++ b/mlir/unittests/Transforms/CMakeLists.txt
@@ -4,4 +4,5 @@ add_mlir_unittest(MLIRTransformsTests
 )
 target_link_libraries(MLIRTransformsTests
   PRIVATE
+  MLIRParser
   MLIRTransforms)

From 88c1df64bdd379b4fb5a7946124c7f52a9c795da Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Fri, 21 Jan 2022 12:35:08 -0800
Subject: [PATCH 492/946] [mlir:ArmSVE][NFC] Remove dead code and unnecessary
 dependencies

Differential Revision: https://reviews.llvm.org/D117981
---
 mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td    |  1 -
 mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp  | 35 ++++++++++---------
 mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt     |  1 -
 .../Transforms/LegalizeForLLVMExport.cpp      | 14 --------
 4 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td b/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
index 5ffcf740496d1..19dcfabe972ba 100644
--- a/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
+++ b/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
@@ -15,7 +15,6 @@
 
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
-include "mlir/Dialect/Arithmetic/IR/ArithmeticBase.td"
 
 //===----------------------------------------------------------------------===//
 // ArmSVE dialect definition
diff --git a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
index b3c79040c48da..1ea2fad56272a 100644
--- a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
+++ b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpImplementation.h"
@@ -20,11 +19,26 @@
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
-using namespace arm_sve;
+using namespace mlir::arm_sve;
 
-#include "mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc"
+//===----------------------------------------------------------------------===//
+// ScalableVector versions of general helpers for comparison ops
+//===----------------------------------------------------------------------===//
+
+/// Return the scalable vector of the same shape and containing i1.
+static Type getI1SameShape(Type type) {
+  auto i1Type = IntegerType::get(type.getContext(), 1);
+  if (auto sVectorType = type.dyn_cast())
+    return VectorType::get(sVectorType.getShape(), i1Type,
+                           sVectorType.getNumScalableDims());
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Tablegen Definitions
+//===----------------------------------------------------------------------===//
 
-static Type getI1SameShape(Type type);
+#include "mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc"
 
 #define GET_OP_CLASSES
 #include "mlir/Dialect/ArmSVE/ArmSVE.cpp.inc"
@@ -38,16 +52,3 @@ void ArmSVEDialect::initialize() {
 #include "mlir/Dialect/ArmSVE/ArmSVE.cpp.inc"
       >();
 }
-
-//===----------------------------------------------------------------------===//
-// ScalableVector versions of general helpers for comparison ops
-//===----------------------------------------------------------------------===//
-
-// Return the scalable vector of the same shape and containing i1.
-static Type getI1SameShape(Type type) {
-  auto i1Type = IntegerType::get(type.getContext(), 1);
-  if (auto sVectorType = type.dyn_cast())
-    return VectorType::get(sVectorType.getShape(), i1Type,
-                           sVectorType.getNumScalableDims());
-  return nullptr;
-}
diff --git a/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
index 4a2393e7ac3d9..9177b5889b948 100644
--- a/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
@@ -10,6 +10,5 @@ add_mlir_dialect_library(MLIRArmSVE
   LINK_LIBS PUBLIC
   MLIRIR
   MLIRLLVMIR
-  MLIRStandard
   MLIRSideEffectInterfaces
   )
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
index fdda398fa01ac..95bc3e6b29599 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
@@ -11,7 +11,6 @@
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/ArmSVE/Transforms.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/PatternMatch.h"
 
@@ -34,19 +33,6 @@ class ForwardOperands : public OpConversionPattern {
   }
 };
 
-class ReturnOpTypeConversion : public OpConversionPattern {
-public:
-  using OpConversionPattern::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(ReturnOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const final {
-    rewriter.updateRootInPlace(
-        op, [&]() { op->setOperands(adaptor.getOperands()); });
-    return success();
-  }
-};
-
 using SdotOpLowering = OneToOneConvertToLLVMPattern;
 using SmmlaOpLowering = OneToOneConvertToLLVMPattern;
 using UdotOpLowering = OneToOneConvertToLLVMPattern;

From 65e7cd13bbc6c16804614235aaeaed2e2ed94358 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 11:41:00 -0800
Subject: [PATCH 493/946] [mlir] Remove a bunch of unnecessary dialect
 dependencies

A lot of dialects have unnecessary dependencies, either because files were
copied and pasted when creating new dialects/transformations or for some other
reason. This commit cleans up a bunch of the simple ones:

* Copy/Paste or missed during refactoring
Most of the dependencies cleaned up here look like copy/paste errors from creating
new dialects/transformations, or leftovers that weren't removed during a
refactoring (e.g. when splitting the standard dialect).

* Unnecessary hard coding of constant operations in matchers
There are a few instances where a dialect had a dependency because it
was hardcoding checks for constant operations instead of using the better m_Constant
approach.

Differential Revision: https://reviews.llvm.org/D118062
---
 mlir/include/mlir/Dialect/GPU/GPUOps.td        |  1 -
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp         |  1 -
 .../GPU/Transforms/AsyncRegionRewriter.cpp     |  1 -
 .../Dialect/GPU/Transforms/MemoryPromotion.cpp |  1 -
 mlir/lib/Dialect/OpenACC/CMakeLists.txt        |  2 --
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp        | 18 ++++++++++--------
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp   |  1 -
 mlir/lib/Dialect/Quant/CMakeLists.txt          |  1 -
 .../Dialect/Quant/Transforms/ConvertConst.cpp  |  1 -
 mlir/lib/Dialect/SCF/CMakeLists.txt            |  1 -
 mlir/lib/Dialect/SCF/SCF.cpp                   |  3 +--
 mlir/lib/Dialect/Shape/IR/CMakeLists.txt       |  1 -
 mlir/lib/Dialect/Shape/IR/Shape.cpp            |  1 -
 .../Dialect/Shape/IR/ShapeCanonicalization.td  |  1 -
 .../lib/Dialect/SparseTensor/IR/CMakeLists.txt |  2 --
 .../SparseTensor/IR/SparseTensorDialect.cpp    | 15 ++++++++-------
 16 files changed, 19 insertions(+), 32 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 4e54bda5ab0eb..c76772b4470d3 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -15,7 +15,6 @@
 
 include "mlir/Dialect/DLTI/DLTIBase.td"
 include "mlir/Dialect/GPU/GPUBase.td"
-include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/IR/EnumAttr.td"
 include "mlir/IR/FunctionInterfaces.td"
 include "mlir/IR/SymbolInterfaces.td"
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index e44832460c0e0..d195ba121c81b 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -15,7 +15,6 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
index 5d7ded743da59..7b89936682477 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/GPU/Utils.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/PatternMatch.h"
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index 62c11f12b3f89..62304fbb2cfa9 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -17,7 +17,6 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
 
diff --git a/mlir/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
index ba1c2e9e82906..cc84ba20569c3 100644
--- a/mlir/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
@@ -8,8 +8,6 @@ add_mlir_dialect_library(MLIROpenACC
   MLIROpenACCOpsIncGen
 
   LINK_LIBS PUBLIC
-  MLIRArithmetic
   MLIRIR
-  MLIRStandard
   )
 
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 31bde5ec9a486..cfd1fbdf341e9 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -7,12 +7,11 @@
 // =============================================================================
 
 #include "mlir/Dialect/OpenACC/OpenACC.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.cpp.inc"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -175,14 +174,17 @@ struct RemoveConstantIfCondition : public OpRewritePattern {
   LogicalResult matchAndRewrite(OpTy op,
                                 PatternRewriter &rewriter) const override {
     // Early return if there is no condition.
-    if (!op.ifCond())
+    Value ifCond = op.ifCond();
+    if (!ifCond)
       return success();
 
-    auto constOp = op.ifCond().template getDefiningOp();
-    if (constOp && constOp.getValue().template cast().getInt())
-      rewriter.updateRootInPlace(op, [&]() { op.ifCondMutable().erase(0); });
-    else if (constOp)
-      rewriter.eraseOp(op);
+    IntegerAttr constAttr;
+    if (matchPattern(ifCond, m_Constant(&constAttr))) {
+      if (constAttr.getInt())
+        rewriter.updateRootInPlace(op, [&]() { op.ifCondMutable().erase(0); });
+      else
+        rewriter.eraseOp(op);
+    }
 
     return success();
   }
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index a527954dfd8ec..f0c5cdc6eb63e 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpImplementation.h"
diff --git a/mlir/lib/Dialect/Quant/CMakeLists.txt b/mlir/lib/Dialect/Quant/CMakeLists.txt
index a122b4f626afc..5c447dcbc2e26 100644
--- a/mlir/lib/Dialect/Quant/CMakeLists.txt
+++ b/mlir/lib/Dialect/Quant/CMakeLists.txt
@@ -22,6 +22,5 @@ add_mlir_dialect_library(MLIRQuant
   MLIRPass
   MLIRSideEffectInterfaces
   MLIRSupport
-  MLIRStandard
   MLIRTransformUtils
   )
diff --git a/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp b/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
index 7b66dba229541..3dc9f9f8a5365 100644
--- a/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
+++ b/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
@@ -12,7 +12,6 @@
 #include "mlir/Dialect/Quant/QuantOps.h"
 #include "mlir/Dialect/Quant/QuantizeUtils.h"
 #include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt
index 2f7506c58c820..0af9b77709c16 100644
--- a/mlir/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/CMakeLists.txt
@@ -12,7 +12,6 @@ add_mlir_dialect_library(MLIRSCF
   MLIRBufferization
   MLIRIR
   MLIRLoopLikeInterface
-  MLIRMemRef
   MLIRSideEffectInterfaces
   MLIRStandard
   )
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index 7276ed0cdb740..807b0b6df78d8 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -9,14 +9,13 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/InliningUtils.h"
+
 using namespace mlir;
 using namespace mlir::scf;
 
diff --git a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
index 8365d4808e2f0..e0cc5abe33438 100644
--- a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
@@ -19,6 +19,5 @@ add_mlir_dialect_library(MLIRShape
   MLIRInferTypeOpInterface
   MLIRIR
   MLIRSideEffectInterfaces
-  MLIRStandard
   MLIRTensor
   )
diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp
index 7e97846c33572..7352d850f338d 100644
--- a/mlir/lib/Dialect/Shape/IR/Shape.cpp
+++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/CommonFolders.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Traits.h"
 #include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td b/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
index 0825f0f680979..947b28851fdf0 100644
--- a/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
+++ b/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
@@ -1,5 +1,4 @@
 include "mlir/Dialect/Shape/IR/ShapeOps.td"
-include "mlir/Dialect/StandardOps/IR/Ops.td"
 include "mlir/Dialect/Tensor/IR/TensorOps.td"
 
 def AllInputShapesEq : Constraint()) {
-    unsigned d = constantOp.getValue().cast().getInt();
+  IntegerAttr constantAttr;
+  if (matchPattern(dim, m_Constant(&constantAttr))) {
+    unsigned d = constantAttr.getInt();
     if (d >= tensor.getType().cast().getRank())
       return failure();
   }
@@ -227,11 +227,12 @@ static LogicalResult verify(InitOp op) {
   for (unsigned i = 0; i < rank; i++) {
     if (shape[i] == ShapedType::kDynamicSize)
       continue;
-    auto constantOp = op.sizes()[i].getDefiningOp();
-    if (!constantOp ||
-        constantOp.getValue().cast().getInt() != shape[i])
+    IntegerAttr constantAttr;
+    if (!matchPattern(op.sizes()[i], m_Constant(&constantAttr)) ||
+        constantAttr.getInt() != shape[i]) {
       return op.emitError("unexpected mismatch with static dimension size ")
              << shape[i];
+    }
   }
   return success();
 }

From 03e9ba274072a8921d78c678222fb0b43111125b Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha 
Date: Mon, 24 Jan 2022 16:56:02 -0800
Subject: [PATCH 494/946] [ObjCARC] Remove unused RetainRVDep dependency kind.
 NFC.

---
 llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 3 ---
 llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h   | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4921209f041b4..de0f5803b4c77 100644
--- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -194,9 +194,6 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
       return CanInterruptRV(Class);
     }
   }
-
-  case RetainRVDep:
-    return CanInterruptRV(GetBasicARCInstKind(Inst));
   }
 
   llvm_unreachable("Invalid dependence flavor");
diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index cf4c05ebe91ca..dd6a1c3f97958 100644
--- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -46,8 +46,7 @@ enum DependenceKind {
   AutoreleasePoolBoundary,
   CanChangeRetainCount,
   RetainAutoreleaseDep,       ///< Blocks objc_retainAutorelease.
-  RetainAutoreleaseRVDep,     ///< Blocks objc_retainAutoreleaseReturnValue.
-  RetainRVDep                 ///< Blocks objc_retainAutoreleasedReturnValue.
+  RetainAutoreleaseRVDep      ///< Blocks objc_retainAutoreleaseReturnValue.
 };
 
 /// Find dependent instructions. If there is exactly one dependent instruction,

From e7298464c5d004a119583cdb8a120dc3d968508d Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha 
Date: Mon, 24 Jan 2022 17:14:39 -0800
Subject: [PATCH 495/946] [ObjCARC] Use "UnsafeClaimRV" to refer to unsafeClaim
 in enums. NFC.

This matches the actual runtime function more closely.
I considered also renaming both RetainRV/UnsafeClaimRV to end with
"ARV", for AutoreleasedReturnValue, but there's less potential
for confusion there.
---
 llvm/include/llvm/Analysis/ObjCARCInstKind.h  |  2 +-
 llvm/include/llvm/Analysis/ObjCARCUtil.h      |  6 ++--
 llvm/lib/Analysis/ObjCARCInstKind.cpp         | 28 +++++++++----------
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp |  2 +-
 .../ObjCARC/ARCRuntimeEntryPoints.h           | 10 +++----
 .../Transforms/ObjCARC/ObjCARCContract.cpp    |  2 +-
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp   | 20 ++++++-------
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  4 +--
 8 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ObjCARCInstKind.h b/llvm/include/llvm/Analysis/ObjCARCInstKind.h
index 84565b9315c78..e332bcf88be79 100644
--- a/llvm/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/llvm/include/llvm/Analysis/ObjCARCInstKind.h
@@ -28,7 +28,7 @@ namespace objcarc {
 enum class ARCInstKind {
   Retain,                   ///< objc_retain
   RetainRV,                 ///< objc_retainAutoreleasedReturnValue
-  ClaimRV,                  ///< objc_unsafeClaimAutoreleasedReturnValue
+  UnsafeClaimRV,            ///< objc_unsafeClaimAutoreleasedReturnValue
   RetainBlock,              ///< objc_retainBlock
   Release,                  ///< objc_release
   Autorelease,              ///< objc_autorelease
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 362dd6c299925..1d330ca58a872 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -48,15 +48,15 @@ inline Optional getAttachedARCFunction(const CallBase *CB) {
   return cast(B->Inputs[0]);
 }
 
-/// Check whether the function is retainRV/claimRV.
+/// Check whether the function is retainRV/unsafeClaimRV.
 inline bool isRetainOrClaimRV(ARCInstKind Kind) {
-  return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV;
+  return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::UnsafeClaimRV;
 }
 
 /// This function returns the ARCInstKind of the function attached to operand
 /// bundle clang_arc_attachedcall. It returns None if the call doesn't have the
 /// operand bundle or the operand is null. Otherwise it returns either RetainRV
-/// or ClaimRV.
+/// or UnsafeClaimRV.
 inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) {
   Optional Fn = getAttachedARCFunction(CB);
   if (!Fn.hasValue())
diff --git a/llvm/lib/Analysis/ObjCARCInstKind.cpp b/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f74a9f7f104fa..d177ee056a93a 100644
--- a/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -32,8 +32,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
     return OS << "ARCInstKind::Retain";
   case ARCInstKind::RetainRV:
     return OS << "ARCInstKind::RetainRV";
-  case ARCInstKind::ClaimRV:
-    return OS << "ARCInstKind::ClaimRV";
+  case ARCInstKind::UnsafeClaimRV:
+    return OS << "ARCInstKind::UnsafeClaimRV";
   case ARCInstKind::RetainBlock:
     return OS << "ARCInstKind::RetainBlock";
   case ARCInstKind::Release:
@@ -127,7 +127,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
   case Intrinsic::objc_clang_arc_use:
     return ARCInstKind::IntrinsicUser;
   case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
-    return ARCInstKind::ClaimRV;
+    return ARCInstKind::UnsafeClaimRV;
   case Intrinsic::objc_retainedObject:
     return ARCInstKind::NoopCast;
   case Intrinsic::objc_unretainedObject:
@@ -334,7 +334,7 @@ bool llvm::objcarc::IsUser(ARCInstKind Class) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::Call:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -370,7 +370,7 @@ bool llvm::objcarc::IsRetain(ARCInstKind Class) {
   case ARCInstKind::Call:
   case ARCInstKind::User:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -384,7 +384,7 @@ bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::RetainBlock:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
@@ -416,7 +416,7 @@ bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::NoopCast:
@@ -451,7 +451,7 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -486,7 +486,7 @@ bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -522,7 +522,7 @@ bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
     return true;
   case ARCInstKind::Release:
@@ -563,7 +563,7 @@ bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::Release:
   case ARCInstKind::RetainBlock:
@@ -598,7 +598,7 @@ bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -643,7 +643,7 @@ bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
   case ARCInstKind::RetainBlock:
@@ -696,7 +696,7 @@ bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::CallOrUser:
   case ARCInstKind::Call:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return true;
   }
 
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index e3eb3f825851d..74b903f99284d 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -97,7 +97,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
       objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
       (void)Kind;
       assert((Kind == objcarc::ARCInstKind::RetainRV ||
-              Kind == objcarc::ARCInstKind::ClaimRV) &&
+              Kind == objcarc::ARCInstKind::UnsafeClaimRV) &&
              "use expected to be the argument of operand bundle "
              "\"clang.arc.attachedcall\"");
       U.set(FCache.getCallee());
diff --git a/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 764dc5f927073..c11691c613ac7 100644
--- a/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -42,7 +42,7 @@ enum class ARCRuntimeEntryPointKind {
   Autorelease,
   StoreStrong,
   RetainRV,
-  ClaimRV,
+  UnsafeClaimRV,
   RetainAutorelease,
   RetainAutoreleaseRV,
 };
@@ -62,7 +62,7 @@ class ARCRuntimeEntryPoints {
     Autorelease = nullptr;
     StoreStrong = nullptr;
     RetainRV = nullptr;
-    ClaimRV = nullptr;
+    UnsafeClaimRV = nullptr;
     RetainAutorelease = nullptr;
     RetainAutoreleaseRV = nullptr;
   }
@@ -87,9 +87,9 @@ class ARCRuntimeEntryPoints {
     case ARCRuntimeEntryPointKind::RetainRV:
       return getIntrinsicEntryPoint(RetainRV,
                                 Intrinsic::objc_retainAutoreleasedReturnValue);
-    case ARCRuntimeEntryPointKind::ClaimRV:
+    case ARCRuntimeEntryPointKind::UnsafeClaimRV:
       return getIntrinsicEntryPoint(
-          ClaimRV, Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
+          UnsafeClaimRV, Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
     case ARCRuntimeEntryPointKind::RetainAutorelease:
       return getIntrinsicEntryPoint(RetainAutorelease,
                                     Intrinsic::objc_retainAutorelease);
@@ -127,7 +127,7 @@ class ARCRuntimeEntryPoints {
   Function *RetainRV = nullptr;
 
   /// Declaration for objc_unsafeClaimAutoreleasedReturnValue().
-  Function *ClaimRV = nullptr;
+  Function *UnsafeClaimRV = nullptr;
 
   /// Declaration for objc_retainAutorelease().
   Function *RetainAutorelease = nullptr;
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index c2ed94e8e1f62..9e2832827686a 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -433,7 +433,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
     // If we succeed in our optimization, fall through.
     LLVM_FALLTHROUGH;
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV: {
+  case ARCInstKind::UnsafeClaimRV: {
     bool IsInstContainedInBundle = BundledInsts->contains(Inst);
 
     // Return now if the target doesn't need a special inline-asm marker. Return
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 0fa4904456cdb..d07fa1d118e42 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -515,7 +515,7 @@ class ObjCARCOpt {
       Function &F, DenseMap &BlockColors,
       Instruction *Inst, ARCInstKind Class, const Value *Arg);
 
-  /// Try to optimize an AutoreleaseRV with a RetainRV or ClaimRV.  If the
+  /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV.  If the
   /// optimization occurs, returns true to indicate that the caller should
   /// assume the instructions are dead.
   bool OptimizeInlinedAutoreleaseRVCall(
@@ -705,14 +705,14 @@ bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
     return true;
   }
 
-  // ClaimRV is a frontend peephole for RetainRV + Release.  Since the
-  // AutoreleaseRV and RetainRV cancel out, replace the ClaimRV with a Release.
-  assert(Class == ARCInstKind::ClaimRV);
+  // UnsafeClaimRV is a frontend peephole for RetainRV + Release.  Since the
+  // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
+  assert(Class == ARCInstKind::UnsafeClaimRV);
   Value *CallArg = cast(Inst)->getArgOperand(0);
   CallInst *Release = CallInst::Create(
       EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst);
-  assert(IsAlwaysTail(ARCInstKind::ClaimRV) &&
-         "Expected ClaimRV to be safe to tail call");
+  assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
+         "Expected UnsafeClaimRV to be safe to tail call");
   Release->setTailCall();
   Inst->replaceAllUsesWith(CallArg);
   EraseInstruction(Inst);
@@ -810,7 +810,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     BlockColors = colorEHFunclets(F);
 
   // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
-  // with RetainRV and ClaimRV.
+  // with RetainRV and UnsafeClaimRV.
   Instruction *DelayedAutoreleaseRV = nullptr;
   const Value *DelayedAutoreleaseRVArg = nullptr;
   auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
@@ -837,7 +837,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       return false;
 
     // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
-    // ClaimRV, it's probably safe to skip over even opaque function calls
+    // UnsafeClaimRV, it's probably safe to skip over even opaque function calls
     // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
     // have the same RCIdentityRoot.  However, what really matters is
     // skipping instructions or intrinsics that the inliner could leave behind;
@@ -881,7 +881,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       setDelayedAutoreleaseRV(Inst);
       continue;
     case ARCInstKind::RetainRV:
-    case ARCInstKind::ClaimRV:
+    case ARCInstKind::UnsafeClaimRV:
       if (DelayedAutoreleaseRV) {
         // We have a potential RV pair.  Check if they cancel out.
         if (OptimizeInlinedAutoreleaseRVCall(F, BlockColors, Inst, Arg, Class,
@@ -1165,7 +1165,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
       DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
                                      Inst->getParent(), Inst, PA);
       break;
-    case ARCInstKind::ClaimRV:
+    case ARCInstKind::UnsafeClaimRV:
     case ARCInstKind::RetainRV:
     case ARCInstKind::AutoreleaseRV:
       // Don't move these; the RV optimization depends on the autoreleaseRV
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index fcf001be953ae..c9f872f5b7e1b 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1667,7 +1667,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
   Module *Mod = CB.getModule();
   assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
   bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
-       IsClaimRV = !IsRetainRV;
+       IsUnsafeClaimRV = !IsRetainRV;
 
   for (auto *RI : Returns) {
     Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
@@ -1694,7 +1694,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
         //   and erase the autoreleaseRV call.
         // - If retainRV is attached to the call, just erase the autoreleaseRV
         //   call.
-        if (IsClaimRV) {
+        if (IsUnsafeClaimRV) {
           Builder.SetInsertPoint(II);
           Function *IFn =
               Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);

From 07be76f2ae19a3b656b01d9e630fccf944aec0b3 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu 
Date: Tue, 25 Jan 2022 13:07:29 +0800
Subject: [PATCH 496/946] [M68k][Disassembler][NFC] Re-organize test files

Put test cases of each instruction category into their own files. NFC.
---
 llvm/test/MC/Disassembler/M68k/arithmetic.txt | 10 +++++
 llvm/test/MC/Disassembler/M68k/bits.txt       |  4 ++
 llvm/test/MC/Disassembler/M68k/control.txt    | 14 +++++++
 llvm/test/MC/Disassembler/M68k/data.txt       |  6 +++
 .../MC/Disassembler/M68k/instructions.txt     | 38 -------------------
 .../MC/Disassembler/M68k/shift-rotate.txt     | 12 ++++++
 6 files changed, 46 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/MC/Disassembler/M68k/arithmetic.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/bits.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/control.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/data.txt
 delete mode 100644 llvm/test/MC/Disassembler/M68k/instructions.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/shift-rotate.txt

diff --git a/llvm/test/MC/Disassembler/M68k/arithmetic.txt b/llvm/test/MC/Disassembler/M68k/arithmetic.txt
new file mode 100644
index 0000000000000..670d4297dab87
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/arithmetic.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: adda.l %a0, %a1
+0xd3 0xc8
+# CHECK: sub.w %d3, %d1
+0x92 0x43
+# CHECK: cmp.w %d1, %d0
+0xb0 0x41
+# CHECK: neg.w %d0
+0x44 0x40
diff --git a/llvm/test/MC/Disassembler/M68k/bits.txt b/llvm/test/MC/Disassembler/M68k/bits.txt
new file mode 100644
index 0000000000000..c0a3001ffd265
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/bits.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: btst #0, %d3
+0x08 0x03 0x00 0x00
diff --git a/llvm/test/MC/Disassembler/M68k/control.txt b/llvm/test/MC/Disassembler/M68k/control.txt
new file mode 100644
index 0000000000000..8140e28ac7e6b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/control.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: bra $0
+0x60 0x00 0x00 0x00
+# CHECK: jsr $0
+0x4e 0xb9 0x00 0x00 0x00 0x00
+# CHECK: rts
+0x4e 0x75
+# CHECK: seq %d0
+0x57 0xc0
+# CHECK: sgt %d0
+0x5e 0xc0
+# CHECK: nop
+0x4e 0x71
diff --git a/llvm/test/MC/Disassembler/M68k/data.txt b/llvm/test/MC/Disassembler/M68k/data.txt
new file mode 100644
index 0000000000000..c29ca08c3a41c
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/data.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: move.l %a1, %a0
+0x20 0x49
+# CHECK: lea (50,%a0), %a1
+0x43 0xe8 0x00 0x32
diff --git a/llvm/test/MC/Disassembler/M68k/instructions.txt b/llvm/test/MC/Disassembler/M68k/instructions.txt
deleted file mode 100644
index 23316503f094b..0000000000000
--- a/llvm/test/MC/Disassembler/M68k/instructions.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
-
-# CHECK: move.l %a1, %a0
-0x20 0x49
-# CHECK: adda.l %a0, %a1
-0xd3 0xc8
-# CHECK: sub.w %d3, %d1
-0x92 0x43
-# CHECK: cmp.w %d1, %d0
-0xb0 0x41
-# CHECK: neg.w %d0
-0x44 0x40
-# CHECK: btst #0, %d3
-0x08 0x03 0x00 0x00
-# CHECK: bra $0
-0x60 0x00 0x00 0x00
-# CHECK: jsr $0
-0x4e 0xb9 0x00 0x00 0x00 0x00
-# CHECK: seq %d0
-0x57 0xc0
-# CHECK: sgt %d0
-0x5e 0xc0
-# CHECK: lea (50,%a0), %a1
-0x43 0xe8 0x00 0x32
-# CHECK: lsl.l #5, %d1
-0xeb 0x89
-# CHECK: lsr.l #5, %d1
-0xea 0x89
-# CHECK: asr.l #5, %d1
-0xea 0x81
-# CHECK: rol.l #5, %d1
-0xeb 0x99
-# CHECK: ror.l #5, %d1
-0xea 0x99
-# CHECK: nop
-0x4e 0x71
-# CHECK: rts
-0x4e 0x75
diff --git a/llvm/test/MC/Disassembler/M68k/shift-rotate.txt b/llvm/test/MC/Disassembler/M68k/shift-rotate.txt
new file mode 100644
index 0000000000000..37e5e2c800f37
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/shift-rotate.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: lsl.l #5, %d1
+0xeb 0x89
+# CHECK: lsr.l #5, %d1
+0xea 0x89
+# CHECK: asr.l #5, %d1
+0xea 0x81
+# CHECK: rol.l #5, %d1
+0xeb 0x99
+# CHECK: ror.l #5, %d1
+0xea 0x99

From 63b8018468420a1e848068385dcfaab04f730d54 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 21:09:19 -0800
Subject: [PATCH 497/946] [mlir:LoopLikeInterface] Add missing dependency on
 SideEffectInterfaces

This was missed when moveLoopInvariantCode was added.
---
 mlir/lib/Interfaces/CMakeLists.txt | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index a6f8e1118ddee..d1aae8e28273d 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -36,8 +36,21 @@ add_mlir_interface_library(CopyOpInterface)
 add_mlir_interface_library(DataLayoutInterfaces)
 add_mlir_interface_library(DerivedAttributeOpInterface)
 add_mlir_interface_library(InferTypeOpInterface)
-add_mlir_interface_library(LoopLikeInterface)
 add_mlir_interface_library(SideEffectInterfaces)
 add_mlir_interface_library(TilingInterface)
 add_mlir_interface_library(VectorInterfaces)
 add_mlir_interface_library(ViewLikeInterface)
+
+add_mlir_library(MLIRLoopLikeInterface
+  LoopLikeInterface.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces
+
+  DEPENDS
+  MLIRLoopLikeInterfaceIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRSideEffectInterfaces
+  )

From e51a20e166b6c4c7e610fa323d8dcf1213ce88a3 Mon Sep 17 00:00:00 2001
From: Mehdi Amini 
Date: Tue, 25 Jan 2022 05:15:08 +0000
Subject: [PATCH 498/946] Fix python test to register all passes before using
 "normalize-memrefs"

The pass moved from mlir.transforms to the Memref dialect.
---
 mlir/test/python/pass_manager.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 380dc7ca3fb28..e5a42573a0e22 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -71,6 +71,7 @@ def testParseFail():
 def testInvalidNesting():
   with Context():
     try:
+      import mlir.all_passes_registration
       pm = PassManager.parse("builtin.func(normalize-memrefs)")
     except ValueError as e:
       # CHECK: Can't add pass 'NormalizeMemRefs' restricted to 'builtin.module' on a PassManager intended to run on 'builtin.func', did you intend to nest?

From 61b81e0f49510918cb11d79b4636de17b014806b Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 21:23:30 -0800
Subject: [PATCH 499/946] [mlir:MLIRAffineUtils] Add missing dependency on
 MLIRAffineAnalysis

This was missed in a70aa7bb0d9a6066831b339e0a09a2c1bc74fe2b.
---
 mlir/lib/Dialect/Affine/Utils/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index 4393e2971a031..96920e793daf3 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_dialect_library(MLIRAffineUtils
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRAnalysis
   MLIRMemRef
   MLIRTransformUtils

From 71cb5ed03c9b564c5bf65c46ea0d89e886b2302f Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht 
Date: Mon, 24 Jan 2022 21:21:37 -0800
Subject: [PATCH 500/946] [bazel] Update MLIR deps

I believe this is due to D117839, D117848, maybe others.

Latest build failure: https://buildkite.com/llvm-project/upstream-bazel-rbe/builds/18053
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index a3876c0b71f4f..436e7a79833ec 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2186,7 +2186,11 @@ cc_library(
             "lib/Dialect/Affine/Utils/*.h",
         ],
     ),
-    hdrs = ["include/mlir/Dialect/Affine/Utils.h"],
+    hdrs = [
+        "include/mlir/Dialect/Affine/LoopFusionUtils.h",
+        "include/mlir/Dialect/Affine/LoopUtils.h",
+        "include/mlir/Dialect/Affine/Utils.h",
+    ],
     includes = ["include"],
     deps = [
         ":Affine",
@@ -2194,7 +2198,10 @@ cc_library(
         ":Analysis",
         ":IR",
         ":MemRefDialect",
+        ":SCFDialect",
+        ":Support",
         ":TransformUtils",
+        "//llvm:Support",
     ],
 )
 
@@ -2466,6 +2473,7 @@ cc_library(
     deps = [
         ":IR",
         ":LoopLikeInterfaceIncGen",
+        "//llvm:Support",
     ],
 )
 
@@ -3152,6 +3160,7 @@ cc_library(
         ":ArithmeticDialect",
         ":Async",
         ":DLTIDialect",
+        ":AffineUtils",
         ":GPUDialect",
         ":GPUPassIncGen",
         ":MemRefDialect",
@@ -6599,6 +6608,7 @@ cc_library(
     deps = [
         ":ArithmeticDialect",
         ":BufferizationDialect",
+        ":BufferizationTransforms",
         ":IR",
         ":MemRefDialect",
         ":Support",
@@ -7836,6 +7846,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":Affine",
+        ":AffineUtils",
         ":ArithmeticDialect",
         ":IR",
         ":InferTypeOpInterface",
@@ -8013,6 +8024,7 @@ cc_library(
         ":DialectUtils",
         ":IR",
         ":InferTypeOpInterface",
+        ":LoopLikeInterface",
         ":MemRefDialect",
         ":Pass",
         ":StandardOps",

From b827b6340bf821abf443eccf84756f571e2ee47e Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 21:31:29 -0800
Subject: [PATCH 501/946] [mlir] Add missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/AffineToStandard/CMakeLists.txt | 1 +
 mlir/lib/Dialect/Tosa/CMakeLists.txt                | 1 +
 2 files changed, 2 insertions(+)

diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
index aae55d7e235e6..5a5189241d9db 100644
--- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
+++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
@@ -19,4 +19,5 @@ add_mlir_conversion_library(MLIRAffineToStandard
   MLIRStandard
   MLIRTransforms
   MLIRIR
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/Tosa/CMakeLists.txt b/mlir/lib/Dialect/Tosa/CMakeLists.txt
index 9a9c80f933aec..de3328fde0b7b 100644
--- a/mlir/lib/Dialect/Tosa/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/CMakeLists.txt
@@ -20,6 +20,7 @@ add_mlir_dialect_library(MLIRTosa
   MLIRControlFlowInterfaces
   MLIRQuant
   MLIRSideEffectInterfaces
+  MLIRTensor
   MLIRViewLikeInterface
   )
 

From e697b971487d5288b18ae261b5840665510436c6 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 21:36:31 -0800
Subject: [PATCH 502/946] [mlir] Add more missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/VectorToGPU/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
index e9f2ff83a6e0d..e8ec3041c7beb 100644
--- a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
@@ -13,4 +13,5 @@ add_mlir_conversion_library(MLIRVectorToGPU
   MLIRLLVMIR
   MLIRMemRef
   MLIRTransforms
+  MLIRVector
   )

From 8676e10f744ce3500b491af16bd96713e9ce2803 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula 
Date: Sun, 23 Jan 2022 07:39:01 +0530
Subject: [PATCH 503/946] [MLIR] Improve doc for -mlir-print-local-scope and
 unhide

This is too important a debugging option to stay hidden. Also,
improve its cmd-line description; the current description gives no hint
that this is the one to use to have locations printed inline.
Out-of-line locations are also unproductive to work with in many cases
where the locations are actually compact, which is also why this option
should be more visible.  This revision doesn't change the default on it
though.

Reviewed By: rriddle, jpienaar

Differential Revision: https://reviews.llvm.org/D117186
---
 mlir/lib/IR/AsmPrinter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index f69d147c51b38..992fd1f793923 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -143,8 +143,8 @@ struct AsmPrinterOptions {
 
   llvm::cl::opt printLocalScopeOpt{
       "mlir-print-local-scope", llvm::cl::init(false),
-      llvm::cl::desc("Print assuming in local scope by default"),
-      llvm::cl::Hidden};
+      llvm::cl::desc("Print with local scope and inline information (eliding "
+                     "aliases for attributes, types, and locations")};
 };
 } // namespace
 

From 633f5badbf1315e972e9abb141111648a8fd77fd Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 22:11:53 -0800
Subject: [PATCH 504/946] [mlir] Add more missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt    | 1 +
 mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt   | 1 +
 mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt     | 2 ++
 mlir/lib/Conversion/MathToLibm/CMakeLists.txt          | 1 +
 mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt          | 1 +
 mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt      | 1 +
 mlir/lib/Conversion/VectorToSCF/CMakeLists.txt         | 1 +
 mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt      | 2 ++
 mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt      | 1 +
 mlir/lib/Dialect/GPU/CMakeLists.txt                    | 1 +
 mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt           | 2 ++
 mlir/lib/Dialect/Math/Transforms/CMakeLists.txt        | 1 +
 mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt         | 1 +
 mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt | 1 +
 14 files changed, 17 insertions(+)

diff --git a/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
index 14cf225634c12..170b63b0243fe 100644
--- a/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithmeticToLLVM
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   )
diff --git a/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
index e50d6320439e4..6c3c41b2b19f4 100644
--- a/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithmeticToSPIRV
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRSPIRVConversion
   MLIRSPIRV
   )
diff --git a/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt b/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
index 5c729c86373a3..438c659be4119 100644
--- a/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
@@ -11,8 +11,10 @@ add_mlir_conversion_library(MLIRArmNeon2dToIntr
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRArmNeon
   MLIRPass
   MLIRTransforms
   MLIRIR
+  MLIRVector
   )
diff --git a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
index e5a52d525eeae..0b196d7394c53 100644
--- a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
+++ b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_conversion_library(MLIRMathToLibm
   MLIRArithmetic
   MLIRMath
   MLIRStandardOpsTransforms
+  MLIRVector
   )
diff --git a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
index 3c1fc8aa9f4c9..615c6b2a6d759 100644
--- a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_conversion_library(MLIRSCFToSPIRV
   MLIRStandardToSPIRV
   MLIRIR
   MLIRPass
+  MLIRSCF
   MLIRStandard
   MLIRSupport
   MLIRTransforms
diff --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
index d6c8b7e1247ab..4a637648ca2c0 100644
--- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
@@ -18,5 +18,6 @@ add_mlir_conversion_library(MLIRStandardToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMath
+  MLIRStandard
   MLIRTransforms
   )
diff --git a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
index 2b092978265ec..7bce61590a839 100644
--- a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
@@ -12,4 +12,5 @@ add_mlir_conversion_library(MLIRVectorToSCF
   MLIRLLVMIR
   MLIRMemRef
   MLIRTransforms
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index a99bb5789dac4..94456c2bb4054 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -23,11 +23,13 @@ add_mlir_dialect_library(MLIRAffineTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRAffineUtils
   MLIRArithmetic
   MLIRIR
   MLIRMemRef
   MLIRPass
+  MLIRSCFTransforms
   MLIRSideEffectInterfaces
   MLIRStandard
   MLIRTransformUtils
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
index 8b2fe73252ef0..80f6e305a188b 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -9,4 +9,5 @@ add_mlir_dialect_library(MLIRArmSVETransforms
   MLIRIR
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRStandard
   )
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 798213d2d95b0..6c14a3bcfdc49 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -66,6 +66,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
   MLIRParallelLoopMapperEnumsGen
 
   LINK_LIBS PUBLIC
+  MLIRAffineUtils
   MLIRArithmetic
   MLIRAsync
   MLIRDataLayoutInterfaces
diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
index 1231f378a306d..dacabad7ddf8c 100644
--- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
@@ -6,6 +6,8 @@ add_mlir_dialect_library(MLIRLinalgUtils
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
+  MLIRAffineUtils
   MLIRArithmetic
   MLIRIR
   MLIRLinalg
diff --git a/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
index c2182562fc244..1e8b05a2a89fd 100644
--- a/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_dialect_library(MLIRMathTransforms
   MLIRStandard
   MLIRTransforms
   MLIRX86Vector
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index 2c723625038bf..ef4dbb4ffc8e0 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -20,6 +20,7 @@ add_mlir_dialect_library(MLIRSCFTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRArithmetic
   MLIRBufferizationTransforms
   MLIRIR
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
index d8d5714ab19ae..82e25923840dd 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_dialect_library(MLIRStandardOpsTransforms
   MLIRStandardTransformsIncGen
 
   LINK_LIBS PUBLIC
+  MLIRAffine
   MLIRArithmeticTransforms
   MLIRBufferizationTransforms
   MLIRIR

From c913dccfde69617e2357ac25532f25d2a81bca2c Mon Sep 17 00:00:00 2001
From: Max Kazantsev 
Date: Tue, 25 Jan 2022 13:21:37 +0700
Subject: [PATCH 505/946] [SCEV] Use lshr in implications

This patch adds support for implication inference logic for the
following pattern:
```
  lhs < (y >> z) <= y, y <= rhs --> lhs < rhs
```
We should be able to use the fact that a value shifted to the right is
not greater than the original value (provided it is non-negative).

Differential Revision: https://reviews.llvm.org/D116150
Reviewed-By: apilipenko
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |  9 ++++
 llvm/lib/Analysis/ScalarEvolution.cpp         | 45 +++++++++++++++++++
 .../IndVarSimplify/shift-range-checks.ll      | 20 ++-------
 3 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index fd23ba7ae3872..1e6dac44cf2b7 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1898,6 +1898,15 @@ class ScalarEvolution {
                          const SCEV *FoundLHS, const SCEV *FoundRHS,
                          unsigned Depth);
 
+  /// Test whether the condition described by Pred, LHS, and RHS is true
+  /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+  /// true.
+  ///
+  /// This routine tries to reason about shifts.
+  bool isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                     const SCEV *RHS, const SCEV *FoundLHS,
+                                     const SCEV *FoundRHS);
+
   /// If we know that the specified Phi is in the header of its containing
   /// loop, we know the loop executes a constant number of times, and the PHI
   /// node is just a recurrence involving constants, fold it.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3019ff526b66d..fdfbd2c3ca77f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11468,6 +11468,48 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
   return true;
 }
 
+bool ScalarEvolution::isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred,
+                                                    const SCEV *LHS,
+                                                    const SCEV *RHS,
+                                                    const SCEV *FoundLHS,
+                                                    const SCEV *FoundRHS) {
+  // We want to imply LHS < RHS from LHS < (RHS >> shiftvalue).  First, make
+  // sure that we are dealing with the same LHS.
+  if (RHS == FoundRHS) {
+    std::swap(LHS, RHS);
+    std::swap(FoundLHS, FoundRHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+  if (LHS != FoundLHS)
+    return false;
+
+  auto *SUFoundRHS = dyn_cast<SCEVUnknown>(FoundRHS);
+  if (!SUFoundRHS)
+    return false;
+
+  Value *Shiftee, *ShiftValue;
+
+  using namespace PatternMatch;
+  if (match(SUFoundRHS->getValue(),
+            m_LShr(m_Value(Shiftee), m_Value(ShiftValue)))) {
+    auto *ShifteeS = getSCEV(Shiftee);
+    // Prove one of the following:
+    // LHS <u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <u RHS
+    // LHS <=u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS
+    // LHS <s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <s RHS
+    // LHS <=s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <=s RHS
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
+      return isKnownPredicate(ICmpInst::ICMP_ULE, ShifteeS, RHS);
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+      if (isKnownNonNegative(ShifteeS))
+        return isKnownPredicate(ICmpInst::ICMP_SLE, ShifteeS, RHS);
+  }
+
+  return false;
+}
+
 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
@@ -11479,6 +11521,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
     return true;
 
+  if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
                                           CtxI))
     return true;
diff --git a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
index 1dc91d105ab3c..ab4cea2d879f5 100644
--- a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
+++ b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
@@ -5,8 +5,6 @@
 declare i1 @cond()
 declare void @exit(i32 %code)
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_01(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
@@ -18,8 +16,7 @@ define void @test_01(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp slt i32 [[IV]], [[X_SHIFTED]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -64,8 +61,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_02(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
@@ -77,8 +72,7 @@ define void @test_02(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[IV]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -123,8 +117,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_03(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_03(
 ; CHECK-NEXT:  entry:
@@ -136,8 +128,7 @@ define void @test_03(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ult i32 [[IV]], [[X_SHIFTED]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -182,8 +173,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_04(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_04(
 ; CHECK-NEXT:  entry:
@@ -195,8 +184,7 @@ define void @test_04(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ugt i32 [[X_SHIFTED]], [[IV]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()

From f6984b299afcbeddefe53db7ee1ce62e5d68cc4f Mon Sep 17 00:00:00 2001
From: "Liu, Chen3" 
Date: Tue, 25 Jan 2022 14:08:58 +0800
Subject: [PATCH 506/946] Fix the wrong value of bit_AVXVNNI

Differential Revision: https://reviews.llvm.org/D118103
---
 clang/lib/Headers/cpuid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 6df1b4a111726..5d262a60735f2 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -200,7 +200,7 @@
 #define bit_AMXINT8       0x02000000
 
 /* Features in %eax for leaf 7 sub-leaf 1 */
-#define bit_AVXVNNI       0x00000008
+#define bit_AVXVNNI       0x00000010
 #define bit_AVX512BF16    0x00000020
 #define bit_HRESET        0x00400000
 

From 320dc8c4df74ccce318c2c9bdb9b2937438711ac Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay 
Date: Mon, 24 Jan 2022 18:42:39 +0530
Subject: [PATCH 507/946] [mlir][OpenMP] Added omp.atomic.capture operation

This patch adds support for the atomic construct (capture) following section 2.17.7 of the OpenMP 5.0 standard. It also adds tests for the same.

Reviewed By: peixin, kiranchandramohan

Differential Revision: https://reviews.llvm.org/D115851
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  44 +++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  62 ++++++++++
 mlir/test/Dialect/OpenMP/invalid.mlir         | 116 ++++++++++++++++++
 mlir/test/Dialect/OpenMP/ops.mlir             |  36 ++++++
 4 files changed, 258 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 384d629c4bf1d..311513f1682a6 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -708,6 +708,50 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update"> {
   let verifier = [{ return verifyAtomicUpdateOp(*this); }];
 }
 
+def AtomicCaptureOp : OpenMP_Op<"atomic.capture",
+    [SingleBlockImplicitTerminator<"TerminatorOp">]> {
+  let summary = "performs an atomic capture";
+  let description = [{
+    This operation performs an atomic capture.
+
+    `hint` is the value of hint (as used in the hint clause). It is a compile
+    time constant. As the name suggests, this is just a hint for optimization.
+
+    `memory_order` indicates the memory ordering behavior of the construct. It
+    can be one of `seq_cst`, `acq_rel`, `release`, `acquire` or `relaxed`.
+
+    The region has the following allowed forms:
+
+    ```
+      omp.atomic.capture {
+        omp.atomic.update ...
+        omp.atomic.read ...
+        omp.terminator
+      }
+
+      omp.atomic.capture {
+        omp.atomic.read ...
+        omp.atomic.update ...
+        omp.terminator
+      }
+
+      omp.atomic.capture {
+        omp.atomic.read ...
+        omp.atomic.write ...
+        omp.terminator
+      }
+    ```
+
+  }];
+
+  let arguments = (ins DefaultValuedAttr:$hint,
+                       OptionalAttr:$memory_order);
+  let regions = (region SizedRegion<1>:$region);
+  let parser = [{ return parseAtomicCaptureOp(parser, result); }];
+  let printer = [{ return printAtomicCaptureOp(p, *this); }];
+  let verifier = [{ return verifyAtomicCaptureOp(*this); }];
+}
+
 //===----------------------------------------------------------------------===//
 // 2.19.5.7 declare reduction Directive
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index f0c5cdc6eb63e..1eeee3f65f3ba 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1539,6 +1539,68 @@ static LogicalResult verifyAtomicUpdateOp(AtomicUpdateOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// AtomicCaptureOp
+//===----------------------------------------------------------------------===//
+
+/// Parser for AtomicCaptureOp
+static LogicalResult parseAtomicCaptureOp(OpAsmParser &parser,
+                                          OperationState &result) {
+  SmallVector<ClauseType> clauses = {memoryOrderClause, hintClause};
+  SmallVector<int> segments;
+  if (parseClauses(parser, result, clauses, segments) ||
+      parser.parseRegion(*result.addRegion()))
+    return failure();
+  return success();
+}
+
+/// Printer for AtomicCaptureOp
+static void printAtomicCaptureOp(OpAsmPrinter &p, AtomicCaptureOp op) {
+  if (op.memory_order())
+    p << "memory_order(" << op.memory_order() << ") ";
+  if (op.hintAttr())
+    printSynchronizationHint(p, op, op.hintAttr());
+  p.printRegion(op.region());
+}
+
+/// Verifier for AtomicCaptureOp
+static LogicalResult verifyAtomicCaptureOp(AtomicCaptureOp op) {
+  Block::OpListType &ops = op.region().front().getOperations();
+  if (ops.size() != 3)
+    return emitError(op.getLoc())
+           << "expected three operations in omp.atomic.capture region (one "
+              "terminator, and two atomic ops)";
+  auto &firstOp = ops.front();
+  auto &secondOp = *ops.getNextNode(firstOp);
+  auto firstReadStmt = dyn_cast<AtomicReadOp>(firstOp);
+  auto firstUpdateStmt = dyn_cast<AtomicUpdateOp>(firstOp);
+  auto secondReadStmt = dyn_cast<AtomicReadOp>(secondOp);
+  auto secondUpdateStmt = dyn_cast<AtomicUpdateOp>(secondOp);
+  auto secondWriteStmt = dyn_cast<AtomicWriteOp>(secondOp);
+
+  if (!((firstUpdateStmt && secondReadStmt) ||
+        (firstReadStmt && secondUpdateStmt) ||
+        (firstReadStmt && secondWriteStmt)))
+    return emitError(ops.front().getLoc())
+           << "invalid sequence of operations in the capture region";
+  if (firstUpdateStmt && secondReadStmt &&
+      firstUpdateStmt.x() != secondReadStmt.x())
+    return emitError(firstUpdateStmt.getLoc())
+           << "updated variable in omp.atomic.update must be captured in "
+              "second operation";
+  if (firstReadStmt && secondUpdateStmt &&
+      firstReadStmt.x() != secondUpdateStmt.x())
+    return emitError(firstReadStmt.getLoc())
+           << "captured variable in omp.atomic.read must be updated in second "
+              "operation";
+  if (firstReadStmt && secondWriteStmt &&
+      firstReadStmt.x() != secondWriteStmt.address())
+    return emitError(firstReadStmt.getLoc())
+           << "captured variable in omp.atomic.read must be updated in "
+              "second operation";
+  return success();
+}
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2875c61013b75..d0f66c3218cbc 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -650,6 +650,122 @@ func @omp_atomic_update5(%x: memref, %expr: i32) {
 
 // -----
 
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  // expected-error @below {{expected three operations in omp.atomic.capture region}}
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : memref
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.read %v = %x : memref
+    omp.atomic.read %v = %x : memref
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.update %x = %x add %expr : memref, i32
+    omp.atomic.update %x = %x sub %expr : memref, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref, i32
+    omp.atomic.write %x = %expr : memref, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref, i32
+    omp.atomic.update %x = %x add %expr : memref, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.update %x = %x add %expr : memref, i32
+    omp.atomic.write %x = %expr : memref, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref, i32
+    omp.atomic.read %v = %x : memref
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %y: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{updated variable in omp.atomic.update must be captured in second operation}}
+    omp.atomic.update %x = %x add %expr : memref, i32
+    omp.atomic.read %v = %y : memref
+    omp.terminator
+  }
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %y: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
+    omp.atomic.read %v = %y : memref
+    omp.atomic.update %x = %x add %expr : memref, i32
+    omp.terminator
+  }
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref, %y: memref, %v: memref, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
+    omp.atomic.read %v = %x : memref
+    omp.atomic.write %y = %expr : memref, i32
+    omp.terminator
+  }
+}
+
+// -----
+
 func @omp_sections(%data_var1 : memref, %data_var2 : memref, %data_var3 : memref) -> () {
   // expected-error @below {{operand used in both private and firstprivate clauses}}
   omp.sections private(%data_var1 : memref) firstprivate(%data_var1 : memref) {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 950f3d0d472a5..96a0b427123f3 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -584,6 +584,42 @@ func @omp_atomic_update(%x : memref, %expr : i32, %xBool : memref, %exp
   return
 }
 
+// CHECK-LABEL: omp_atomic_capture
+// CHECK-SAME: (%[[v:.*]]: memref, %[[x:.*]]: memref, %[[expr:.*]]: i32)
+func @omp_atomic_capture(%v: memref, %x: memref, %expr: i32) {
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.update %[[x]] = %[[expr]] add %[[x]] : memref, i32
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.update %x = %expr add %x : memref, i32
+    omp.atomic.read %v = %x : memref
+    omp.terminator
+  }
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref
+  // CHECK-NEXT: omp.atomic.update %[[x]] = %[[expr]] add %[[x]] : memref, i32
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.read %v = %x : memref
+    omp.atomic.update %x = %expr add %x : memref, i32
+    omp.terminator
+  }
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref
+  // CHECK-NEXT: omp.atomic.write %[[x]] = %[[expr]] : memref, i32
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.read %v = %x : memref
+    omp.atomic.write %x = %expr : memref, i32
+    omp.terminator
+  }
+  return
+}
+
 // CHECK-LABEL: omp_sectionsop
 func @omp_sectionsop(%data_var1 : memref, %data_var2 : memref,
                      %data_var3 : memref, %redn_var : !llvm.ptr) {

From 881ff4e4ebe8cc0cc045c7c167cffb01f94f27f8 Mon Sep 17 00:00:00 2001
From: River Riddle 
Date: Mon, 24 Jan 2022 22:59:01 -0800
Subject: [PATCH 508/946] [mlir] Remove unnecessary dependency on Tensor from
 MemRef

---
 mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt | 1 +
 mlir/lib/Dialect/Affine/IR/CMakeLists.txt          | 1 +
 mlir/lib/Dialect/MemRef/IR/CMakeLists.txt          | 1 -
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
index d3b205cb79309..500f64b28ff5e 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
@@ -20,4 +20,5 @@ add_mlir_conversion_library(MLIRStandardToSPIRV
   MLIRSupport
   MLIRTransformUtils
   MLIRStandard
+  MLIRTensor
   )
diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
index 345003912f6ee..37cf1efef61e5 100644
--- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
@@ -17,4 +17,5 @@ add_mlir_dialect_library(MLIRAffine
   MLIRMemRef
   MLIRSideEffectInterfaces
   MLIRStandard
+  MLIRTensor
   )
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
index c4415da76b0e4..a03e2c41664ed 100644
--- a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -20,6 +20,5 @@ add_mlir_dialect_library(MLIRMemRef
   MLIRIR
   MLIRMemRefUtils
   MLIRStandard
-  MLIRTensor
   MLIRViewLikeInterface
 )

From bca2d85153dc4a7bff5f671a742e12512a2bc31f Mon Sep 17 00:00:00 2001
From: Lorenzo Chelini 
Date: Tue, 25 Jan 2022 08:50:52 +0100
Subject: [PATCH 509/946] [MLIR][Interfaces] Silence -Wparentheses warning
 (NFC)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
---
 mlir/include/mlir/Interfaces/ControlFlowInterfaces.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
index 806821a988bf3..1e8f7b54c474a 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
@@ -91,7 +91,7 @@ class InvocationBounds {
   /// Create invocation bounds. The lower bound must be at least 0 and only the
   /// upper bound can be unknown.
   InvocationBounds(unsigned lb, Optional ub) : lower(lb), upper(ub) {
-    assert(!ub || ub >= lb && "upper bound cannot be less than lower bound");
+    assert((!ub || ub >= lb) && "upper bound cannot be less than lower bound");
   }
 
   /// Return the lower bound.

From 70cb8daed45fb8a794761a9cfca8432c8ee7c70b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Sat, 22 Jan 2022 01:38:07 +0200
Subject: [PATCH 510/946] [X86] [CodeView] Add codeview mappings for registers
 ST0-ST7

These can end up being needed after https://reviews.llvm.org/D116821.

Suggested by Alexandre Ganea.

Differential Revision: https://reviews.llvm.org/D118072
---
 .../X86/MCTargetDesc/X86MCTargetDesc.cpp      |  9 ++++
 llvm/test/DebugInfo/COFF/x87-registers.ll     | 44 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 llvm/test/DebugInfo/COFF/x87-registers.ll

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 4440bdc3d58f4..8913e405539e3 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -111,6 +111,15 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
 
       {codeview::RegisterId::EFLAGS, X86::EFLAGS},
 
+      {codeview::RegisterId::ST0, X86::ST0},
+      {codeview::RegisterId::ST1, X86::ST1},
+      {codeview::RegisterId::ST2, X86::ST2},
+      {codeview::RegisterId::ST3, X86::ST3},
+      {codeview::RegisterId::ST4, X86::ST4},
+      {codeview::RegisterId::ST5, X86::ST5},
+      {codeview::RegisterId::ST6, X86::ST6},
+      {codeview::RegisterId::ST7, X86::ST7},
+
       {codeview::RegisterId::ST0, X86::FP0},
       {codeview::RegisterId::ST1, X86::FP1},
       {codeview::RegisterId::ST2, X86::FP2},
diff --git a/llvm/test/DebugInfo/COFF/x87-registers.ll b/llvm/test/DebugInfo/COFF/x87-registers.ll
new file mode 100644
index 0000000000000..02585673ffe25
--- /dev/null
+++ b/llvm/test/DebugInfo/COFF/x87-registers.ll
@@ -0,0 +1,44 @@
+; RUN: llc -experimental-debug-variable-locations=true < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s
+
+; CHECK:      DefRangeRegisterSym {
+; CHECK-NEXT:   Kind: S_DEFRANGE_REGISTER
+; CHECK-NEXT:   Register: ST0
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+define i32 @a() !dbg !8 {
+entry:
+  call void @llvm.dbg.declare(metadata [6 x i8]* undef, metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 48)), !dbg !15
+  %0 = tail call x86_fp80 asm sideeffect "", "={st},~{dirflag},~{fpsr},~{flags}"(), !dbg !16, !srcloc !17
+  call void @llvm.dbg.value(metadata x86_fp80 %0, metadata !13, metadata !DIExpression()), !dbg !18
+  ret i32 undef, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "codeview-fp0.c", directory: "llvm/build")
+!2 = !{i32 2, !"CodeView", i32 1}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 2}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 1}
+!7 = !{!"clang version 14.0.0"}
+!8 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "b", scope: !8, file: !1, line: 2, type: !14)
+!14 = !DIBasicType(name: "long double", size: 128, encoding: DW_ATE_float)
+!15 = !DILocation(line: 2, scope: !8)
+!16 = !DILocation(line: 3, scope: !8)
+!17 = !{i64 40}
+!18 = !DILocation(line: 0, scope: !8)
+!19 = !DILocation(line: 4, scope: !8)

From 9554aaa2753bd866a00bf6fb4183656200e758e2 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Mon, 24 Jan 2022 14:29:05 +0100
Subject: [PATCH 511/946] [Dwarf] Optimize getOrCreateSourceID() for repeated
 calls on same file (NFCI)

DwarfCompileUnit::getOrCreateSourceID() is often called many times
in sequence with the same DIFile. This is currently very expensive,
because it involves creating a string from directory and file name
and looking it up in a string map. This patch remembers the last
DIFile and its ID and directly returns that.

This gives a geomean -1.3% compile-time improvement on CTMark O0-g.

Differential Revision: https://reviews.llvm.org/D118041
---
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 11 ++++++++---
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h   |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index ab3c9f486670e..5913c687db48f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -127,9 +127,14 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
   if (!File)
     return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
                                                     CUID);
-  return Asm->OutStreamer->emitDwarfFileDirective(
-      0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
-      File->getSource(), CUID);
+
+  if (LastFile != File) {
+    LastFile = File;
+    LastFileID = Asm->OutStreamer->emitDwarfFileDirective(
+        0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+        File->getSource(), CUID);
+  }
+  return LastFileID;
 }
 
 DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index fb03982b5e4a2..f2e1f63468039 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -86,6 +86,9 @@ class DwarfCompileUnit final : public DwarfUnit {
   /// DWO ID for correlating skeleton and split units.
   uint64_t DWOId = 0;
 
+  const DIFile *LastFile = nullptr;
+  unsigned LastFileID;
+
   /// Construct a DIE for the given DbgVariable without initializing the
   /// DbgVariable's DIE reference.
   DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);

From 2a14bc55c547f0fc7285b783b5320338c3ffdc42 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Tue, 25 Jan 2022 16:39:15 +0800
Subject: [PATCH 512/946] [NFC] [C++20] [Modules] Update comments for handling
 friend

A comment here contains a FIXME about the Modules TS. Now that the
Modules TS has been merged, the comment should be updated. I've checked
the implementation.
---
 clang/lib/Sema/SemaDecl.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a29409461f575..e014500f2114f 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1586,10 +1586,13 @@ void Sema::FilterLookupForScope(LookupResult &R, DeclContext *Ctx, Scope *S,
 /// We've determined that \p New is a redeclaration of \p Old. Check that they
 /// have compatible owning modules.
 bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) {
-  // FIXME: The Modules TS is not clear about how friend declarations are
-  // to be treated. It's not meaningful to have different owning modules for
-  // linkage in redeclarations of the same entity, so for now allow the
-  // redeclaration and change the owning modules to match.
+  // [module.interface]p7:
+  // A declaration is attached to a module as follows:
+  // - If the declaration is a non-dependent friend declaration that nominates a
+  // function with a declarator-id that is a qualified-id or template-id or that
+  // nominates a class other than with an elaborated-type-specifier with neither
+  // a nested-name-specifier nor a simple-template-id, it is attached to the
+  // module to which the friend is attached ([basic.link]).
   if (New->getFriendObjectKind() &&
       Old->getOwningModuleForLinkage() != New->getOwningModuleForLinkage()) {
     New->setLocalOwningModule(Old->getOwningModule());

From aa97bc116d343f7b6f222d7229668de5d361b312 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Fri, 21 Jan 2022 13:03:15 +0100
Subject: [PATCH 513/946] [NFC] Remove uses of PointerType::getElementType()

Instead use either Type::getPointerElementType() or
Type::getNonOpaquePointerElementType().

This is part of D117885, in preparation for deprecating the API.
---
 clang/lib/CodeGen/CGCall.cpp                  | 16 +++--
 clang/lib/CodeGen/CGClass.cpp                 |  5 +-
 clang/lib/CodeGen/CGObjCGNU.cpp               | 15 +++--
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  7 ++-
 clang/lib/CodeGen/ItaniumCXXABI.cpp           |  3 +-
 clang/lib/CodeGen/TargetInfo.cpp              |  8 +--
 llvm/include/llvm/FuzzMutate/OpDescriptor.h   |  2 +-
 llvm/include/llvm/IR/MatrixBuilder.h          |  2 +-
 llvm/include/llvm/IR/Statepoint.h             |  2 +-
 llvm/lib/Analysis/AliasAnalysisEvaluator.cpp  |  6 +-
 llvm/lib/Analysis/ConstantFolding.cpp         |  2 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  5 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         |  3 +-
 llvm/lib/AsmParser/LLParser.cpp               | 10 ++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     | 60 +++++++++----------
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  2 +-
 .../CodeGen/GlobalISel/InlineAsmLowering.cpp  |  2 +-
 llvm/lib/FuzzMutate/Operations.cpp            |  2 +-
 llvm/lib/FuzzMutate/RandomIRBuilder.cpp       | 10 ++--
 llvm/lib/IR/AsmWriter.cpp                     |  2 +-
 llvm/lib/IR/AutoUpgrade.cpp                   |  2 +-
 llvm/lib/IR/ConstantFold.cpp                  | 16 ++---
 llvm/lib/IR/Core.cpp                          |  2 +-
 llvm/lib/IR/Function.cpp                      | 10 ++--
 llvm/lib/IR/IRBuilder.cpp                     |  9 ++-
 llvm/lib/IR/Verifier.cpp                      | 24 ++++----
 .../Target/AArch64/AArch64ISelLowering.cpp    | 12 ++--
 .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp      |  8 +--
 .../AMDGPU/AMDGPURewriteOutArguments.cpp      |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  8 +--
 llvm/lib/Target/BPF/BTFDebug.cpp              |  3 +-
 .../Target/Hexagon/HexagonVectorCombine.cpp   |  2 +-
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp     |  4 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  7 ++-
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp      |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  2 +-
 llvm/lib/Target/Sparc/SparcISelLowering.cpp   |  2 +-
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp  |  4 +-
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp |  7 +--
 .../InstCombine/InstCombineCalls.cpp          | 14 ++---
 .../InstCombine/InstCombineCasts.cpp          | 10 ++--
 .../InstCombineLoadStoreAlloca.cpp            |  3 +-
 .../InstCombine/InstructionCombining.cpp      |  6 +-
 .../Instrumentation/AddressSanitizer.cpp      |  5 +-
 .../Instrumentation/MemProfiler.cpp           |  5 +-
 .../Instrumentation/SanitizerCoverage.cpp     |  2 +-
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp   |  4 +-
 llvm/lib/Transforms/Scalar/SROA.cpp           |  4 +-
 llvm/lib/Transforms/Scalar/Scalarizer.cpp     |  5 +-
 .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp |  2 +-
 .../Transforms/Utils/LowerMemIntrinsics.cpp   |  2 +-
 .../Utils/ScalarEvolutionExpander.cpp         |  8 +--
 llvm/tools/llvm-stress/llvm-stress.cpp        |  6 +-
 54 files changed, 182 insertions(+), 188 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index ccb394283c4ed..a37ff8844e885 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3880,9 +3880,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
   }
 
   // Create the temporary.
-  Address temp = CGF.CreateTempAlloca(destType->getElementType(),
-                                      CGF.getPointerAlign(),
-                                      "icr.temp");
+  Address temp = CGF.CreateTempAlloca(destType->getPointerElementType(),
+                                      CGF.getPointerAlign(), "icr.temp");
   // Loading an l-value can introduce a cleanup if the l-value is __weak,
   // and that cleanup will be conditional if we can't prove that the l-value
   // isn't null, so we need to register a dominating point so that the cleanups
@@ -3892,9 +3891,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
   // Zero-initialize it if we're not doing a copy-initialization.
   bool shouldCopy = CRE->shouldCopy();
   if (!shouldCopy) {
-    llvm::Value *null =
-      llvm::ConstantPointerNull::get(
-        cast(destType->getElementType()));
+    llvm::Value *null = llvm::ConstantPointerNull::get(
+        cast(destType->getPointerElementType()));
     CGF.Builder.CreateStore(null, temp);
   }
 
@@ -3936,7 +3934,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
     assert(srcRV.isScalar());
 
     llvm::Value *src = srcRV.getScalarVal();
-    src = CGF.Builder.CreateBitCast(src, destType->getElementType(),
+    src = CGF.Builder.CreateBitCast(src, destType->getPointerElementType(),
                                     "icr.cast");
 
     // Use an ordinary store, not a store-to-lvalue.
@@ -5075,8 +5073,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 #ifndef NDEBUG
         // Assert that these structs have equivalent element types.
         llvm::StructType *FullTy = CallInfo.getArgStruct();
-        llvm::StructType *DeclaredTy = cast(
-            cast(LastParamTy)->getElementType());
+        llvm::StructType *DeclaredTy =
+            cast(LastParamTy->getPointerElementType());
         assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
         for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(),
                                                 DE = DeclaredTy->element_end(),
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index d84956c2653e1..520e119ada26c 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2852,9 +2852,8 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
               SanitizerHandler::CFICheckFail, {}, {});
   }
 
-  return Builder.CreateBitCast(
-      Builder.CreateExtractValue(CheckedLoad, 0),
-      cast(VTable->getType())->getElementType());
+  return Builder.CreateBitCast(Builder.CreateExtractValue(CheckedLoad, 0),
+                               VTable->getType()->getPointerElementType());
 }
 
 void CodeGenFunction::EmitForwardingCallToLambda(
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index b2bf60d2c0fcf..52b4490908681 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -2347,9 +2347,10 @@ llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
     }
   }
   if (!SelValue) {
-    SelValue = llvm::GlobalAlias::create(
-        SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage,
-        ".objc_selector_" + Sel.getAsString(), &TheModule);
+    SelValue = llvm::GlobalAlias::create(SelectorTy->getPointerElementType(), 0,
+                                         llvm::GlobalValue::PrivateLinkage,
+                                         ".objc_selector_" + Sel.getAsString(),
+                                         &TheModule);
     Types.emplace_back(TypeEncoding, SelValue);
   }
 
@@ -2576,14 +2577,16 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
       if (IsClassMessage)  {
         if (!MetaClassPtrAlias) {
           MetaClassPtrAlias = llvm::GlobalAlias::create(
-              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+              IdTy->getPointerElementType(), 0,
+              llvm::GlobalValue::InternalLinkage,
               ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
         }
         ReceiverClass = MetaClassPtrAlias;
       } else {
         if (!ClassPtrAlias) {
           ClassPtrAlias = llvm::GlobalAlias::create(
-              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+              IdTy->getPointerElementType(), 0,
+              llvm::GlobalValue::InternalLinkage,
               ".objc_class_ref" + Class->getNameAsString(), &TheModule);
         }
         ReceiverClass = ClassPtrAlias;
@@ -3706,7 +3709,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
   GenerateProtocolHolderCategory();
 
   llvm::StructType *selStructTy =
-    dyn_cast(SelectorTy->getElementType());
+      dyn_cast(SelectorTy->getPointerElementType());
   llvm::Type *selStructPtrTy = SelectorTy;
   if (!selStructTy) {
     selStructTy = llvm::StructType::get(CGM.getLLVMContext(),
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e4889586bb7ee..fd956aabc717f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -837,9 +837,10 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   }
   llvm::Value *Size;
   llvm::Value *SizeInChars;
-  auto *ElemType =
-      cast(OrigAddresses[N].first.getPointer(CGF)->getType())
-          ->getElementType();
+  auto *ElemType = OrigAddresses[N]
+                       .first.getPointer(CGF)
+                       ->getType()
+                       ->getPointerElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 6102046805503..2979d92c84172 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4472,8 +4472,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
       // pad.  The best solution is to fix the personality function.
       } else {
         // Pull the pointer for the reference type off.
-        llvm::Type *PtrTy =
-          cast(LLVMCatchTy)->getElementType();
+        llvm::Type *PtrTy = LLVMCatchTy->getPointerElementType();
 
         // Create the temporary and write the adjusted pointer into it.
         Address ExnPtrTmp =
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index ebe651b5a7184..fd9a7e602833a 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -8937,8 +8937,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
                                        unsigned ToAS) const {
     // Single value types.
     if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
-      return llvm::PointerType::get(
-          cast(Ty)->getElementType(), ToAS);
+      return llvm::PointerType::get(Ty->getPointerElementType(), ToAS);
     return Ty;
   }
 
@@ -9334,7 +9333,7 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
     return llvm::ConstantPointerNull::get(PT);
 
   auto &Ctx = CGM.getContext();
-  auto NPT = llvm::PointerType::get(PT->getElementType(),
+  auto NPT = llvm::PointerType::get(PT->getPointerElementType(),
       Ctx.getTargetAddressSpace(LangAS::opencl_generic));
   return llvm::ConstantExpr::getAddrSpaceCast(
       llvm::ConstantPointerNull::get(NPT), PT);
@@ -10271,8 +10270,7 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
     auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
     auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
     if (LTy->isPointerTy() && LTy->getPointerAddressSpace() == DefaultAS) {
-      LTy = llvm::PointerType::get(
-          cast(LTy)->getElementType(), GlobalAS);
+      LTy = llvm::PointerType::get(LTy->getPointerElementType(), GlobalAS);
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }
   }
diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
index d6c98cd949a29..43c8109207665 100644
--- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
@@ -146,7 +146,7 @@ static inline SourcePred sizedPtrType() {
       return false;
 
     if (const auto *PtrT = dyn_cast(V->getType()))
-      return PtrT->getElementType()->isSized();
+      return PtrT->getPointerElementType()->isSized();
     return false;
   };
   auto Make = [](ArrayRef, ArrayRef Ts) {
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index 6cc5797269e24..4c8286692ebf3 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -68,7 +68,7 @@ template  class MatrixBuilder {
 
     // Deal with the pointer
     PointerType *PtrTy = cast(DataPtr->getType());
-    Type *EltTy = PtrTy->getElementType();
+    Type *EltTy = PtrTy->getPointerElementType();
 
     auto *RetType = FixedVectorType::get(EltTy, Rows * Columns);
 
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index c6251b9bf5c90..a254a67e6b1f7 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -123,7 +123,7 @@ class GCStatepointInst : public CallBase {
   /// statepoint.
   Type *getActualReturnType() const {
     auto *CalleeTy =
-      cast(getActualCalledOperand()->getType())->getElementType();
+        getActualCalledOperand()->getType()->getPointerElementType();
     return cast(CalleeTy)->getReturnType();
   }
 
diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 0c097b2fa3020..1577f1eb70b17 100644
--- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -142,13 +142,13 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (SetVector::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
     auto I1Size = LocationSize::afterPointer();
-    Type *I1ElTy = cast((*I1)->getType())->getElementType();
+    Type *I1ElTy = (*I1)->getType()->getPointerElementType();
     if (I1ElTy->isSized())
       I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
 
     for (SetVector::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       auto I2Size = LocationSize::afterPointer();
-      Type *I2ElTy = cast((*I2)->getType())->getElementType();
+      Type *I2ElTy = (*I2)->getType()->getPointerElementType();
       if (I2ElTy->isSized())
         I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
 
@@ -233,7 +233,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (CallBase *Call : Calls) {
     for (auto Pointer : Pointers) {
       auto Size = LocationSize::afterPointer();
-      Type *ElTy = cast(Pointer->getType())->getElementType();
+      Type *ElTy = Pointer->getType()->getPointerElementType();
       if (ElTy->isSized())
         Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
 
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 772316e7469d9..7cf69f613c669 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -941,7 +941,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
   if (auto *GV = dyn_cast(Ptr))
     SrcElemTy = GV->getValueType();
   else if (!PTy->isOpaque())
-    SrcElemTy = PTy->getElementType();
+    SrcElemTy = PTy->getNonOpaquePointerElementType();
   else
     SrcElemTy = Type::getInt8Ty(Ptr->getContext());
 
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 9551eb48e2316..e40b08b322637 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1414,8 +1414,9 @@ bool InductionDescriptor::isInductionPHI(
 
   // Always use i8 element type for opaque pointer inductions.
   PointerType *PtrTy = cast(PhiTy);
-  Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext())
-                                        : PtrTy->getElementType();
+  Type *ElementType = PtrTy->isOpaque()
+                          ? Type::getInt8Ty(PtrTy->getContext())
+                          : PtrTy->getNonOpaquePointerElementType();
   if (!ElementType->isSized())
     return false;
 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index fdfbd2c3ca77f..4f2123b4c5fab 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7598,8 +7598,7 @@ const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
         continue;
       // Also make sure step was increased the same with sizeof allocated
       // element type.
-      const PointerType *GEPT = dyn_cast(GEP->getType());
-      if (Ty->getElementType() != GEPT->getElementType())
+      if (Ty->getElementType() != GEP->getType()->getPointerElementType())
         continue;
 
       // FIXME: Since gep indices are silently zext to the indexing type,
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index cec4ffd82f818..868b8693926d7 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -990,10 +990,10 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            PTy->getElementType()));
+            PTy->getNonOpaquePointerElementType()));
   }
 
-  if (!IsAlias && !PTy->getElementType()->isFunctionTy()) {
+  if (!IsAlias && !PTy->getPointerElementType()->isFunctionTy()) {
     return error(ExplicitTypeLoc,
                  "explicit pointee type should be a function type");
   }
@@ -3588,7 +3588,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
             ExplicitTypeLoc,
             typeComparisonErrorMessage(
                 "explicit pointee type doesn't match operand's pointee type",
-                Ty, BasePointerType->getElementType()));
+                Ty, BasePointerType->getNonOpaquePointerElementType()));
       }
 
       unsigned GEPWidth =
@@ -7205,7 +7205,7 @@ int LLParser::parseLoad(Instruction *&Inst, PerFunctionState &PFS) {
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            cast(Val->getType())->getElementType()));
+            Val->getType()->getNonOpaquePointerElementType()));
   }
   SmallPtrSet Visited;
   if (!Alignment && !Ty->isSized(&Visited))
@@ -7465,7 +7465,7 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            BasePointerType->getElementType()));
+            BasePointerType->getNonOpaquePointerElementType()));
   }
 
   SmallVector Indices;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1e60bad616979..720ab560f988c 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2702,7 +2702,7 @@ Error BitcodeReader::parseConstants() {
 
       PointerType *OrigPtrTy = cast(Elt0FullTy->getScalarType());
       if (!PointeeType)
-        PointeeType = OrigPtrTy->getElementType();
+        PointeeType = OrigPtrTy->getPointerElementType();
       else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType))
         return error("Explicit gep operator type does not match pointee type "
                      "of pointer operand");
@@ -2825,9 +2825,8 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[3+AsmStrSize+i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast(cast(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+      V = InlineAsm::get(cast(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
       break;
     }
     // This version adds support for the asm dialect keywords (e.g.,
@@ -2852,10 +2851,9 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[3+AsmStrSize+i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast(cast(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
-          InlineAsm::AsmDialect(AsmDialect));
+      V = InlineAsm::get(cast(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+                         InlineAsm::AsmDialect(AsmDialect));
       break;
     }
     // This version adds support for the unwind keyword.
@@ -2884,10 +2882,9 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[OpNum + AsmStrSize + i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast(cast(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
-          InlineAsm::AsmDialect(AsmDialect), CanThrow);
+      V = InlineAsm::get(cast(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+                         InlineAsm::AsmDialect(AsmDialect), CanThrow);
       break;
     }
     // This version adds explicit function type.
@@ -3282,7 +3279,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) {
     if (!Ty->isPointerTy())
       return error("Invalid type for value");
     AddressSpace = cast(Ty)->getAddressSpace();
-    Ty = cast(Ty)->getElementType();
+    Ty = Ty->getPointerElementType();
   }
 
   uint64_t RawLinkage = Record[3];
@@ -3375,7 +3372,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) {
   if (!FTy)
     return error("Invalid record");
   if (auto *PTy = dyn_cast(FTy))
-    FTy = PTy->getElementType();
+    FTy = PTy->getPointerElementType();
 
   if (!isa(FTy))
     return error("Invalid type for value");
@@ -3416,7 +3413,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) {
       Func->removeParamAttr(i, Kind);
 
       Type *PTy = cast(FTy)->getParamType(i);
-      Type *PtrEltTy = cast(PTy)->getElementType();
+      Type *PtrEltTy = PTy->getPointerElementType();
       Attribute NewAttr;
       switch (Kind) {
       case Attribute::ByVal:
@@ -3539,7 +3536,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
     auto *PTy = dyn_cast(Ty);
     if (!PTy)
       return error("Invalid type for value");
-    Ty = PTy->getElementType();
+    Ty = PTy->getPointerElementType();
     AddrSpace = PTy->getAddressSpace();
   } else {
     AddrSpace = Record[OpNum++];
@@ -3908,7 +3905,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
 
       CB->removeParamAttr(i, Kind);
 
-      Type *PtrEltTy = cast(ArgsTys[i])->getElementType();
+      Type *PtrEltTy = ArgsTys[i]->getPointerElementType();
       Attribute NewAttr;
       switch (Kind) {
       case Attribute::ByVal:
@@ -3949,7 +3946,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
   case Intrinsic::preserve_array_access_index:
   case Intrinsic::preserve_struct_access_index:
     if (!CB->getAttributes().getParamElementType(0)) {
-      Type *ElTy = cast(ArgsTys[0])->getElementType();
+      Type *ElTy = ArgsTys[0]->getPointerElementType();
       Attribute NewAttr = Attribute::get(Context, Attribute::ElementType, ElTy);
       CB->addParamAttr(0, NewAttr);
     }
@@ -4239,8 +4236,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         return error("Invalid record");
 
       if (!Ty) {
-        Ty = cast(BasePtr->getType()->getScalarType())
-                 ->getElementType();
+        Ty = BasePtr->getType()->getScalarType()->getPointerElementType();
       } else if (!cast(BasePtr->getType()->getScalarType())
                       ->isOpaqueOrPointeeTypeMatches(Ty)) {
         return error(
@@ -4756,8 +4752,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!CalleeTy)
         return error("Callee is not a pointer");
       if (!FTy) {
-        FTy = dyn_cast(
-            cast(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!CalleeTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -4837,8 +4833,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!OpTy)
         return error("Callee is not a pointer type");
       if (!FTy) {
-        FTy = dyn_cast(
-            cast(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -5000,7 +4996,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         auto *PTy = dyn_cast_or_null(Ty);
         if (!PTy)
           return error("Old-style alloca with a non-pointer type");
-        Ty = PTy->getElementType();
+        Ty = PTy->getPointerElementType();
       }
       Type *OpTy = getTypeByID(Record[1]);
       Value *Size = getFnValueByID(Record[2], OpTy);
@@ -5045,7 +5041,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (OpNum + 3 == Record.size()) {
         Ty = getTypeByID(Record[OpNum++]);
       } else {
-        Ty = cast(Op->getType())->getElementType();
+        Ty = Op->getType()->getPointerElementType();
       }
 
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5078,7 +5074,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (OpNum + 5 == Record.size()) {
         Ty = getTypeByID(Record[OpNum++]);
       } else {
-        Ty = cast(Op->getType())->getElementType();
+        Ty = Op->getType()->getPointerElementType();
       }
 
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5110,8 +5106,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           (BitCode == bitc::FUNC_CODE_INST_STORE
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          Ptr->getType()->getPointerElementType(), Val)) ||
           OpNum + 2 != Record.size())
         return error("Invalid record");
 
@@ -5139,8 +5134,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          Ptr->getType()->getPointerElementType(), Val)) ||
           OpNum + 4 != Record.size())
         return error("Invalid record");
 
@@ -5391,8 +5385,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!OpTy)
         return error("Callee is not a pointer type");
       if (!FTy) {
-        FTy = dyn_cast(
-            cast(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index e0efdf286caff..80978471da4e3 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -948,7 +948,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
       } else {
         // POINTER: [pointee type, address space]
         Code = bitc::TYPE_CODE_POINTER;
-        TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+        TypeVals.push_back(VE.getTypeID(PTy->getNonOpaquePointerElementType()));
         TypeVals.push_back(AddressSpace);
         if (AddressSpace == 0)
           AbbrevToUse = PtrAbbrev;
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 56a700721544c..e0503b1ed2050 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -313,7 +313,7 @@ bool InlineAsmLowering::lowerInlineAsm(
         PointerType *PtrTy = dyn_cast(OpTy);
         if (!PtrTy)
           report_fatal_error("Indirect operand for inline asm not a pointer!");
-        OpTy = PtrTy->getElementType();
+        OpTy = PtrTy->getPointerElementType();
       }
 
       // FIXME: Support aggregate input operands
diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp
index a37fd5454dd42..221a3a84b49be 100644
--- a/llvm/lib/FuzzMutate/Operations.cpp
+++ b/llvm/lib/FuzzMutate/Operations.cpp
@@ -169,7 +169,7 @@ OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) {
 
 OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
   auto buildGEP = [](ArrayRef Srcs, Instruction *Inst) {
-    Type *Ty = cast(Srcs[0]->getType())->getElementType();
+    Type *Ty = Srcs[0]->getType()->getPointerElementType();
     auto Indices = makeArrayRef(Srcs).drop_front(1);
     return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst);
   };
diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 1295714839e87..27c3bdfb22a8e 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -53,8 +53,8 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef Insts,
       IP = ++I->getIterator();
       assert(IP != BB.end() && "guaranteed by the findPointer");
     }
-    auto *NewLoad = new LoadInst(
-        cast(Ptr->getType())->getElementType(), Ptr, "L", &*IP);
+    auto *NewLoad =
+        new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L", &*IP);
 
     // Only sample this load if it really matches the descriptor
     if (Pred.matches(Srcs, NewLoad))
@@ -141,12 +141,12 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB,
 
     if (auto PtrTy = dyn_cast(Inst->getType())) {
       // We can never generate loads from non first class or non sized types
-      if (!PtrTy->getElementType()->isSized() ||
-          !PtrTy->getElementType()->isFirstClassType())
+      Type *ElemTy = PtrTy->getPointerElementType();
+      if (!ElemTy->isSized() || !ElemTy->isFirstClassType())
         return false;
 
       // TODO: Check if this is horribly expensive.
-      return Pred.matches(Srcs, UndefValue::get(PtrTy->getElementType()));
+      return Pred.matches(Srcs, UndefValue::get(ElemTy));
     }
     return false;
   };
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index bbe0c97e60a2f..6631fec2032eb 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -587,7 +587,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
         OS << " addrspace(" << AddressSpace << ')';
       return;
     }
-    print(PTy->getElementType(), OS);
+    print(PTy->getNonOpaquePointerElementType(), OS);
     if (unsigned AddressSpace = PTy->getAddressSpace())
       OS << " addrspace(" << AddressSpace << ')';
     OS << '*';
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b820eabf173d7..45459e200b3d5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -4495,7 +4495,7 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
 
   if (F.getCallingConv() == CallingConv::X86_INTR &&
       !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
-    Type *ByValTy = cast(F.getArg(0)->getType())->getElementType();
+    Type *ByValTy = F.getArg(0)->getType()->getPointerElementType();
     Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
     F.addParamAttr(0, NewAttr);
   }
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 9f1d76b0c768a..1cd95cd5d4c8e 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -119,21 +119,21 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
     if (PointerType *DPTy = dyn_cast(DestTy))
       if (PTy->getAddressSpace() == DPTy->getAddressSpace() &&
           !PTy->isOpaque() && !DPTy->isOpaque() &&
-          PTy->getElementType()->isSized()) {
+          PTy->getNonOpaquePointerElementType()->isSized()) {
         SmallVector IdxList;
         Value *Zero =
           Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
         IdxList.push_back(Zero);
-        Type *ElTy = PTy->getElementType();
-        while (ElTy && ElTy != DPTy->getElementType()) {
+        Type *ElTy = PTy->getNonOpaquePointerElementType();
+        while (ElTy && ElTy != DPTy->getNonOpaquePointerElementType()) {
           ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, (uint64_t)0);
           IdxList.push_back(Zero);
         }
 
-        if (ElTy == DPTy->getElementType())
+        if (ElTy == DPTy->getNonOpaquePointerElementType())
           // This GEP is inbounds because all indices are zero.
-          return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(),
-                                                        V, IdxList);
+          return ConstantExpr::getInBoundsGetElementPtr(
+              PTy->getNonOpaquePointerElementType(), V, IdxList);
       }
 
   // Handle casts from one vector constant to another.  We know that the src
@@ -2098,9 +2098,9 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
       PointerType *DstPtrTy = dyn_cast(CE->getType());
       if (SrcPtrTy && DstPtrTy) {
         ArrayType *SrcArrayTy =
-          dyn_cast(SrcPtrTy->getElementType());
+          dyn_cast(SrcPtrTy->getPointerElementType());
         ArrayType *DstArrayTy =
-          dyn_cast(DstPtrTy->getElementType());
+          dyn_cast(DstPtrTy->getPointerElementType());
         if (SrcArrayTy && DstArrayTy
             && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
             && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 3f899471843fc..47ee4cbbf5829 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -796,7 +796,7 @@ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
 LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
   auto *Ty = unwrap(WrappedTy);
   if (auto *PTy = dyn_cast(Ty))
-    return wrap(PTy->getElementType());
+    return wrap(PTy->getPointerElementType());
   if (auto *ATy = dyn_cast(Ty))
     return wrap(ATy->getElementType());
   return wrap(cast(Ty)->getElementType());
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 534e8a74d0ebf..72d8f9e1547f6 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -817,7 +817,8 @@ static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) {
     // Opaque pointer doesn't have pointee type information, so we just mangle
     // address space for opaque pointer.
     if (!PTyp->isOpaque())
-      Result += getMangledTypeStr(PTyp->getElementType(), HasUnnamedType);
+      Result += getMangledTypeStr(PTyp->getNonOpaquePointerElementType(),
+                                  HasUnnamedType);
   } else if (ArrayType *ATyp = dyn_cast(Ty)) {
     Result += "a" + utostr(ATyp->getNumElements()) +
               getMangledTypeStr(ATyp->getElementType(), HasUnnamedType);
@@ -1465,8 +1466,8 @@ static bool matchIntrinsicType(
       if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
         return true;
       if (!PT->isOpaque())
-        return matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
-                                  DeferredChecks, IsDeferredCheck);
+        return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
+                                  ArgTys, DeferredChecks, IsDeferredCheck);
       // Consume IIT descriptors relating to the pointer element type.
       while (Infos.front().Kind == IITDescriptor::Pointer)
         Infos = Infos.slice(1);
@@ -1573,7 +1574,8 @@ static bool matchIntrinsicType(
         return IsDeferredCheck || DeferCheck(Ty);
       Type * ReferenceType = ArgTys[D.getArgumentNumber()];
       PointerType *ThisArgType = dyn_cast(Ty);
-      return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
+      return (!ThisArgType ||
+              ThisArgType->getPointerElementType() != ReferenceType);
     }
     case IITDescriptor::PtrToElt: {
       if (D.getArgumentNumber() >= ArgTys.size())
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b1483a44ebf77..b91885bcfac47 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -679,7 +679,7 @@ static CallInst *CreateGCStatepointCallCommon(
     const Twine &Name) {
   // Extract out the type of the callee.
   auto *FuncPtrType = cast(ActualCallee->getType());
-  assert(isa(FuncPtrType->getElementType()) &&
+  assert(isa(FuncPtrType->getPointerElementType()) &&
          "actual callee must be a callable value");
 
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
@@ -736,7 +736,7 @@ static InvokeInst *CreateGCStatepointInvokeCommon(
     ArrayRef GCArgs, const Twine &Name) {
   // Extract out the type of the callee.
   auto *FuncPtrType = cast(ActualInvokee->getType());
-  assert(isa(FuncPtrType->getElementType()) &&
+  assert(isa(FuncPtrType->getPointerElementType()) &&
          "actual callee must be a callable value");
 
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
@@ -1002,12 +1002,11 @@ Value *IRBuilderBase::CreatePtrDiff(Value *LHS, Value *RHS,
                                     const Twine &Name) {
   assert(LHS->getType() == RHS->getType() &&
          "Pointer subtraction operand types must match!");
-  auto *ArgType = cast(LHS->getType());
+  auto *ArgElemType = LHS->getType()->getPointerElementType();
   Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
   Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
   Value *Difference = CreateSub(LHS_int, RHS_int);
-  return CreateExactSDiv(Difference,
-                         ConstantExpr::getSizeOf(ArgType->getElementType()),
+  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ArgElemType),
                          Name);
 }
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index cb689e14e6f0b..b84edb7894058 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1826,33 +1826,34 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
              "Attribute 'preallocated' does not support unsized types!", V);
     }
     if (!PTy->isOpaque()) {
-      if (!isa(PTy->getElementType()))
+      if (!isa(PTy->getNonOpaquePointerElementType()))
         Assert(!Attrs.hasAttribute(Attribute::SwiftError),
                "Attribute 'swifterror' only applies to parameters "
                "with pointer to pointer type!",
                V);
       if (Attrs.hasAttribute(Attribute::ByRef)) {
-        Assert(Attrs.getByRefType() == PTy->getElementType(),
+        Assert(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'byref' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
-        Assert(Attrs.getByValType() == PTy->getElementType(),
+        Assert(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'byval' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::Preallocated)) {
-        Assert(Attrs.getPreallocatedType() == PTy->getElementType(),
+        Assert(Attrs.getPreallocatedType() ==
+                   PTy->getNonOpaquePointerElementType(),
                "Attribute 'preallocated' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::InAlloca)) {
-        Assert(Attrs.getInAllocaType() == PTy->getElementType(),
+        Assert(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'inalloca' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ElementType)) {
-        Assert(Attrs.getElementType() == PTy->getElementType(),
+        Assert(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'elementtype' type does not match parameter!", V);
       }
     }
@@ -2195,9 +2196,10 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
 
   const Value *Target = Call.getArgOperand(2);
   auto *PT = dyn_cast(Target->getType());
-  Assert(PT && PT->getElementType()->isFunctionTy(),
+  Assert(PT && PT->getPointerElementType()->isFunctionTy(),
          "gc.statepoint callee must be of function pointer type", Call, Target);
-  FunctionType *TargetFuncType = cast(PT->getElementType());
+  FunctionType *TargetFuncType =
+      cast(PT->getPointerElementType());
 
   const int NumCallArgs = cast(Call.getArgOperand(3))->getZExtValue();
   Assert(NumCallArgs >= 0,
@@ -5005,7 +5007,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     // Assert that result type matches wrapped callee.
     const Value *Target = StatepointCall->getArgOperand(2);
     auto *PT = cast(Target->getType());
-    auto *TargetFuncType = cast(PT->getElementType());
+    auto *TargetFuncType = cast(PT->getPointerElementType());
     Assert(Call.getType() == TargetFuncType->getReturnType(),
            "gc.result result type does not match wrapped callee", Call);
     break;
@@ -5312,7 +5314,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       PointerType *Op0PtrTy =
           cast(Call.getArgOperand(0)->getType());
       if (!Op0PtrTy->isOpaque())
-        Op0ElemTy = Op0PtrTy->getElementType();
+        Op0ElemTy = Op0PtrTy->getNonOpaquePointerElementType();
       break;
     }
     case Intrinsic::matrix_column_major_store: {
@@ -5326,7 +5328,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       PointerType *Op1PtrTy =
           cast(Call.getArgOperand(1)->getType());
       if (!Op1PtrTy->isOpaque())
-        Op1ElemTy = Op1PtrTy->getElementType();
+        Op1ElemTy = Op1PtrTy->getNonOpaquePointerElementType();
       break;
     }
     default:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8f85c93e1d5f4..fd35ab2049e92 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11781,10 +11781,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_ldxr: {
     PointerType *PtrTy = cast(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -11792,10 +11792,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_stxr: {
     PointerType *PtrTy = cast(I.getArgOperand(1)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -11823,7 +11823,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(I.getType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
     return true;
   }
@@ -11833,7 +11833,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(I.getOperand(0)->getType());
     Info.ptrVal = I.getArgOperand(2);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 699c6c4794554..f4d4d34b698c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -331,8 +331,8 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
   if (auto PtrTy = dyn_cast(Arg.getType())) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       // FIXME: Should report this for all address spaces
-      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
-                                                   PtrTy->getElementType());
+      PointeeAlign = DL.getValueOrABITypeAlignment(
+          Arg.getParamAlign(), PtrTy->getPointerElementType());
     }
   }
 
@@ -732,8 +732,8 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
   // FIXME: Need to distinguish in memory alignment from pointer alignment.
   if (auto PtrTy = dyn_cast(Ty)) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
-                                                   PtrTy->getElementType());
+      PointeeAlign = DL.getValueOrABITypeAlignment(
+          Arg.getParamAlign(), PtrTy->getPointerElementType());
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 3d578a9b891e3..1c6c63dd5b251 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -433,7 +433,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
 
     PointerType *ArgType = cast(Arg.getType());
 
-    auto *EltTy = ArgType->getElementType();
+    auto *EltTy = ArgType->getPointerElementType();
     const auto Align =
         DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b525c1dc60389..fe4e6b24367a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20789,10 +20789,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
     PointerType *PtrTy = cast(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -20801,10 +20801,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
     PointerType *PtrTy = cast(I.getArgOperand(1)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
   }
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 0c510686a13bb..608be5160da7e 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -1366,7 +1366,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
 
     // Calculate symbol size
     const DataLayout &DL = Global.getParent()->getDataLayout();
-    uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
+    uint32_t Size =
+        DL.getTypeAllocSize(Global.getType()->getPointerElementType());
 
     DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
         Asm->getSymbol(&Global), Size);
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index bc64d9d30a4c2..6aca8d8078725 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -443,7 +443,7 @@ auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
   // we don't need to do pointer casts.
   auto *PtrTy = cast(Ptr->getType());
   if (!PtrTy->isOpaque()) {
-    Type *ElemTy = PtrTy->getElementType();
+    Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
     int ElemSize = HVC.getAllocSizeOf(ElemTy);
     if (Adjust % ElemSize == 0 && Adjust != 0) {
       Value *Tmp0 =
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 96ea35bd86cd8..3a59306c4998c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1454,7 +1454,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
 
           if (static_cast(TM).getDrvInterface() !=
               NVPTX::CUDA) {
-            Type *ETy = PTy->getElementType();
+            Type *ETy = PTy->getPointerElementType();
             int addrSpace = PTy->getAddressSpace();
             switch (addrSpace) {
             default:
@@ -1514,7 +1514,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
     // param has byVal attribute. So should be a pointer
     auto *PTy = dyn_cast(Ty);
     assert(PTy && "Param with byval attribute should be a pointer type");
-    Type *ETy = PTy->getElementType();
+    Type *ETy = PTy->getPointerElementType();
 
     if (isABI || isKernelFunc) {
       // Just print .param .align  .b8 .param[size];
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f2c8d17816b8f..eac237bb27bb2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1354,7 +1354,7 @@ std::string NVPTXTargetLowering::getPrototype(
     }
     auto *PTy = dyn_cast(Ty);
     assert(PTy && "Param with byval attribute should be a pointer type");
-    Type *ETy = PTy->getElementType();
+    Type *ETy = PTy->getPointerElementType();
 
     Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
     unsigned sz = DL.getTypeAllocSize(ETy);
@@ -1577,7 +1577,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     SmallVector Offsets;
     auto *PTy = dyn_cast(Args[i].Ty);
     assert(PTy && "Type of a byval parameter should be pointer");
-    ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
+    ComputePTXValueVTs(*this, DL, PTy->getPointerElementType(), VTs, &Offsets,
+                       0);
 
     // declare .param .align  .b8 .param[];
     unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -2447,7 +2448,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
   if (!context)
     return false;
 
-  auto *STy = dyn_cast(PTy->getElementType());
+  auto *STy = dyn_cast(PTy->getPointerElementType());
   if (!STy || STy->isLiteral())
     return false;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index ddb7f097fe685..67aa49132016d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -233,7 +233,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
 
   assert(PType && "Expecting pointer type in handleByValParam");
 
-  Type *StructType = PType->getElementType();
+  Type *StructType = PType->getPointerElementType();
 
   auto IsALoadChain = [&](Value *Start) {
     SmallVector ValuesToCheck = {Start};
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c1bfc11824d27..105a43510369d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17894,7 +17894,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
   assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
          "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast(AlignedAddr->getType())->getElementType();
+  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
   Function *RMW = Intrinsic::getDeclaration(
       M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
@@ -17919,7 +17919,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
          "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast(AlignedAddr->getType())->getElementType();
+  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
   Function *IntCmpXchg =
       Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7e4e36a20d15..9b427703764ea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1105,7 +1105,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::riscv_masked_cmpxchg_i32: {
     PointerType *PtrTy = cast(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.align = Align(4);
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 52b120031e3f0..6d6879bc94b38 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -826,7 +826,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
       // sret only allowed on first argument
       assert(Outs[realArgIdx].OrigArgIndex == 0);
       PointerType *Ty = cast(CLI.getArgs()[0].Ty);
-      Type *ElementTy = Ty->getElementType();
+      Type *ElementTy = Ty->getPointerElementType();
       SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
       continue;
     }
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 0a300ce855f07..92acfb93057a1 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -808,7 +808,7 @@ static StringRef solveTypeName(Type *Ty) {
 
   if (Ty->isPointerTy()) {
     auto *PtrTy = cast(Ty);
-    Type *PointeeTy = PtrTy->getElementType();
+    Type *PointeeTy = PtrTy->getPointerElementType();
     auto Name = solveTypeName(PointeeTy);
     if (Name == "UnknownType")
       return "PointerType";
@@ -2278,7 +2278,7 @@ static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
 
   auto ArgTy = cast(Arg.getType());
-  auto ValueTy = ArgTy->getElementType();
+  auto ValueTy = ArgTy->getPointerElementType();
 
   // Reduce to the alloca case:
 
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 3a42a2cac928b..ce3c5153bde27 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -196,8 +196,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote,
       for (const auto &ArgIndex : ArgIndices) {
         // not allowed to dereference ->begin() if size() is 0
         Params.push_back(GetElementPtrInst::getIndexedType(
-            cast(I->getType())->getElementType(),
-            ArgIndex.second));
+            I->getType()->getPointerElementType(), ArgIndex.second));
         ArgAttrVec.push_back(AttributeSet());
         assert(Params.back());
       }
@@ -298,7 +297,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote,
               Ops.push_back(ConstantInt::get(IdxTy, II));
               // Keep track of the type we're currently indexing.
               if (auto *ElPTy = dyn_cast(ElTy))
-                ElTy = ElPTy->getElementType();
+                ElTy = ElPTy->getPointerElementType();
               else
                 ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
             }
@@ -928,7 +927,7 @@ promoteArguments(Function *F, function_ref AARGetter,
   SmallPtrSet ArgsToPromote;
   SmallPtrSet ByValArgsToTransform;
   for (Argument *PtrArg : PointerArgs) {
-    Type *AgTy = cast(PtrArg->getType())->getElementType();
+    Type *AgTy = PtrArg->getType()->getPointerElementType();
 
     // Replace sret attribute with noalias. This reduces register pressure by
     // avoiding a register copy.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3a3f169d2f516..bfe09da91f53b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2520,7 +2520,7 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
   if (!Call.isByValArgument(ix))
     return false;
 
-  Type *SrcElemTy = SrcTy->getElementType();
+  Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
   Type *DstElemTy = Call.getParamByValType(ix);
   if (!SrcElemTy->isSized() || !DstElemTy->isSized())
     return false;
@@ -2785,7 +2785,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
           Call.removeParamAttr(ix, Attribute::ByVal);
           Call.addParamAttr(
               ix, Attribute::getWithByValType(
-                      Call.getContext(), NewTy->getElementType()));
+                      Call.getContext(), NewTy->getPointerElementType()));
         }
         Changed = true;
       }
@@ -3034,12 +3034,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // sized type and the sized type has to have the same size as the old type.
     if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast(ParamTy);
-      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
+      if (!ParamPTy || !ParamPTy->getPointerElementType()->isSized())
         return false;
 
       Type *CurElTy = Call.getParamByValType(i);
       if (DL.getTypeAllocSize(CurElTy) !=
-          DL.getTypeAllocSize(ParamPTy->getElementType()))
+          DL.getTypeAllocSize(ParamPTy->getPointerElementType()))
         return false;
     }
   }
@@ -3053,16 +3053,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // call.  We don't want to introduce a varargs call where one doesn't
     // already exist.
     PointerType *APTy = cast<PointerType>(Call.getCalledOperand()->getType());
-    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+    if (FT->isVarArg()!=cast<FunctionType>(APTy->getPointerElementType())->isVarArg())
       return false;
 
     // If both the callee and the cast type are varargs, we still have to make
     // sure the number of fixed parameters are the same or we have the same
     // ABI issues as if we introduce a varargs call.
     if (FT->isVarArg() &&
-        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+        cast<FunctionType>(APTy->getPointerElementType())->isVarArg() &&
         FT->getNumParams() !=
-        cast<FunctionType>(APTy->getElementType())->getNumParams())
+        cast<FunctionType>(APTy->getPointerElementType())->getNumParams())
       return false;
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8df4a4529f472..10a7c1b406a57 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -91,7 +91,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
 
   // Get the type really allocated and the type casted to.
   Type *AllocElTy = AI.getAllocatedType();
-  Type *CastElTy = PTy->getElementType();
+  Type *CastElTy = PTy->getPointerElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
   // This optimisation does not work for cases where the cast type
@@ -2649,8 +2649,8 @@ static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
   if (SrcPTy->isOpaque() || DstPTy->isOpaque())
     return nullptr;
 
-  Type *DstElTy = DstPTy->getElementType();
-  Type *SrcElTy = SrcPTy->getElementType();
+  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
+  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
 
   // When the type pointed to is not sized the cast cannot be
   // turned into a gep.
@@ -2669,8 +2669,8 @@ static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
   // If we found a path from the src to dest, create the getelementptr now.
   if (SrcElTy == DstElTy) {
     SmallVector Idxs(NumZeros + 1, Builder.getInt32(0));
-    GetElementPtrInst *GEP =
-        GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
+    GetElementPtrInst *GEP = GetElementPtrInst::Create(
+        SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
 
     // If the source pointer is dereferenceable, then assume it points to an
     // allocated object and apply "inbounds" to the GEP.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0dbfdba353c4d..dad5c2ca6a50c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -345,7 +345,8 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) {
 #ifndef NDEBUG
   auto *PT = cast<PointerType>(I.getType());
   auto *NT = cast<PointerType>(V->getType());
-  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
+  assert(PT != NT &&
+         PT->getPointerElementType() == NT->getPointerElementType() &&
          "Invalid usage");
 #endif
   WorkMap[&I] = V;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 32f6a980afa8d..5de1d6fa4567a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1372,7 +1372,7 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
 Type *
 InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
                                       SmallVectorImpl &NewIndices) {
-  Type *Ty = PtrTy->getElementType();
+  Type *Ty = PtrTy->getPointerElementType();
   if (!Ty->isSized())
     return nullptr;
 
@@ -2311,7 +2311,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // type. For now, skip these.
   if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
     bool HasZeroPointerIndex = false;
-    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+    Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
 
     if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
@@ -2498,7 +2498,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
     Value *SrcOp = BCI->getOperand(0);
     PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
-    Type *SrcEltType = SrcType->getElementType();
+    Type *SrcEltType = SrcType->getPointerElementType();
 
     // GEP directly using the source operand if this GEP is accessing an element
     // of a bitcasted pointer to vector or array of the same dimensions:
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bd2dc8d639fc1..3bf2d16e7d209 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1559,7 +1559,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
       auto BasePtr = CI->getOperand(OpOffset);
       if (ignoreAccess(LI, BasePtr))
         return;
-      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      auto Ty = BasePtr->getType()->getPointerElementType();
       MaybeAlign Alignment = Align(1);
       // Otherwise no alignment guarantees. We probably got Undef.
       if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
@@ -1656,8 +1656,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         unsigned Granularity, uint32_t TypeSize,
                                         bool IsWrite, Value *SizeArgument,
                                         bool UseCalls, uint32_t Exp) {
-  auto *VTy = cast<FixedVectorType>(
-      cast<PointerType>(Addr->getType())->getElementType());
+  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto Zero = ConstantInt::get(IntptrTy, 0);
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 727672fa0605d..92ea007691b27 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -384,7 +384,7 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
       }
 
       auto *BasePtr = CI->getOperand(0 + OpOffset);
-      auto *Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      auto *Ty = BasePtr->getType()->getPointerElementType();
       Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
       if (auto *AlignmentConstant =
               dyn_cast(CI->getOperand(1 + OpOffset)))
@@ -419,8 +419,7 @@ void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                               Instruction *I, Value *Addr,
                                               unsigned Alignment,
                                               uint32_t TypeSize, bool IsWrite) {
-  auto *VTy = cast<FixedVectorType>(
-      cast<PointerType>(Addr->getType())->getElementType());
+  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto *Zero = ConstantInt::get(IntptrTy, 0);
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index da8ee1f15bf8e..387ea5243265d 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -918,7 +918,7 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
 void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
     Function &, ArrayRef<LoadInst *> Loads, ArrayRef<StoreInst *> Stores) {
   auto CallbackIdx = [&](const Value *Ptr) -> int {
-    auto ElementTy = cast<PointerType>(Ptr->getType())->getElementType();
+    auto *ElementTy = Ptr->getType()->getPointerElementType();
     uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
     return TypeSize == 8     ? 0
            : TypeSize == 16  ? 1
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index d07fa1d118e42..caad91867112c 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -980,7 +980,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     if (IsNullOrUndef(CI->getArgOperand(0))) {
       Changed = true;
       Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
                     Constant::getNullValue(Ty), CI);
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
@@ -1000,7 +1000,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
         IsNullOrUndef(CI->getArgOperand(1))) {
       Changed = true;
       Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
                     Constant::getNullValue(Ty), CI);
 
       Value *NewValue = UndefValue::get(CI->getType());
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 2ed87ce6295b3..35497ae5ed9af 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1513,7 +1513,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
   if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
     return nullptr;
 
-  Type *ElementTy = Ty->getElementType();
+  Type *ElementTy = Ty->getNonOpaquePointerElementType();
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
 
@@ -1572,7 +1572,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
   APInt Int8PtrOffset(Offset.getBitWidth(), 0);
 
   PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
-  Type *TargetTy = TargetPtrTy->getElementType();
+  Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
 
   // As `addrspacecast` is , `Ptr` (the storage pointer) may have different
   // address space from the expected `PointerTy` (the pointer to be used).
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 6b7419abe1d1f..3606c8a4b073f 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -270,7 +270,7 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
   Type *Ty = V->getType();
   PtrTy = dyn_cast<PointerType>(Ty);
   if (PtrTy)
-    Ty = PtrTy->getElementType();
+    Ty = PtrTy->getPointerElementType();
   Size = cast<FixedVectorType>(Ty)->getNumElements();
   if (!CachePtr)
     Tmp.resize(Size, nullptr);
@@ -288,7 +288,8 @@ Value *Scatterer::operator[](unsigned I) {
     return CV[I];
   IRBuilder<> Builder(BB, BBI);
   if (PtrTy) {
-    Type *ElTy = cast<VectorType>(PtrTy->getElementType())->getElementType();
+    Type *ElTy =
+        cast<VectorType>(PtrTy->getPointerElementType())->getElementType();
     if (!CV[0]) {
       Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
       CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index fdc914a72bfd7..b8b0bbbc7a4ed 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -28,7 +28,7 @@ static bool isCString(const Value *Arg) {
   if (!PtrTy)
     return false;
 
-  auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
+  auto IntTy = dyn_cast<IntegerType>(PtrTy->getPointerElementType());
   if (!IntTy)
     return false;
 
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 8dc4702993c35..3d75dd57456de 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -297,7 +297,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
   Function *F = OrigBB->getParent();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
-  Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+  Type *EltTy = SrcAddr->getType()->getPointerElementType();
 
   // Create the a comparison of src and dst, based on which we jump to either
   // the forward-copy part of the function (if src >= dst) or the backwards-copy
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index fe8215d3b9916..c6044f8fdffd7 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -173,7 +173,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     auto *PtrTy = cast<PointerType>(Ty);
     if (DL.isNonIntegralPointerType(PtrTy)) {
       auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
-      assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
+      assert(DL.getTypeAllocSize(Int8PtrTy->getPointerElementType()) == 1 &&
              "alloc size of i8 must by 1 byte for the GEP to be correct");
       auto *GEP = Builder.CreateGEP(
           Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
@@ -471,7 +471,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // indexes into the array implied by the pointer operand; the rest of
     // the indices index into the element or field type selected by the
     // preceding index.
-    Type *ElTy = PTy->getElementType();
+    Type *ElTy = PTy->getNonOpaquePointerElementType();
     for (;;) {
       // If the scale size is not 0, attempt to factor out a scale for
       // array indexing.
@@ -640,8 +640,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     Value *Casted = V;
     if (V->getType() != PTy)
       Casted = InsertNoopCastOfTo(Casted, PTy);
-    Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices,
-                                   "scevgep");
+    Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
+                                   Casted, GepIndices, "scevgep");
     Ops.push_back(SE.getUnknown(GEP));
   }
 
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index bb11c18b57fa4..941b529da9b2d 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -346,8 +346,7 @@ struct LoadModifier: public Modifier {
   void Act() override {
     // Try to use predefined pointers. If non-exist, use undef pointer value;
     Value *Ptr = getRandomPointerValue();
-    PointerType *Tp = cast<PointerType>(Ptr->getType());
-    Value *V = new LoadInst(Tp->getElementType(), Ptr, "L",
+    Value *V = new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L",
                             BB->getTerminator());
     PT->push_back(V);
   }
@@ -360,8 +359,7 @@ struct StoreModifier: public Modifier {
   void Act() override {
     // Try to use predefined pointers. If non-exist, use undef pointer value;
     Value *Ptr = getRandomPointerValue();
-    PointerType *Tp = cast<PointerType>(Ptr->getType());
-    Value *Val = getRandomValue(Tp->getElementType());
+    Value *Val = getRandomValue(Ptr->getType()->getPointerElementType());
     Type  *ValTy = Val->getType();
 
     // Do not store vectors of i1s because they are unsupported

From 22487280dcea8261996385b852ca4470b8e4846b Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 10:07:45 +0100
Subject: [PATCH 514/946] [NFC] Remove more uses of
 PointerType::getElementType() (NFC)

Replace more uses which I missed in the first pass with
Type::getPointerElementType().
---
 llvm/unittests/AsmParser/AsmParserTest.cpp     | 12 ++++++------
 .../Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp | 18 ++++++++----------
 mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp        |  4 ++--
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 9a7d70ad1ed02..7639ff50571e6 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -252,7 +252,7 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -262,11 +262,11 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -386,7 +386,7 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 4);
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -397,11 +397,11 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 5);
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
index e4932e84cd28a..143630ce53ae0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
@@ -280,11 +280,10 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
     if (auto attr = op.getAttrOfType<FlatSymbolRefAttr>("callee"))
       return builder.CreateCall(
           moduleTranslation.lookupFunction(attr.getValue()), operandsRef);
-    auto *calleePtrType =
-        cast<llvm::PointerType>(operandsRef.front()->getType());
-    auto *calleeType =
-        cast<llvm::FunctionType>(calleePtrType->getElementType());
-    return builder.CreateCall(calleeType, operandsRef.front(),
+    auto *calleeType = operandsRef.front()->getType();
+    auto *calleeFunctionType =
+        cast<llvm::FunctionType>(calleeType->getPointerElementType());
+    return builder.CreateCall(calleeFunctionType, operandsRef.front(),
                               operandsRef.drop_front());
   };
 
@@ -349,12 +348,11 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
           moduleTranslation.lookupBlock(invOp.getSuccessor(0)),
           moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef);
     } else {
-      auto *calleePtrType =
-          cast<llvm::PointerType>(operandsRef.front()->getType());
-      auto *calleeType =
-          cast<llvm::FunctionType>(calleePtrType->getElementType());
+      auto *calleeType = operandsRef.front()->getType();
+      auto *calleeFunctionType =
+          cast<llvm::FunctionType>(calleeType->getPointerElementType());
       result = builder.CreateInvoke(
-          calleeType, operandsRef.front(),
+          calleeFunctionType, operandsRef.front(),
           moduleTranslation.lookupBlock(invOp.getSuccessor(0)),
           moduleTranslation.lookupBlock(invOp.getSuccessor(1)),
           operandsRef.drop_front());
diff --git a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
index 4efb5332cced5..210ff9e4ebd05 100644
--- a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
+++ b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
@@ -95,8 +95,8 @@ class TypeFromLLVMIRTranslatorImpl {
 
   /// Translates the given pointer type.
   Type translate(llvm::PointerType *type) {
-    return LLVM::LLVMPointerType::get(translateType(type->getElementType()),
-                                      type->getAddressSpace());
+    return LLVM::LLVMPointerType::get(
+        translateType(type->getPointerElementType()), type->getAddressSpace());
   }
 
   /// Translates the given structure type.

From 184591aeeb5a531f2315c3d7cddcd199c87ecb2c Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 09:57:26 +0100
Subject: [PATCH 515/946] [OpaquePtrs] Deprecate PointerType::getElementType()

This deprecates PointerType::getElementType() in favor of
Type::getPointerElementType(). The motivation is to make it more
apparent when code accesses the pointer element type, because
getElementType() may also refer to at least
ArrayType::getElementType() and VectorType::getElementType().

Differential Revision: https://reviews.llvm.org/D117885
---
 llvm/include/llvm/IR/DerivedTypes.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 8a1b26e699e32..f52ce3cde318d 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -667,9 +667,11 @@ class PointerType : public Type {
                                              unsigned AddressSpace) {
     if (PT->isOpaque())
       return get(PT->getContext(), AddressSpace);
-    return get(PT->getElementType(), AddressSpace);
+    return get(PT->PointeeTy, AddressSpace);
   }
 
+  [[deprecated("Pointer element types are deprecated. You can *temporarily* "
+               "use Type::getPointerElementType() instead")]]
   Type *getElementType() const {
     assert(!isOpaque() && "Attempting to get element type of opaque pointer");
     return PointeeTy;

From 13252160c3984b52a210bfa6ec64b9be4c911920 Mon Sep 17 00:00:00 2001
From: David Sherwood 
Date: Fri, 21 Jan 2022 09:56:49 +0000
Subject: [PATCH 516/946] [NFC] Move useSVEForFixedLengthVectors into
 AArch64Subtarget.h

Given how small the function is and how often it gets used it
makes more sense to live in the header file.

Differential Revision: https://reviews.llvm.org/D117883
---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 5 -----
 llvm/lib/Target/AArch64/AArch64Subtarget.h   | 5 ++++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index f4d046078d68e..a4f4b85821822 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -373,9 +373,4 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
     MFI.computeMaxCallFrameSize(MF);
 }
 
-bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
-  // Prefer NEON unless larger SVE registers are available.
-  return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
-}
-
 bool AArch64Subtarget::useAA() const { return UseAA; }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5057dbec653e1..3e3c0f6aba15c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -680,7 +680,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
     return MinSVEVectorSizeInBits;
   }
 
-  bool useSVEForFixedLengthVectors() const;
+  bool useSVEForFixedLengthVectors() const {
+    // Prefer NEON unless larger SVE registers are available.
+    return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+  }
 
   unsigned getVScaleForTuning() const { return VScaleForTuning; }
 };

From 2233befa5dc4c2c0ed597b295cfa353039c21e23 Mon Sep 17 00:00:00 2001
From: Victor Perez 
Date: Tue, 25 Jan 2022 10:05:50 +0000
Subject: [PATCH 517/946] [LegalizeTypes][VP] Add splitting support for
 vp.gather and vp.scatter

Split these nodes in a similar way as their masked versions.

Reviewed By: frasercrmck, craig.topper

Differential Revision: https://reviews.llvm.org/D117760
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  10 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 266 +++---
 .../RISCV/rvv/fixed-vectors-vpgather.ll       | 801 ++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-vpscatter.ll      | 471 +++++++++-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 362 ++++++++
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     | 368 +++++++-
 6 files changed, 2137 insertions(+), 141 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0dccb6f3f6ac9..4d8daa82d8c02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -821,6 +821,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   /// Split mask operator of a VP intrinsic.
   std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
 
+  /// Split mask operator of a VP intrinsic in a given location.
+  std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL);
+
   // Helper function for incrementing the pointer when splitting
   // memory operations
   void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
@@ -851,7 +854,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
+                          bool SplitSETCC = false);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -879,8 +883,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
-  SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
-  SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
+  SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
   SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue SplitVecOp_VSETCC(SDNode *N);
   SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 2d59a057f2031..f8f3d2ce8e45d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -944,7 +944,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
     break;
   case ISD::MGATHER:
-    SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+  case ISD::VP_GATHER:
+    SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
     break;
   case ISD::SETCC:
     SplitVecRes_SETCC(N, Lo, Hi);
@@ -1118,12 +1119,17 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 }
 
 std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
+  return SplitMask(Mask, SDLoc(Mask));
+}
+
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
+                                                        const SDLoc &DL) {
   SDValue MaskLo, MaskHi;
   EVT MaskVT = Mask.getValueType();
   if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
     GetSplitVector(Mask, MaskLo, MaskHi);
   else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
   return std::make_pair(MaskLo, MaskHi);
 }
 
@@ -1923,61 +1929,85 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
 
 }
 
-void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
-                                         SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
+                                          SDValue &Hi, bool SplitSETCC) {
   EVT LoVT, HiVT;
-  SDLoc dl(MGT);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
-  SDValue Ch = MGT->getChain();
-  SDValue Ptr = MGT->getBasePtr();
-  SDValue Mask = MGT->getMask();
-  SDValue PassThru = MGT->getPassThru();
-  SDValue Index = MGT->getIndex();
-  SDValue Scale = MGT->getScale();
-  EVT MemoryVT = MGT->getMemoryVT();
-  Align Alignment = MGT->getOriginalAlign();
-  ISD::LoadExtType ExtType = MGT->getExtensionType();
+  SDLoc dl(N);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  struct Operands {
+    SDValue Mask;
+    SDValue Index;
+    SDValue Scale;
+  } Ops = [&]() -> Operands {
+    if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
+      return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
+    }
+    auto *VPSC = cast<VPGatherSDNode>(N);
+  }();
+
+  EVT MemoryVT = N->getMemoryVT();
+  Align Alignment = N->getOriginalAlign();
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (Mask.getOpcode() == ISD::SETCC) {
-    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
   } else {
-    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-      GetSplitVector(Mask, MaskLo, MaskHi);
-    else
-      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+    std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
   }
 
   EVT LoMemVT, HiMemVT;
   // Split MemoryVT
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
-  SDValue PassThruLo, PassThruHi;
-  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(PassThru, PassThruLo, PassThruHi);
-  else
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
-
   SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
+  if (getTypeAction(Ops.Index.getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(Ops.Index, IndexLo, IndexHi);
   else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MGT->getPointerInfo(), MachineMemOperand::MOLoad,
-      MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
-      MGT->getRanges());
+      N->getPointerInfo(), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
 
-  SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
-  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
-                           MMO, MGT->getIndexType(), ExtType);
+  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
+    SDValue PassThru = MGT->getPassThru();
+    SDValue PassThruLo, PassThruHi;
+    if (getTypeAction(PassThru.getValueType()) ==
+        TargetLowering::TypeSplitVector)
+      GetSplitVector(PassThru, PassThruLo, PassThruHi);
+    else
+      std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
 
-  SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
-  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
-                           MMO, MGT->getIndexType(), ExtType);
+    ISD::LoadExtType ExtType = MGT->getExtensionType();
+    ISD::MemIndexType IndexTy = MGT->getIndexType();
+
+    SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+    Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+                             OpsLo, MMO, IndexTy, ExtType);
+
+    SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+    Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+                             OpsHi, MMO, IndexTy, ExtType);
+  } else {
+    auto *VPGT = cast<VPGatherSDNode>(N);
+    SDValue EVLLo, EVLHi;
+    std::tie(EVLLo, EVLHi) =
+        DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
+
+    SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+    Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+                         MMO, VPGT->getIndexType());
+
+    SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+    Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+                         MMO, VPGT->getIndexType());
+  }
 
   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -1986,10 +2016,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
 
   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
-  ReplaceValueWith(SDValue(MGT, 1), Ch);
+  ReplaceValueWith(SDValue(N, 1), Ch);
 }
 
-
 void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
@@ -2286,10 +2315,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
     break;
   case ISD::MSCATTER:
-    Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+  case ISD::VP_SCATTER:
+    Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
     break;
   case ISD::MGATHER:
-    Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+  case ISD::VP_GATHER:
+    Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
     break;
   case ISD::VSELECT:
     Res = SplitVecOp_VSELECT(N, OpNo);
@@ -2663,69 +2694,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
-                                             unsigned OpNo) {
-  EVT LoVT, HiVT;
-  SDLoc dl(MGT);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
-  SDValue Ch = MGT->getChain();
-  SDValue Ptr = MGT->getBasePtr();
-  SDValue Index = MGT->getIndex();
-  SDValue Scale = MGT->getScale();
-  SDValue Mask = MGT->getMask();
-  SDValue PassThru = MGT->getPassThru();
-  Align Alignment = MGT->getOriginalAlign();
-  ISD::LoadExtType ExtType = MGT->getExtensionType();
-
-  SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    // Split Mask operand
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
-
-  EVT MemoryVT = MGT->getMemoryVT();
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue PassThruLo, PassThruHi;
-  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(PassThru, PassThruLo, PassThruHi);
-  else
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
-
-  SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
-  else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MGT->getPointerInfo(), MachineMemOperand::MOLoad,
-      MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
-      MGT->getRanges());
-
-  SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
-  SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
-                                   OpsLo, MMO, MGT->getIndexType(), ExtType);
-
-  SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
-  SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
-                                   OpsHi, MMO, MGT->getIndexType(), ExtType);
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
-                   Hi.getValue(1));
-
-  // Legalize the chain result - switch anything that used the old chain to
-  // use the new one.
-  ReplaceValueWith(SDValue(MGT, 1), Ch);
+SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
+  (void)OpNo;
+  SDValue Lo, Hi;
+  SplitVecRes_Gather(N, Lo, Hi);
 
-  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
-                            Hi);
-  ReplaceValueWith(SDValue(MGT, 0), Res);
+  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
+  ReplaceValueWith(SDValue(N, 0), Res);
   return SDValue();
 }
 
@@ -2886,64 +2861,87 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
   return Res;
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
-                                              unsigned OpNo) {
-  SDValue Ch  = N->getChain();
+SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
+  SDValue Ch = N->getChain();
   SDValue Ptr = N->getBasePtr();
-  SDValue Mask = N->getMask();
-  SDValue Index = N->getIndex();
-  SDValue Scale = N->getScale();
-  SDValue Data = N->getValue();
   EVT MemoryVT = N->getMemoryVT();
   Align Alignment = N->getOriginalAlign();
   SDLoc DL(N);
-
+  struct Operands {
+    SDValue Mask;
+    SDValue Index;
+    SDValue Scale;
+    SDValue Data;
+  } Ops = [&]() -> Operands {
+    if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+      return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
+              MSC->getValue()};
+    }
+    auto *VPSC = cast<VPScatterSDNode>(N);
+    return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
+            VPSC->getValue()};
+  }();
   // Split all operands
 
   EVT LoMemVT, HiMemVT;
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
   SDValue DataLo, DataHi;
-  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+  if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
     // Split Data operand
-    GetSplitVector(Data, DataLo, DataHi);
+    GetSplitVector(Ops.Data, DataLo, DataHi);
   else
-    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+    std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
-    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
   } else {
-    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-      GetSplitVector(Mask, MaskLo, MaskHi);
-    else
-      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+    std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
   }
 
   SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
+  if (getTypeAction(Ops.Index.getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(Ops.Index, IndexLo, IndexHi);
   else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
 
   SDValue Lo;
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       N->getPointerInfo(), MachineMemOperand::MOStore,
       MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
 
-  SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
-  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
-                            DL, OpsLo, MMO, N->getIndexType(),
-                            N->isTruncatingStore());
+  if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+    SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+    Lo =
+        DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+                             MSC->getIndexType(), MSC->isTruncatingStore());
+
+    // The order of the Scatter operation after split is well defined. The "Hi"
+    // part comes after the "Lo". So these two operations should be chained one
+    // after another.
+    SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
+                                MMO, MSC->getIndexType(),
+                                MSC->isTruncatingStore());
+  }
+  auto *VPSC = cast<VPScatterSDNode>(N);
+  SDValue EVLLo, EVLHi;
+  std::tie(EVLLo, EVLHi) =
+      DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
+
+  SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+  Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+                        VPSC->getIndexType());
 
   // The order of the Scatter operation after split is well defined. The "Hi"
   // part comes after the "Lo". So these two operations should be chained one
   // after another.
-  SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
-  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
-                              DL, OpsHi, MMO, N->getIndexType(),
-                              N->isTruncatingStore());
+  SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+  return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
+                          VPSC->getIndexType());
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 7584a1507e706..bfc52bbce7242 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -271,6 +271,54 @@ define <8 x i8> @vpgather_baseidx_v8i8(i8* %base, <8 x i8> %idxs, <8 x i1> %m, i
   ret <8 x i8> %v
 }
 
+declare <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*>, <32 x i1>, i32)
+
+define <32 x i8> @vpgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB13_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB13_2:
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB13_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB13_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
+; RV64-NEXT:    vslideup.vi v8, v12, 16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
+  %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x i8> %v
+}
+
 declare <2 x i16> @llvm.vp.gather.v2i16.v2p0i16(<2 x i16*>, <2 x i1>, i32)
 
 define <2 x i16> @vpgather_v2i16(<2 x i16*> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
@@ -1870,3 +1918,756 @@ define <8 x double> @vpgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x
   %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0f64(<8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
+
+declare <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*>, <32 x i1>, i32)
+
+define <32 x double> @vpgather_v32f64(<32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a2, a0, -16
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    bltu a0, a2, .LBB86_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB86_2:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (zero), v24, v0.t
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:    bltu a0, a1, .LBB86_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:  .LBB86_4:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, -16
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    bltu a0, a2, .LBB86_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB86_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v24, 2
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    bltu a0, a1, .LBB86_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:  .LBB86_4:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT:    ret
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB87_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB87_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB87_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB87_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB87_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB87_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB87_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB87_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i8> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf8 v16, v12
+; RV32-NEXT:    bltu a1, a3, .LBB88_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB88_2:
+; RV32-NEXT:    vsext.vf8 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v10, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB88_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB88_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    bltu a1, a3, .LBB88_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB88_2:
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB88_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB88_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i8> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf8 v16, v12
+; RV32-NEXT:    bltu a1, a3, .LBB89_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB89_2:
+; RV32-NEXT:    vzext.vf8 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v10, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB89_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB89_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf8 v16, v12
+; RV64-NEXT:    bltu a1, a3, .LBB89_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB89_2:
+; RV64-NEXT:    vzext.vf8 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB89_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB89_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i8> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB90_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB90_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v16, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB90_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB90_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB90_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB90_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v16, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v16
+; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB90_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB90_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i16> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf4 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB91_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB91_2:
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v12, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB91_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB91_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf4 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB91_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB91_2:
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB91_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB91_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i16> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf4 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB92_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB92_2:
+; RV32-NEXT:    vzext.vf4 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v12, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB92_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB92_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf4 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB92_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB92_2:
+; RV64-NEXT:    vzext.vf4 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB92_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB92_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i16> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB93_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB93_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB93_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB93_4:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB93_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB93_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v16, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v16
+; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB93_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB93_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf2 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB94_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB94_2:
+; RV32-NEXT:    vsext.vf2 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB94_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB94_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf2 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB94_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB94_2:
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB94_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB94_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf2 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB95_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB95_2:
+; RV32-NEXT:    vzext.vf2 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB95_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB95_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf2 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB95_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB95_2:
+; RV64-NEXT:    vzext.vf2 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB95_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB95_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB96_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB96_2:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v24, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v28, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB96_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB96_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v28, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vluxei32.v v8, (a0), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB96_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB96_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v24, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB96_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB96_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 1d5cad5d40212..5fe4ad0cfe635 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32)
@@ -1716,3 +1716,470 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
   call void @llvm.vp.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double>, <32 x double*>, <32 x i1>, i32)
+
+define void @vpscatter_v32f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a0)
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a0, .LBB79_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB79_2:
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    addi a2, a1, -16
+; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB79_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB79_4:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vle64.v v24, (a0)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB79_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB79_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vle64.v v16, (a0)
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, a2, -16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
+; RV64-NEXT:    bltu a2, a0, .LBB79_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a0
+; RV64-NEXT:  .LBB79_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB80_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB80_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB80_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB80_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a4, a3, 3
+; RV64-NEXT:    add a3, a4, a3
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB80_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:  .LBB80_2:
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v0, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsext.vf2 v24, v0
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a2, -16
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    bltu a2, a3, .LBB80_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB80_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8re8.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vslidedown.vi v8, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    vsext.vf2 v8, v24
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB81_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB81_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsext.vf2 v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB81_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB81_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v8, v24, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    vsext.vf2 v8, v24
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB81_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB81_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsext.vf2 v16, v24
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a1, a2, -16
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    bltu a2, a1, .LBB81_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB81_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    vzext.vf2 v8, v24
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB82_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB82_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vzext.vf2 v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB82_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB82_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v8, v24, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    vzext.vf2 v8, v24
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB82_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB82_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vzext.vf2 v16, v24
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a1, a2, -16
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    bltu a2, a1, .LBB82_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB82_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index dc38a17e10a25..f71a2f86fee7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -251,6 +251,107 @@ define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %
   ret <vscale x 8 x i8> %v
 }
 
+declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_nxv32i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 2
+; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, mu
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB12_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB12_2:
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v10
+; RV32-NEXT:    vsetvli zero, a3, e8, m2, ta, mu
+; RV32-NEXT:    vluxei32.v v18, (a0), v24, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB12_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB12_4:
+; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    vmv4r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_nxv32i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli t0, a3, 1
+; RV64-NEXT:    sub a4, a1, t0
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li t1, 0
+; RV64-NEXT:    li a7, 0
+; RV64-NEXT:    bltu a1, a4, .LBB12_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a7, a4
+; RV64-NEXT:  .LBB12_2:
+; RV64-NEXT:    sub a4, a7, a3
+; RV64-NEXT:    mv a2, t1
+; RV64-NEXT:    bltu a7, a4, .LBB12_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a2, a4
+; RV64-NEXT:  .LBB12_4:
+; RV64-NEXT:    srli a4, a3, 2
+; RV64-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
+; RV64-NEXT:    vslidedown.vx v13, v12, a4
+; RV64-NEXT:    srli a6, a3, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vx v0, v13, a6
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v11
+; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v19, (a0), v24, v0.t
+; RV64-NEXT:    bltu a1, t0, .LBB12_6
+; RV64-NEXT:  # %bb.5:
+; RV64-NEXT:    mv a1, t0
+; RV64-NEXT:  .LBB12_6:
+; RV64-NEXT:    sub a2, a1, a3
+; RV64-NEXT:    bltu a1, a2, .LBB12_8
+; RV64-NEXT:  # %bb.7:
+; RV64-NEXT:    mv t1, a2
+; RV64-NEXT:  .LBB12_8:
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vx v0, v12, a6
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v9
+; RV64-NEXT:    vsetvli zero, t1, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v17, (a0), v24, v0.t
+; RV64-NEXT:    bltu a1, a3, .LBB12_10
+; RV64-NEXT:  # %bb.9:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB12_10:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v16, (a0), v24, v0.t
+; RV64-NEXT:    bltu a7, a3, .LBB12_12
+; RV64-NEXT:  # %bb.11:
+; RV64-NEXT:    mv a7, a3
+; RV64-NEXT:  .LBB12_12:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v10
+; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vluxei64.v v18, (a0), v24, v0.t
+; RV64-NEXT:    vmv4r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
+  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
 declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -2232,3 +2333,264 @@ define  @vpgather_baseidx_nxv8f64(double* %base,  @llvm.vp.gather.nxv8f64.nxv8p0f64( %ptrs,  %m, i32 %evl)
   ret  %v
 }
+
+declare  @llvm.vp.gather.nxv16f64.nxv16p0f64(, , i32)
+
+define  @vpgather_nxv16f64( %ptrs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a4, a1, 3
+; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a3, a0, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a4
+; RV32-NEXT:    bltu a0, a3, .LBB102_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB102_2:
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (zero), v12, v0.t
+; RV32-NEXT:    bltu a0, a1, .LBB102_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB102_4:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a4, a1, 3
+; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a4
+; RV64-NEXT:    bltu a0, a3, .LBB102_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB102_2:
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
+; RV64-NEXT:    bltu a0, a1, .LBB102_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB102_4:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT:    ret
+  %v = call  @llvm.vp.gather.nxv16f64.nxv16p0f64( %ptrs,  %m, i32 %evl)
+  ret  %v
+}
+
+define  @vpgather_baseidx_nxv16i16_nxv16f64(double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB103_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB103_2:
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v16, v8
+; RV32-NEXT:    vsll.vi v24, v16, 3
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB103_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB103_4:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB103_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB103_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v16, v10
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB103_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB103_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base,  %idxs
+  %v = call  @llvm.vp.gather.nxv16f64.nxv16p0f64( %ptrs,  %m, i32 %evl)
+  ret  %v
+}
+
+define  @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v10
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB104_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB104_2:
+; RV32-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB104_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB104_4:
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v16, v10
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB104_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB104_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB104_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB104_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext  %idxs to 
+  %ptrs = getelementptr inbounds double, double* %base,  %eidxs
+  %v = call  @llvm.vp.gather.nxv16f64.nxv16p0f64( %ptrs,  %m, i32 %evl)
+  ret  %v
+}
+
+define  @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vzext.vf4 v16, v10
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB105_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB105_2:
+; RV32-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV32-NEXT:    vzext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB105_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB105_4:
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vzext.vf4 v16, v10
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB105_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB105_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vzext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB105_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB105_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext  %idxs to 
+  %ptrs = getelementptr inbounds double, double* %base,  %eidxs
+  %v = call  @llvm.vp.gather.nxv16f64.nxv16p0f64( %ptrs,  %m, i32 %evl)
+  ret  %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 62a7244de1b3f..01be81fd1912d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(, , , i32)
@@ -2071,3 +2071,367 @@ define void @vpscatter_baseidx_nxv8f64( %val, double* %base
   call void @llvm.vp.scatter.nxv8f64.nxv8p0f64( %val,  %ptrs,  %m, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.scatter.nxv16f64.nxv16p0f64(, , , i32)
+
+define void @vpscatter_nxv16f64( %val,  %ptrs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vl8re32.v v24, (a0)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a0, .LBB95_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:  .LBB95_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
+; RV32-NEXT:    srli a2, a0, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    vslidedown.vx v0, v0, a2
+; RV32-NEXT:    bltu a1, a0, .LBB95_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:  .LBB95_4:
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vl8re64.v v16, (a0)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a3, a1, 3
+; RV64-NEXT:    add a0, a0, a3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB95_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB95_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vl8re64.v v24, (a0)
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a0, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a0, .LBB95_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:  .LBB95_4:
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64( %val,  %ptrs,  %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB96_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB96_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v24, v4
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB96_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB96_4:
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vl4re16.v v4, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB96_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB96_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v4
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB96_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB96_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v8, v6
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base,  %idxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64( %val,  %ptrs,  %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %val, double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    vl4re16.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vsext.vf4 v8, v24
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB97_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB97_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vsext.vf4 v16, v26
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB97_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB97_4:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    vl4re16.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    vsext.vf4 v8, v24
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB97_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB97_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vsext.vf4 v16, v26
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB97_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB97_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = sext  %idxs to 
+  %ptrs = getelementptr inbounds double, double* %base,  %eidxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64( %val,  %ptrs,  %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %val, double* %base,  %idxs,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    vl4re16.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vzext.vf4 v8, v24
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB98_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB98_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vzext.vf4 v16, v26
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB98_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB98_4:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    vl4re16.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    vzext.vf4 v8, v24
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB98_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB98_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vzext.vf4 v16, v26
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB98_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB98_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = zext  %idxs to 
+  %ptrs = getelementptr inbounds double, double* %base,  %eidxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64( %val,  %ptrs,  %m, i32 %evl)
+  ret void
+}

From 48f763edb4718a2b2226d9654b96d4b69d66883e Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Fri, 21 Jan 2022 07:56:08 +0000
Subject: [PATCH 518/946] [docs] Refine the description in Set-Like and
 Map-Like container options.

In "Other Set-Like Container Options":
* Drops the references to C++ TR1 and SGI and hash_set.
* Drops the worry about portability (this was a problem with hash_set, but
std::unordered_set has worked portably since LLVM started depending
on C++11).

The "Other Map-Like Container Options" section is updated in the same way.

Differential Revision: https://reviews.llvm.org/D117858
---
 llvm/docs/ProgrammersManual.rst | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 630893a3c11e4..8437e24c595b3 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2198,10 +2198,9 @@ membership.
 Other Set-Like Container Options
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The STL provides several other options, such as std::multiset and the various
-"hash_set" like containers (whether from C++ TR1 or from the SGI library).  We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.
+The STL provides several other options, such as std::multiset and
+std::unordered_set.  We never use containers like unordered_set because
+they are generally very expensive (each insertion requires a malloc).
 
 std::multiset is useful if you're not interested in elimination of duplicates,
 but has all the drawbacks of :ref:`std::set <dss_set>`.  A sorted vector
@@ -2389,10 +2388,9 @@ operations is logarithmic in the size of the original map.
 Other Map-Like Container Options
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The STL provides several other options, such as std::multimap and the various
-"hash_map" like containers (whether from C++ TR1 or from the SGI library).  We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.
+The STL provides several other options, such as std::multimap and
+std::unordered_map.  We never use containers like unordered_map because
+they are generally very expensive (each insertion requires a malloc).
 
 std::multimap is useful if you want to map a key to multiple values, but has all
 the drawbacks of std::map.  A sorted vector or some other approach is almost

From 901dd53cbf6130cb157b0bae20141b5a01a8b903 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Fri, 21 Jan 2022 01:46:34 +0000
Subject: [PATCH 519/946] [docs] There are more than three bit storage
 containers.

Avoid listing all the bit containers in the section title, and do not state
a specific number of bit containers in the text.

Differential Revision: https://reviews.llvm.org/D117849
---
 llvm/docs/ProgrammersManual.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 8437e24c595b3..f56a3a4aa4567 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2398,10 +2398,10 @@ always better.
 
 .. _ds_bit:
 
-Bit storage containers (BitVector, SparseBitVector, CoalescingBitVector)
+Bit storage containers
 ------------------------------------------------------------------------
 
-There are three bit storage containers, and choosing when to use each is
+There are several bit storage containers, and choosing when to use each is
 relatively straightforward.
 
 One additional option is ``std::vector<bool>``: we discourage its use for two

From e2f8d28afba0a6545284ad3b54a4b7532c3253b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= 
Date: Tue, 25 Jan 2022 11:11:14 +0100
Subject: [PATCH 520/946] [NFC] Added test with select with unpredictable
 metadata; regenerate x86-cmov-converter.ll

---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 333 +++++++++++++++-----
 1 file changed, 251 insertions(+), 82 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 59f68269381a2..91c1367da1bdf 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -106,6 +107,33 @@
 ; CHECK: jg
 
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
+; CHECK-LABEL: CmovInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB0_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    leal 1(%rax), %r9d
+; CHECK-NEXT:    imull %esi, %eax
+; CHECK-NEXT:    movl $10, %r10d
+; CHECK-NEXT:    cmpl %edx, %eax
+; CHECK-NEXT:    jg .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    movl %r9d, %r10d
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    imull %r9d, %r10d
+; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r8
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -136,6 +164,32 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
+; CHECK-LABEL: CmovNotInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB1_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edx, %r9d
+; CHECK-NEXT:    movl %edi, %r10d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $10, %r11d
+; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    imull %esi, %edx
+; CHECK-NEXT:    cmpl %r9d, %edx
+; CHECK-NEXT:    cmovgl %r11d, %eax
+; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
+; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %r9d
+; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r10
+; CHECK-NEXT:    jne .LBB1_2
+; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -169,6 +223,33 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: jg
 
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB2_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB2_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB2_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB2_2
+; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -202,6 +283,24 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxValue:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB3_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB3_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    cmovgl %edi, %eax
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    cmpq %rdx, %rcx
+; CHECK-NEXT:    jne .LBB3_2
+; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -231,6 +330,24 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: set
 
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
+; CHECK-LABEL: BinarySearch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    jmp .LBB4_2
+; CHECK-NEXT:  .LBB4_1: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    btl %eax, %edi
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
+; CHECK-NEXT:  .LBB4_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rdx), %ecx
+; CHECK-NEXT:    cmpl %ecx, %eax
+; CHECK-NEXT:    ja .LBB4_1
+; CHECK-NEXT:  # %bb.3: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -301,6 +418,39 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK:         ja
 
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
+; CHECK-LABEL: Transform:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB5_5
+; CHECK-NEXT:  # %bb.1: # %while.body.preheader
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:  .LBB5_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movslq %esi, %rsi
+; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r8d
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl $11, %eax
+; CHECK-NEXT:    movl %r8d, %ecx
+; CHECK-NEXT:    cmpl %r8d, %edx
+; CHECK-NEXT:    ja .LBB5_4
+; CHECK-NEXT:  # %bb.3: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    movl $22, %eax
+; CHECK-NEXT:    movl $22, %ecx
+; CHECK-NEXT:  .LBB5_4: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
+; CHECK-NEXT:    addl $1, %esi
+; CHECK-NEXT:    cmpl %r9d, %esi
+; CHECK-NEXT:    ja .LBB5_2
+; CHECK-NEXT:  .LBB5_5: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -328,16 +478,35 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB6_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%rcx), %eax
-; CHECK:       [[FALSE_BB]]:
+  ret i32 %z
+}
+
+define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
+; CHECK-LABEL: test_cmov_memoperand_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB6_2: # %entry
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp ugt i32 %a, %b
+  %load = load i32, i32* %y
+  %z = select i1 %cond, i32 %x, i32 %load , !unpredictable !0
   ret i32 %z
 }
 
@@ -345,29 +514,25 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB8_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB8_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -376,29 +541,25 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    jbe .LBB9_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB9_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
-; CHECK-NOT:     cmov
-; CHECK:         jbe [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -408,15 +569,19 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    cmoval %edx, %eax
+; CHECK-NEXT:    cmoval (%r8), %edx
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
-; CHECK:         cmoval
-; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -426,18 +591,19 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB11_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB11_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -445,20 +611,21 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB12_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    movl (%rax), %eax
+; CHECK-NEXT:  .LBB12_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movq (%r{{..}}), %[[R1:.*]]
-; CHECK:         movl (%[[R1]]), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -467,19 +634,20 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB13_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %r
 }
 
@@ -495,34 +663,36 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
-; CHECK-NEXT:  .LBB13_1: # %loop.body
+; CHECK-NEXT:  .LBB14_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_3
+; CHECK-NEXT:    ja .LBB14_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB13_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB14_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_5
+; CHECK-NEXT:    ja .LBB14_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB13_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB14_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB13_1
+; CHECK-NEXT:    jl .LBB14_1
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
+
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -539,10 +709,9 @@ loop.body:
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
 
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
+!0 = !{}

From 9fa6ad4c589316d71a61834fefd6d411249e4843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= 
Date: Tue, 25 Jan 2022 11:28:26 +0100
Subject: [PATCH 521/946] Revert "[NFC] Added test with select with
 unpredictable metadata; regenerate x86-cmov-converter.ll"

This reverts commit e2f8d28afba0a6545284ad3b54a4b7532c3253b6.
---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 333 +++++---------------
 1 file changed, 82 insertions(+), 251 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 91c1367da1bdf..59f68269381a2 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -107,33 +106,6 @@
 ; CHECK: jg
 
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
-; CHECK-LABEL: CmovInHotPath:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB0_5
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %r8d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:  .LBB0_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
-; CHECK-NEXT:    leal 1(%rax), %r9d
-; CHECK-NEXT:    imull %esi, %eax
-; CHECK-NEXT:    movl $10, %r10d
-; CHECK-NEXT:    cmpl %edx, %eax
-; CHECK-NEXT:    jg .LBB0_4
-; CHECK-NEXT:  # %bb.3: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    movl %r9d, %r10d
-; CHECK-NEXT:  .LBB0_4: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    imull %r9d, %r10d
-; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
-; CHECK-NEXT:    addq $1, %rdi
-; CHECK-NEXT:    cmpq %rdi, %r8
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -164,32 +136,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
-; CHECK-LABEL: CmovNotInHotPath:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB1_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edx, %r9d
-; CHECK-NEXT:    movl %edi, %r10d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    movl $10, %r11d
-; CHECK-NEXT:  .LBB1_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    imull %esi, %edx
-; CHECK-NEXT:    cmpl %r9d, %edx
-; CHECK-NEXT:    cmovgl %r11d, %eax
-; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
-; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
-; CHECK-NEXT:    cltd
-; CHECK-NEXT:    idivl %r9d
-; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
-; CHECK-NEXT:    addq $1, %rdi
-; CHECK-NEXT:    cmpq %rdi, %r10
-; CHECK-NEXT:    jne .LBB1_2
-; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -223,33 +169,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: jg
 
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
-; CHECK-LABEL: MaxIndex:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    jl .LBB2_5
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %r8d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    movl $1, %edx
-; CHECK-NEXT:  .LBB2_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
-; CHECK-NEXT:    movslq %edi, %rcx
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
-; CHECK-NEXT:    jg .LBB2_4
-; CHECK-NEXT:  # %bb.3: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:  .LBB2_4: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    addq $1, %rdx
-; CHECK-NEXT:    movl %eax, %edi
-; CHECK-NEXT:    cmpq %rdx, %r8
-; CHECK-NEXT:    jne .LBB2_2
-; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -283,24 +202,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
-; CHECK-LABEL: MaxValue:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl (%rsi), %eax
-; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    jl .LBB3_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    movl $1, %edx
-; CHECK-NEXT:  .LBB3_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
-; CHECK-NEXT:    cmpl %eax, %edi
-; CHECK-NEXT:    cmovgl %edi, %eax
-; CHECK-NEXT:    addq $1, %rdx
-; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    jne .LBB3_2
-; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -330,24 +231,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: set
 
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
-; CHECK-LABEL: BinarySearch:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl (%rsi), %eax
-; CHECK-NEXT:    jmp .LBB4_2
-; CHECK-NEXT:  .LBB4_1: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    btl %eax, %edi
-; CHECK-NEXT:    setae %cl
-; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
-; CHECK-NEXT:  .LBB4_2: # %while.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rdx), %ecx
-; CHECK-NEXT:    cmpl %ecx, %eax
-; CHECK-NEXT:    ja .LBB4_1
-; CHECK-NEXT:  # %bb.3: # %while.end
-; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -418,39 +301,6 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK:         ja
 
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
-; CHECK-LABEL: Transform:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB5_5
-; CHECK-NEXT:  # %bb.1: # %while.body.preheader
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:  .LBB5_2: # %while.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movslq %esi, %rsi
-; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %r8d
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    movl $11, %eax
-; CHECK-NEXT:    movl %r8d, %ecx
-; CHECK-NEXT:    cmpl %r8d, %edx
-; CHECK-NEXT:    ja .LBB5_4
-; CHECK-NEXT:  # %bb.3: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    movl $22, %eax
-; CHECK-NEXT:    movl $22, %ecx
-; CHECK-NEXT:  .LBB5_4: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %ecx
-; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
-; CHECK-NEXT:    addl $1, %esi
-; CHECK-NEXT:    cmpl %r9d, %esi
-; CHECK-NEXT:    ja .LBB5_2
-; CHECK-NEXT:  .LBB5_5: # %while.end
-; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -478,35 +328,16 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB6_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-  ret i32 %z
-}
-
-define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
-; CHECK-LABEL: test_cmov_memoperand_unpredictable:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB6_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    retq
-entry:
-  %cond = icmp ugt i32 %a, %b
-  %load = load i32, i32* %y
-  %z = select i1 %cond, i32 %x, i32 %load , !unpredictable !0
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%rcx), %eax
+; CHECK:       [[FALSE_BB]]:
   ret i32 %z
 }
 
@@ -514,25 +345,29 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB8_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %r8d
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:  .LBB8_2: # %entry
-; CHECK-NEXT:    addl %r8d, %eax
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
+; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
+; CHECK-DAG:     movl %{{.*}} %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         addl
+; CHECK-DAG:       %[[R1]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK-DAG:     addl
+; CHECK-DAG:       %[[R2]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -541,25 +376,29 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    jbe .LBB9_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %r8d
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:  .LBB9_2: # %entry
-; CHECK-NEXT:    addl %r8d, %eax
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
+; CHECK-NOT:     cmov
+; CHECK:         jbe [[FALSE_BB:.*]]
+; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
+; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
+; CHECK-DAG:     movl %{{.*}} %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         addl
+; CHECK-DAG:       %[[R1]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK-DAG:     addl
+; CHECK-DAG:       %[[R2]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -569,19 +408,15 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:    cmoval %edx, %eax
-; CHECK-NEXT:    cmoval (%r8), %edx
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
+; CHECK:         cmoval
+; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -591,19 +426,18 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB11_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB11_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%r{{..}}), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %z
 }
 
@@ -611,21 +445,20 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB12_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movq (%rcx), %rax
-; CHECK-NEXT:    movl (%rax), %eax
-; CHECK-NEXT:  .LBB12_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movq (%r{{..}}), %[[R1:.*]]
+; CHECK:         movl (%[[R1]]), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %z
 }
 
@@ -634,20 +467,19 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB13_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB13_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%r{{..}}), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %r
 }
 
@@ -663,36 +495,34 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB14_1: # %loop.body
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
+
+; CHECK-NEXT:  .LBB13_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB14_3
+; CHECK-NEXT:    ja .LBB13_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB14_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:  .LBB13_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB14_5
+; CHECK-NEXT:    ja .LBB13_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB14_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:  .LBB13_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB14_1
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
+; CHECK-NEXT:    jl .LBB13_1
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -709,9 +539,10 @@ loop.body:
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
 
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
-!0 = !{}

From 5f5c5603ce40e9372c108645be92828d4fe6df6f Mon Sep 17 00:00:00 2001
From: Fraser Cormack 
Date: Mon, 24 Jan 2022 11:53:29 +0000
Subject: [PATCH 522/946] [SelectionDAG][VP] Add splitting support for VP_MERGE

This patch adds splitting support for ISD::VP_MERGE, which splits
identically to VP_SELECT and similarly to other select-like nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118032
---
 .../SelectionDAG/LegalizeTypesGeneric.cpp     |   2 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   1 +
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 152 ++++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 175 +++++++++++++++++-
 4 files changed, 325 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c3a77879f225b..c6885677d6448 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -540,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
       std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
   }
 
-  if (Opcode != ISD::VP_SELECT) {
+  if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) {
     Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
     Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
     return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f8f3d2ce8e45d..5dd5db2ad51f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -915,6 +915,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
   case ISD::VSELECT:
   case ISD::SELECT:
+  case ISD::VP_MERGE:
   case ISD::VP_SELECT:    SplitRes_Select(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index e0cc13db7f118..e904e8a4d495c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
@@ -951,3 +951,151 @@ define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1>
   %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
   ret <16 x double> %v
 }
+
+declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)
+
+define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    addi a1, a0, 128
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vle64.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, a2, -16
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    bltu a2, a3, .LBB72_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a1, a3
+; RV32-NEXT:  .LBB72_2:
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT:    bltu a2, a0, .LBB72_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB72_4:
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vv_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    addi a1, a0, 128
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vle64.v v24, (a1)
+; RV64-NEXT:    addi a3, a2, -16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    bltu a2, a3, .LBB72_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB72_2:
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT:    bltu a2, a0, .LBB72_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:  .LBB72_4:
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT:    vmv8r.v v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -16
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    bltu a0, a2, .LBB73_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB73_2:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v0, v24, 2
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
+; CHECK-NEXT:    bltu a0, a1, .LBB73_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:  .LBB73_4:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <32 x double> poison, double %a, i32 0
+  %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 46ebd0a6acf50..653217a20c54e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare  @llvm.vp.merge.nxv1i8(, , , i32)
@@ -263,6 +263,177 @@ define  @vpmerge_vi_nxv64i8( %vb,  %v
 }
 
+declare  @llvm.vp.merge.nxv128i8(, , , i32)
+
+define  @vpmerge_vv_nxv128i8( %va,  %vb,  %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_nxv128i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a1, a1, a4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a4, a0, a1
+; RV32-NEXT:    vl8r.v v24, (a4)
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a4, zero, e8, m8, ta, mu
+; RV32-NEXT:    vlm.v v2, (a2)
+; RV32-NEXT:    sub a4, a3, a1
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a3, a4, .LBB21_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a4
+; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:    vl8r.v v8, (a0)
+; RV32-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT:    bltu a3, a1, .LBB21_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB21_4:
+; RV32-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vv_nxv128i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a4, a0, a1
+; RV64-NEXT:    vl8r.v v24, (a4)
+; RV64-NEXT:    vsetvli a4, zero, e8, m8, ta, mu
+; RV64-NEXT:    vlm.v v2, (a2)
+; RV64-NEXT:    sub a4, a3, a1
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a3, a4, .LBB21_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a4
+; RV64-NEXT:  .LBB21_2:
+; RV64-NEXT:    vl8r.v v8, (a0)
+; RV64-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v2
+; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT:    bltu a3, a1, .LBB21_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB21_4:
+; RV64-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT:    vmv8r.v v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %v = call  @llvm.vp.merge.nxv128i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
+define  @vpmerge_vx_nxv128i8(i8 %a,  %vb,  %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:    bltu a2, a3, .LBB22_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a1)
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT:    sub a1, a2, a3
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    bltu a2, a1, .LBB22_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a5, a1
+; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement  poison, i8 %a, i32 0
+  %va = shufflevector  %elt.head,  poison,  zeroinitializer
+  %v = call  @llvm.vp.merge.nxv128i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
+define  @vpmerge_vi_nxv128i8( %vb,  %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    bltu a1, a2, .LBB23_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:  .LBB23_2:
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; CHECK-NEXT:    sub a0, a1, a2
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    bltu a1, a0, .LBB23_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:  .LBB23_4:
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmerge.vim v16, v16, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement  poison, i8 2, i32 0
+  %va = shufflevector  %elt.head,  poison,  zeroinitializer
+  %v = call  @llvm.vp.merge.nxv128i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
 declare  @llvm.vp.merge.nxv1i16(, , , i32)
 
 define  @vpmerge_vv_nxv1i16( %va,  %vb,  %m, i32 zeroext %evl) {

From 19d3dc6e226c0714d08c5cd130d8f5ba63bbd4f2 Mon Sep 17 00:00:00 2001
From: Victor Perez 
Date: Tue, 25 Jan 2022 10:34:58 +0000
Subject: [PATCH 523/946] [VP] Update CodeGen/RISCV/rvv/vpgather-sdnode.ll test

---
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index f71a2f86fee7d..4c3636e315825 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -287,62 +287,62 @@ define  @vpgather_baseidx_nxv32i8(i8* %base, 
Date: Tue, 25 Jan 2022 11:46:02 +0100
Subject: [PATCH 524/946] [bazel] Adjust dependencies after a70aa7bb

These are all picked up transitively, but fail with
--features=layering_check, which enforces header dependencies.
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel      | 1 +
 utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 436e7a79833ec..03e54e2726f19 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -8018,6 +8018,7 @@ cc_library(
     deps = [
         ":AllocationOpInterface",
         ":Analysis",
+        ":ArithmeticDialect",
         ":BufferizationDialect",
         ":BufferizationPassIncGen",
         ":ControlFlowInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 7bc92df875c46..79ba323efef07 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -332,6 +332,8 @@ cc_library(
         "//mlir:IR",
         "//mlir:MemRefDialect",
         "//mlir:Pass",
+        "//mlir:SCFDialect",
+        "//mlir:StandardOps",
         "//mlir:Support",
         "//mlir:Transforms",
         "//mlir:VectorOps",

From f302e0b5dd402e629620a58f9115a3441c65d60f Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Tue, 25 Jan 2022 10:37:19 +0000
Subject: [PATCH 525/946] [AArch64] Exclude optional features from HasV8_0rOps.

The following SubtargetFeatures are removed from the definition of
HasV8_0rOps, on the grounds that they are optional in Armv8.4-A, and
therefore (by the definition of Armv8.0-R) also optional in v8.0-R:

 * performance monitoring: FeaturePerfMon
 * cryptography: FeatureSM4 and FeatureSHA3
 * half-precision FP: FeatureFullFP16, FeatureFP16FML
 * speculation control: FeatureSSBS, FeaturePredRes, FeatureSB,
   FeatureSpecRestrict

This isn't the full set of features that are listed as optional in the
spec. FeatureCCIDX and FeatureTRACEV8_4 are also optional. But LLVM
includes those in HasV8_3aOps and HasV8_4aOps respectively (I think on
the grounds that the system registers they enable are useful to be
able to access after a runtime check), and so for consistency, I've
left those in HasV8_0rOps too.

After this commit, HasV8_0rOps is a strict subset of HasV8_4aOps (but
missing features that are not in Armv8.0-R at all).

The definition of Cortex-R82 is correspondingly updated to add most of
the features that I've removed from base Armv8.0-R (with the exception
of the cryptography ones), since that particular implementation of
v8.0-R does have them.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D118045
---
 llvm/lib/Target/AArch64/AArch64.td                  | 13 ++++++-------
 llvm/test/MC/AArch64/armv8.2a-crypto.s              |  3 ++-
 .../MC/Disassembler/AArch64/armv8.3a-complex.txt    |  3 ++-
 .../MC/Disassembler/AArch64/armv8.5a-predres.txt    |  2 +-
 .../Disassembler/AArch64/armv8.5a-specrestrict.txt  |  2 +-
 llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt  |  1 -
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 0a2d88c12338b..b87468d5c8de2 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -528,16 +528,13 @@ def HasV8_0rOps : SubtargetFeature<
   [//v8.1
   FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
   //v8.2
-  FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
-  FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
+  FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV,
   //v8.3
   FeatureComplxNum, FeatureCCIDX, FeatureJS,
   FeaturePAuth, FeatureRCPC,
   //v8.4
-  FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
-  FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
-  //v8.5
-  FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
+  FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI,
+  FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
@@ -969,7 +966,9 @@ def ProcessorFeatures {
   list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
                                  FeatureETE, FeatureMTE, FeatureFP16FML,
                                  FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
-  list R82  = [HasV8_0rOps];
+  list R82  = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
+                                 FeatureFP16FML, FeatureSSBS, FeaturePredRes,
+                                 FeatureSB, FeatureSpecRestrict];
   list X1   = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
                                  FeatureNEON, FeatureRCPC, FeaturePerfMon,
                                  FeatureSPE, FeatureFullFP16, FeatureDotProd];
diff --git a/llvm/test/MC/AArch64/armv8.2a-crypto.s b/llvm/test/MC/AArch64/armv8.2a-crypto.s
index 8a1052037c683..93e84d5830b80 100644
--- a/llvm/test/MC/AArch64/armv8.2a-crypto.s
+++ b/llvm/test/MC/AArch64/armv8.2a-crypto.s
@@ -7,7 +7,8 @@
 // RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-NO-SHA --check-prefix=CHECK-NO-SM < %t %s
 
-// RUN: llvm-mc -triple aarch64 -mattr=+v8r -show-encoding -o - %s | FileCheck %s --check-prefixes=CHECK-SM,CHECK-SHA
+// RUN: not llvm-mc -triple aarch64 -mattr=+v8r -show-encoding -o - %s < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-SHA --check-prefix=CHECK-NO-SM < %t %s
 
   sha512h   q0, q1, v2.2d
   sha512h2  q0, q1, v2.2d
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
index b62d424eefe00..f920639173ad2 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
@@ -1,7 +1,8 @@
 # RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,-fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
 # RUN: FileCheck %s < %t --check-prefix=NO-FP16
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,+fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
-# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
+# RUN: FileCheck %s < %t --check-prefix=NO-FP16
 # RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-v8.3a,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A
 
 ###### FCMLA vector
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
index 45bb17bdba840..e1b10b16a0f43 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+predres -disassemble < %s      | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a    -disassemble < %s      | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSB
 # RUN: llvm-mc -triple=aarch64 -mattr=-predres -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOSB
 
 [0x80 0x73 0x0b 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
index 8662b905a3f99..3301680b1fd8d 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+specrestrict -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a        -disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 # RUN: llvm-mc -triple=aarch64 -mattr=-specrestrict -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 
 [0x81 0x03 0x38 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
index 3efec07fdcb14..7698751c88076 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76 -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76ae -disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 # RUN: llvm-mc -triple=aarch64 -mattr=-ssbs  -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 
 [0x3f 0x41 0x03 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
index 57520c99cb80c..5265df1ec7920 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
@@ -1,6 +1,5 @@
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+fp16fml           --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=-fullfp16,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
-# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
 
 #A fullfp16 instruction, for testing the interaction of the features
 [0x41,0x08,0xe3,0x1e]

From 99adacbcb7895114a62266c8a15e794bacd2380c Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 12:04:06 +0100
Subject: [PATCH 526/946] [clang] Remove some getPointerElementType() uses

Some cases where the call can be removed in a straightforward way.
---
 clang/lib/CodeGen/CGAtomic.cpp           |  8 ++++----
 clang/lib/CodeGen/CGCall.h               |  3 ++-
 clang/lib/CodeGen/CGExprScalar.cpp       |  5 +++--
 clang/lib/CodeGen/CGOpenMPRuntime.cpp    |  5 +----
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 13 +++++++------
 clang/lib/CodeGen/TargetInfo.cpp         | 14 ++++++++------
 6 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 6532f02879612..10569ae2c3f91 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -351,12 +351,12 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
 
 bool AtomicInfo::emitMemSetZeroIfNecessary() const {
   assert(LVal.isSimple());
-  llvm::Value *addr = LVal.getPointer(CGF);
-  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
+  Address addr = LVal.getAddress(CGF);
+  if (!requiresMemSetZero(addr.getElementType()))
     return false;
 
   CGF.Builder.CreateMemSet(
-      addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+      addr.getPointer(), llvm::ConstantInt::get(CGF.Int8Ty, 0),
       CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
       LVal.getAlignment().getAsAlign());
   return true;
@@ -1522,7 +1522,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
        !AsValue)) {
     auto *ValTy = AsValue
                       ? CGF.ConvertTypeForMem(ValueTy)
-                      : getAtomicAddress().getType()->getPointerElementType();
+                      : getAtomicAddress().getElementType();
     if (ValTy->isIntegerTy()) {
       assert(IntVal->getType() == ValTy && "Different integer types.");
       return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index d2bb104a3f9d3..af63e1bddd2d6 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -112,7 +112,8 @@ class CGCallee {
     assert(functionPtr && "configuring callee without function pointer");
     assert(functionPtr->getType()->isPointerTy());
     assert(functionPtr->getType()->isOpaquePointerTy() ||
-           functionPtr->getType()->getPointerElementType()->isFunctionTy());
+           functionPtr->getType()->getNonOpaquePointerElementType()
+               ->isFunctionTy());
   }
 
   static CGCallee forBuiltin(unsigned builtinID,
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index e32462eb635cd..4e8933fffe03b 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1613,8 +1613,9 @@ ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
   if (GlobalConstStr->getType()->getPointerAddressSpace() == ExprAS)
     return GlobalConstStr;
 
-  llvm::Type *EltTy = GlobalConstStr->getType()->getPointerElementType();
-  llvm::PointerType *NewPtrTy = llvm::PointerType::get(EltTy, ExprAS);
+  llvm::PointerType *PtrTy = cast(GlobalConstStr->getType());
+  llvm::PointerType *NewPtrTy =
+      llvm::PointerType::getWithSamePointeeType(PtrTy, ExprAS);
   return Builder.CreateAddrSpaceCast(GlobalConstStr, NewPtrTy, "usn_addr_cast");
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index fd956aabc717f..c8c08060e729e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -837,10 +837,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   }
   llvm::Value *Size;
   llvm::Value *SizeInChars;
-  auto *ElemType = OrigAddresses[N]
-                       .first.getPointer(CGF)
-                       ->getType()
-                       ->getPointerElementType();
+  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 594c7fef36a77..7c8e4e6b52a08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3401,12 +3401,13 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
       LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
   // First cast to generic.
   TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      /*AddrSpace=*/0));
+      TargetAddr, llvm::PointerType::getWithSamePointeeType(
+          cast(TargetAddr->getType()), /*AddrSpace=*/0));
   // Cast from generic to native address space.
   TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      NativePointeeAddrSpace));
+      TargetAddr, llvm::PointerType::getWithSamePointeeType(
+          cast(TargetAddr->getType()),
+                                  NativePointeeAddrSpace));
   Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
   CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
                         NativeParamType);
@@ -3431,8 +3432,8 @@ void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall(
       continue;
     }
     llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-        NativeArg,
-        NativeArg->getType()->getPointerElementType()->getPointerTo());
+        NativeArg, llvm::PointerType::getWithSamePointeeType(
+            cast(NativeArg->getType()), /*AddrSpace*/ 0));
     TargetArgs.emplace_back(
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
   }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index fd9a7e602833a..fb81169003fc0 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -8936,8 +8936,9 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
   llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                        unsigned ToAS) const {
     // Single value types.
-    if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
-      return llvm::PointerType::get(Ty->getPointerElementType(), ToAS);
+    auto *PtrTy = llvm::dyn_cast(Ty);
+    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
+      return llvm::PointerType::getWithSamePointeeType(PtrTy, ToAS);
     return Ty;
   }
 
@@ -9333,8 +9334,8 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
     return llvm::ConstantPointerNull::get(PT);
 
   auto &Ctx = CGM.getContext();
-  auto NPT = llvm::PointerType::get(PT->getPointerElementType(),
-      Ctx.getTargetAddressSpace(LangAS::opencl_generic));
+  auto NPT = llvm::PointerType::getWithSamePointeeType(
+      PT, Ctx.getTargetAddressSpace(LangAS::opencl_generic));
   return llvm::ConstantExpr::getAddrSpaceCast(
       llvm::ConstantPointerNull::get(NPT), PT);
 }
@@ -10269,8 +10270,9 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
     llvm::Type *LTy = CGT.ConvertType(Ty);
     auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
     auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
-    if (LTy->isPointerTy() && LTy->getPointerAddressSpace() == DefaultAS) {
-      LTy = llvm::PointerType::get(LTy->getPointerElementType(), GlobalAS);
+    auto *PtrTy = llvm::dyn_cast(LTy);
+    if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
+      LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS);
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }
   }

From 7cb452bfde1086f7bcddfd6de5594ebcb4c11bf5 Mon Sep 17 00:00:00 2001
From: Fraser Cormack 
Date: Mon, 24 Jan 2022 11:43:20 +0000
Subject: [PATCH 527/946] [SelectionDAG][VP] Add widening support for VP_MERGE

This patch adds widening support for ISD::VP_MERGE, which widens
identically to VP_SELECT and similarly to other select-like nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118030
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  3 +-
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 61 ++++++++++++----
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 69 ++++++++++++++-----
 3 files changed, 104 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5dd5db2ad51f0..0bd44ce4c872e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3231,6 +3231,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VSELECT:
   case ISD::SELECT:
   case ISD::VP_SELECT:
+  case ISD::VP_MERGE:
     Res = WidenVecRes_Select(N);
     break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
@@ -4782,7 +4783,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
   SDValue InOp2 = GetWidenedVector(N->getOperand(2));
   assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return Opcode == ISD::VP_SELECT
+  return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
              ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
                            N->getOperand(3))
              : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index e904e8a4d495c..8ac3184f02c45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -78,6 +78,43 @@ define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
   ret <4 x i8> %v
 }
 
+declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)
+
+define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 2, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
 declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
 
 define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -981,10 +1018,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    bltu a2, a3, .LBB72_2
+; RV32-NEXT:    bltu a2, a3, .LBB75_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a3
-; RV32-NEXT:  .LBB72_2:
+; RV32-NEXT:  .LBB75_2:
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV32-NEXT:    vslidedown.vi v0, v1, 2
@@ -1001,10 +1038,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a2, a0, .LBB72_4
+; RV32-NEXT:    bltu a2, a0, .LBB75_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB72_4:
+; RV32-NEXT:  .LBB75_4:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -1037,10 +1074,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    bltu a2, a3, .LBB72_2
+; RV64-NEXT:    bltu a2, a3, .LBB75_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a1, a3
-; RV64-NEXT:  .LBB72_2:
+; RV64-NEXT:  .LBB75_2:
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vi v0, v1, 2
@@ -1049,10 +1086,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, sp, 16
 ; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a2, a0, .LBB72_4
+; RV64-NEXT:    bltu a2, a0, .LBB75_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB72_4:
+; RV64-NEXT:  .LBB75_4:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    csrr a0, vlenb
@@ -1077,19 +1114,19 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
 ; CHECK-NEXT:    addi a2, a0, -16
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bltu a0, a2, .LBB73_2
+; CHECK-NEXT:    bltu a0, a2, .LBB76_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:  .LBB73_2:
+; CHECK-NEXT:  .LBB76_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v24, 2
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT:    bltu a0, a1, .LBB73_4
+; CHECK-NEXT:    bltu a0, a1, .LBB76_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a0, 16
-; CHECK-NEXT:  .LBB73_4:
+; CHECK-NEXT:  .LBB76_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 653217a20c54e..6a4ac666b1101 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -78,6 +78,43 @@ define  @vpmerge_vi_nxv2i8( %vb,  %v
 }
 
+declare  @llvm.vp.merge.nxv3i8(, , , i32)
+
+define  @vpmerge_vv_nxv3i8( %va,  %vb,  %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call  @llvm.vp.merge.nxv3i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
+define  @vpmerge_vx_nxv3i8(i8 %a,  %vb,  %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement  poison, i8 %a, i32 0
+  %va = shufflevector  %elt.head,  poison,  zeroinitializer
+  %v = call  @llvm.vp.merge.nxv3i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
+define  @vpmerge_vi_nxv3i8( %vb,  %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement  poison, i8 2, i32 0
+  %va = shufflevector  %elt.head,  poison,  zeroinitializer
+  %v = call  @llvm.vp.merge.nxv3i8( %m,  %va,  %vb, i32 %evl)
+  ret  %v
+}
+
 declare  @llvm.vp.merge.nxv4i8(, , , i32)
 
 define  @vpmerge_vv_nxv4i8( %va,  %vb,  %m, i32 zeroext %evl) {
@@ -295,10 +332,10 @@ define  @vpmerge_vv_nxv128i8( %va,  @vpmerge_vv_nxv128i8( %va,  @vpmerge_vv_nxv128i8( %va,  @vpmerge_vx_nxv128i8(i8 %a,  %vb,
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB22_2
+; CHECK-NEXT:    bltu a2, a3, .LBB25_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:  .LBB25_2:
 ; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a1)
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a1, a2, a3
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    bltu a2, a1, .LBB22_4
+; CHECK-NEXT:    bltu a2, a1, .LBB25_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:  .LBB25_4:
 ; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
@@ -410,20 +447,20 @@ define  @vpmerge_vi_nxv128i8( %vb, 
Date: Mon, 17 Jan 2022 15:56:11 +0100
Subject: [PATCH 528/946] [lldb] Make logging machinery type-safe

This patch makes use of C++ type checking and scoped enums to make
logging statements shorter and harder to misuse.

Defines like LIBLLDB_LOG_PROCESS are replaced with LLDBLog::Process.
Because it now carries type information we do not need to worry about
matching a specific enum value with the right getter function -- the
compiler will now do that for us.

The main entry point for the logging machinery becomes the GetLog
(template) function, which will obtain the correct Log object based on
the enum type. It achieves this through another template function
(LogChannelFor), which must be specialized for each type, and should
return the appropriate channel object.

This patch also removes the ability to log a message if multiple
categories are enabled simultaneously as it was unused and confusing.

This patch does not actually remove any of the existing interfaces. The
defines and log retrieval functions are left around as wrappers around
the new interfaces. They will be removed in a follow-up patch.

Differential Revision: https://reviews.llvm.org/D117490
---
 .../lldb/Interpreter/ScriptedInterface.h      |   2 +-
 lldb/include/lldb/Utility/Log.h               |  55 +++++++--
 lldb/include/lldb/Utility/Logging.h           | 116 +++++++++++-------
 .../Plugins/Process/POSIX/ProcessPOSIXLog.cpp |  20 +--
 .../Plugins/Process/POSIX/ProcessPOSIXLog.h   |  40 +++---
 .../GDBRemoteCommunicationServerLLGS.cpp      |   2 +-
 .../Process/gdb-remote/ProcessGDBRemote.cpp   |   5 +-
 .../gdb-remote/ProcessGDBRemoteLog.cpp        |  28 +++--
 .../Process/gdb-remote/ProcessGDBRemoteLog.h  |  55 ++++++---
 .../Process/gdb-remote/ThreadGDBRemote.cpp    |   8 +-
 .../SymbolFile/DWARF/LogChannelDWARF.cpp      |  18 +--
 .../SymbolFile/DWARF/LogChannelDWARF.h        |  33 +++--
 lldb/source/Utility/Log.cpp                   |   4 +-
 lldb/source/Utility/Logging.cpp               |  97 +++++++++------
 lldb/tools/lldb-server/lldb-gdbserver.cpp     |   1 +
 lldb/unittests/Utility/LogTest.cpp            |  60 +++++----
 16 files changed, 348 insertions(+), 196 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 9eb11832003e6..9de5e60cfea32 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -33,7 +33,7 @@ class ScriptedInterface {
   template 
   static Ret ErrorWithMessage(llvm::StringRef caller_name,
                               llvm::StringRef error_msg, Status &error,
-                              uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
+                              LLDBLog log_caterogy = LLDBLog::Process) {
     LLDB_LOGF(GetLogIfAllCategoriesSet(log_caterogy), "%s ERROR = %s",
               caller_name.data(), error_msg.data());
     error.SetErrorString(llvm::Twine(caller_name + llvm::Twine(" ERROR = ") +
diff --git a/lldb/include/lldb/Utility/Log.h b/lldb/include/lldb/Utility/Log.h
index 2684783939bd8..09fd2cb3a7e60 100644
--- a/lldb/include/lldb/Utility/Log.h
+++ b/lldb/include/lldb/Utility/Log.h
@@ -10,7 +10,6 @@
 #define LLDB_UTILITY_LOG_H
 
 #include "lldb/Utility/Flags.h"
-#include "lldb/Utility/Logging.h"
 #include "lldb/lldb-defines.h"
 
 #include "llvm/ADT/ArrayRef.h"
@@ -48,11 +47,31 @@ namespace lldb_private {
 
 class Log final {
 public:
+  /// The underlying type of all log channel enums. Declare them as:
+  /// enum class MyLog : MaskType {
+  ///   Channel0 = Log::ChannelFlag<0>,
+  ///   Channel1 = Log::ChannelFlag<1>,
+  ///   ...,
+  ///   LLVM_MARK_AS_BITMASK_ENUM(LastChannel),
+  /// };
+  using MaskType = uint64_t;
+
+  template 
+  static constexpr MaskType ChannelFlag = MaskType(1) << Bit;
+
   // Description of a log channel category.
   struct Category {
     llvm::StringLiteral name;
     llvm::StringLiteral description;
-    uint32_t flag;
+    MaskType flag;
+
+    template 
+    constexpr Category(llvm::StringLiteral name,
+                       llvm::StringLiteral description, Cat mask)
+        : name(name), description(description), flag(MaskType(mask)) {
+      static_assert(
+          std::is_same>::value, "");
+    }
   };
 
   // This class describes a log channel. It also encapsulates the behavior
@@ -63,18 +82,22 @@ class Log final {
 
   public:
     const llvm::ArrayRef categories;
-    const uint32_t default_flags;
+    const MaskType default_flags;
 
+    template 
     constexpr Channel(llvm::ArrayRef categories,
-                      uint32_t default_flags)
+                      Cat default_flags)
         : log_ptr(nullptr), categories(categories),
-          default_flags(default_flags) {}
+          default_flags(MaskType(default_flags)) {
+      static_assert(
+          std::is_same>::value, "");
+    }
 
     // This function is safe to call at any time. If the channel is disabled
     // after (or concurrently with) this function returning a non-null Log
     // pointer, it is still safe to attempt to write to the Log object -- the
     // output will be discarded.
-    Log *GetLogIfAll(uint32_t mask) {
+    Log *GetLogIfAll(MaskType mask) {
       Log *log = log_ptr.load(std::memory_order_relaxed);
       if (log && log->GetMask().AllSet(mask))
         return log;
@@ -85,7 +108,7 @@ class Log final {
     // after (or concurrently with) this function returning a non-null Log
     // pointer, it is still safe to attempt to write to the Log object -- the
     // output will be discarded.
-    Log *GetLogIfAny(uint32_t mask) {
+    Log *GetLogIfAny(MaskType mask) {
       Log *log = log_ptr.load(std::memory_order_relaxed);
       if (log && log->GetMask().AnySet(mask))
         return log;
@@ -180,7 +203,7 @@ class Log final {
 
   std::shared_ptr m_stream_sp;
   std::atomic m_options{0};
-  std::atomic m_mask{0};
+  std::atomic m_mask{0};
 
   void WriteHeader(llvm::raw_ostream &OS, llvm::StringRef file,
                    llvm::StringRef function);
@@ -215,6 +238,19 @@ class Log final {
   void operator=(const Log &) = delete;
 };
 
+// Must be specialized for a particular log type.
+template  Log::Channel &LogChannelFor() = delete;
+
+/// Retrieve the Log object for the channel associated with the given log enum.
+///
+/// Returns a valid Log object if any of the provided categories are enabled.
+/// Otherwise, returns nullptr.
+template  Log *GetLog(Cat mask) {
+  static_assert(std::is_same>::value,
+                "");
+  return LogChannelFor().GetLogIfAny(Log::MaskType(mask));
+}
+
 } // namespace lldb_private
 
 /// The LLDB_LOG* macros defined below are the way to emit log messages.
@@ -272,3 +308,6 @@ class Log final {
   } while (0)
 
 #endif // LLDB_UTILITY_LOG_H
+
+// TODO: Remove this and fix includes everywhere.
+#include "lldb/Utility/Logging.h"
diff --git a/lldb/include/lldb/Utility/Logging.h b/lldb/include/lldb/Utility/Logging.h
index 1a8a1022c5c0e..db84da244954a 100644
--- a/lldb/include/lldb/Utility/Logging.h
+++ b/lldb/include/lldb/Utility/Logging.h
@@ -9,57 +9,89 @@
 #ifndef LLDB_UTILITY_LOGGING_H
 #define LLDB_UTILITY_LOGGING_H
 
+#include "lldb/Utility/Log.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include 
 
-// Log Bits specific to logging in lldb
-#define LIBLLDB_LOG_PROCESS (1u << 1)
-#define LIBLLDB_LOG_THREAD (1u << 2)
-#define LIBLLDB_LOG_DYNAMIC_LOADER (1u << 3)
-#define LIBLLDB_LOG_EVENTS (1u << 4)
-#define LIBLLDB_LOG_BREAKPOINTS (1u << 5)
-#define LIBLLDB_LOG_WATCHPOINTS (1u << 6)
-#define LIBLLDB_LOG_STEP (1u << 7)
-#define LIBLLDB_LOG_EXPRESSIONS (1u << 8)
-#define LIBLLDB_LOG_TEMPORARY (1u << 9)
-#define LIBLLDB_LOG_STATE (1u << 10)
-#define LIBLLDB_LOG_OBJECT (1u << 11)
-#define LIBLLDB_LOG_COMMUNICATION (1u << 12)
-#define LIBLLDB_LOG_CONNECTION (1u << 13)
-#define LIBLLDB_LOG_HOST (1u << 14)
-#define LIBLLDB_LOG_UNWIND (1u << 15)
-#define LIBLLDB_LOG_API (1u << 16)
-#define LIBLLDB_LOG_SCRIPT (1u << 17)
-#define LIBLLDB_LOG_COMMANDS (1U << 18)
-#define LIBLLDB_LOG_TYPES (1u << 19)
-#define LIBLLDB_LOG_SYMBOLS (1u << 20)
-#define LIBLLDB_LOG_MODULES (1u << 21)
-#define LIBLLDB_LOG_TARGET (1u << 22)
-#define LIBLLDB_LOG_MMAP (1u << 23)
-#define LIBLLDB_LOG_OS (1u << 24)
-#define LIBLLDB_LOG_PLATFORM (1u << 25)
-#define LIBLLDB_LOG_SYSTEM_RUNTIME (1u << 26)
-#define LIBLLDB_LOG_JIT_LOADER (1u << 27)
-#define LIBLLDB_LOG_LANGUAGE (1u << 28)
-#define LIBLLDB_LOG_DATAFORMATTERS (1u << 29)
-#define LIBLLDB_LOG_DEMANGLE (1u << 30)
-#define LIBLLDB_LOG_AST (1u << 31)
-#define LIBLLDB_LOG_ALL (UINT32_MAX)
-#define LIBLLDB_LOG_DEFAULT                                                    \
-  (LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD | LIBLLDB_LOG_DYNAMIC_LOADER |     \
-   LIBLLDB_LOG_BREAKPOINTS | LIBLLDB_LOG_WATCHPOINTS | LIBLLDB_LOG_STEP |      \
-   LIBLLDB_LOG_STATE | LIBLLDB_LOG_SYMBOLS | LIBLLDB_LOG_TARGET |              \
-   LIBLLDB_LOG_COMMANDS)
-
 namespace lldb_private {
 
-class Log;
+enum class LLDBLog : Log::MaskType {
+  API = Log::ChannelFlag<0>,
+  AST = Log::ChannelFlag<1>,
+  Breakpoints = Log::ChannelFlag<2>,
+  Commands = Log::ChannelFlag<3>,
+  Communication = Log::ChannelFlag<4>,
+  Connection = Log::ChannelFlag<5>,
+  DataFormatters = Log::ChannelFlag<6>,
+  Demangle = Log::ChannelFlag<7>,
+  DynamicLoader = Log::ChannelFlag<8>,
+  Events = Log::ChannelFlag<9>,
+  Expressions = Log::ChannelFlag<10>,
+  Host = Log::ChannelFlag<11>,
+  JITLoader = Log::ChannelFlag<12>,
+  Language = Log::ChannelFlag<13>,
+  MMap = Log::ChannelFlag<14>,
+  Modules = Log::ChannelFlag<15>,
+  Object = Log::ChannelFlag<16>,
+  OS = Log::ChannelFlag<17>,
+  Platform = Log::ChannelFlag<18>,
+  Process = Log::ChannelFlag<19>,
+  Script = Log::ChannelFlag<20>,
+  State = Log::ChannelFlag<21>,
+  Step = Log::ChannelFlag<22>,
+  Symbols = Log::ChannelFlag<23>,
+  SystemRuntime = Log::ChannelFlag<24>,
+  Target = Log::ChannelFlag<25>,
+  Temporary = Log::ChannelFlag<26>,
+  Thread = Log::ChannelFlag<27>,
+  Types = Log::ChannelFlag<28>,
+  Unwind = Log::ChannelFlag<29>,
+  Watchpoints = Log::ChannelFlag<30>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints),
+};
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+// Log Bits specific to logging in lldb
+#define LIBLLDB_LOG_PROCESS ::lldb_private::LLDBLog::Process
+#define LIBLLDB_LOG_THREAD ::lldb_private::LLDBLog::Thread
+#define LIBLLDB_LOG_DYNAMIC_LOADER ::lldb_private::LLDBLog::DynamicLoader
+#define LIBLLDB_LOG_EVENTS ::lldb_private::LLDBLog::Events
+#define LIBLLDB_LOG_BREAKPOINTS ::lldb_private::LLDBLog::Breakpoints
+#define LIBLLDB_LOG_WATCHPOINTS ::lldb_private::LLDBLog::Watchpoints
+#define LIBLLDB_LOG_STEP ::lldb_private::LLDBLog::Step
+#define LIBLLDB_LOG_EXPRESSIONS ::lldb_private::LLDBLog::Expressions
+#define LIBLLDB_LOG_TEMPORARY ::lldb_private::LLDBLog::Temporary
+#define LIBLLDB_LOG_STATE ::lldb_private::LLDBLog::State
+#define LIBLLDB_LOG_OBJECT ::lldb_private::LLDBLog::Object
+#define LIBLLDB_LOG_COMMUNICATION ::lldb_private::LLDBLog::Communication
+#define LIBLLDB_LOG_CONNECTION ::lldb_private::LLDBLog::Connection
+#define LIBLLDB_LOG_HOST ::lldb_private::LLDBLog::Host
+#define LIBLLDB_LOG_UNWIND ::lldb_private::LLDBLog::Unwind
+#define LIBLLDB_LOG_API ::lldb_private::LLDBLog::API
+#define LIBLLDB_LOG_SCRIPT ::lldb_private::LLDBLog::Script
+#define LIBLLDB_LOG_COMMANDS ::lldb_private::LLDBLog::Commands
+#define LIBLLDB_LOG_TYPES ::lldb_private::LLDBLog::Types
+#define LIBLLDB_LOG_SYMBOLS ::lldb_private::LLDBLog::Symbols
+#define LIBLLDB_LOG_MODULES ::lldb_private::LLDBLog::Modules
+#define LIBLLDB_LOG_TARGET ::lldb_private::LLDBLog::Target
+#define LIBLLDB_LOG_MMAP ::lldb_private::LLDBLog::MMap
+#define LIBLLDB_LOG_OS ::lldb_private::LLDBLog::OS
+#define LIBLLDB_LOG_PLATFORM ::lldb_private::LLDBLog::Platform
+#define LIBLLDB_LOG_SYSTEM_RUNTIME ::lldb_private::LLDBLog::SystemRuntime
+#define LIBLLDB_LOG_JIT_LOADER ::lldb_private::LLDBLog::JITLoader
+#define LIBLLDB_LOG_LANGUAGE ::lldb_private::LLDBLog::Language
+#define LIBLLDB_LOG_DATAFORMATTERS ::lldb_private::LLDBLog::DataFormatters
+#define LIBLLDB_LOG_DEMANGLE ::lldb_private::LLDBLog::Demangle
+#define LIBLLDB_LOG_AST ::lldb_private::LLDBLog::AST
 
-Log *GetLogIfAllCategoriesSet(uint32_t mask);
+Log *GetLogIfAllCategoriesSet(LLDBLog mask);
 
-Log *GetLogIfAnyCategoriesSet(uint32_t mask);
+Log *GetLogIfAnyCategoriesSet(LLDBLog mask);
 
 void InitializeLldbChannel();
 
+template <> Log::Channel &LogChannelFor();
 } // namespace lldb_private
 
 #endif // LLDB_UTILITY_LOGGING_H
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
index f4d0803b264a3..7ad88aabc2c03 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
+++ b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
@@ -13,16 +13,20 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-  {{"break"}, {"log breakpoints"}, POSIX_LOG_BREAKPOINTS},
-  {{"memory"}, {"log memory reads and writes"}, POSIX_LOG_MEMORY},
-  {{"process"}, {"log process events and activities"}, POSIX_LOG_PROCESS},
-  {{"ptrace"}, {"log all calls to ptrace"}, POSIX_LOG_PTRACE},
-  {{"registers"}, {"log register read/writes"}, POSIX_LOG_REGISTERS},
-  {{"thread"}, {"log thread events and activities"}, POSIX_LOG_THREAD},
-  {{"watch"}, {"log watchpoint related activities"}, POSIX_LOG_WATCHPOINTS},
+    {{"break"}, {"log breakpoints"}, POSIXLog::Breakpoints},
+    {{"memory"}, {"log memory reads and writes"}, POSIXLog::Memory},
+    {{"process"}, {"log process events and activities"}, POSIXLog::Process},
+    {{"ptrace"}, {"log all calls to ptrace"}, POSIXLog::Ptrace},
+    {{"registers"}, {"log register read/writes"}, POSIXLog::Registers},
+    {{"thread"}, {"log thread events and activities"}, POSIXLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, POSIXLog::Watchpoints},
 };
 
-Log::Channel ProcessPOSIXLog::g_channel(g_categories, POSIX_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, POSIXLog::Process);
+
+template <> Log::Channel &lldb_private::LogChannelFor() {
+  return g_channel;
+}
 
 void ProcessPOSIXLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
index c0147c43410fb..f0807e1d4480d 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
+++ b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
@@ -13,27 +13,35 @@
 
 #include "lldb/Utility/Log.h"
 
-#define POSIX_LOG_PROCESS (1u << 1)
-#define POSIX_LOG_THREAD (1u << 2)
-#define POSIX_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define POSIX_LOG_PTRACE (1u << 5)
-#define POSIX_LOG_REGISTERS (1u << 6)
-#define POSIX_LOG_BREAKPOINTS (1u << 7)
-#define POSIX_LOG_WATCHPOINTS (1u << 8)
-#define POSIX_LOG_ALL (UINT32_MAX)
-#define POSIX_LOG_DEFAULT POSIX_LOG_PROCESS
-
 namespace lldb_private {
-class ProcessPOSIXLog {
-  static Log::Channel g_channel;
 
+enum class POSIXLog : Log::MaskType {
+  Breakpoints = Log::ChannelFlag<0>,
+  Memory = Log::ChannelFlag<1>,
+  Process = Log::ChannelFlag<2>,
+  Ptrace = Log::ChannelFlag<3>,
+  Registers = Log::ChannelFlag<4>,
+  Thread = Log::ChannelFlag<5>,
+  Watchpoints = Log::ChannelFlag<6>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
+
+#define POSIX_LOG_PROCESS ::lldb_private::POSIXLog::Process
+#define POSIX_LOG_THREAD ::lldb_private::POSIXLog::Thread
+#define POSIX_LOG_MEMORY ::lldb_private::POSIXLog::Memory
+#define POSIX_LOG_PTRACE ::lldb_private::POSIXLog::Ptrace
+#define POSIX_LOG_REGISTERS ::lldb_private::POSIXLog::Registers
+#define POSIX_LOG_BREAKPOINTS ::lldb_private::POSIXLog::Breakpoints
+#define POSIX_LOG_WATCHPOINTS ::lldb_private::POSIXLog::Watchpoints
+
+class ProcessPOSIXLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) {
-    return g_channel.GetLogIfAll(mask);
-  }
+  static Log *GetLogIfAllCategoriesSet(POSIXLog mask) { return GetLog(mask); }
 };
-}
+
+template <> Log::Channel &LogChannelFor();
+} // namespace lldb_private
 
 #endif // liblldb_ProcessPOSIXLog_h_
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index a6749274ca997..123a8198a89b8 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -1086,7 +1086,7 @@ void GDBRemoteCommunicationServerLLGS::NewSubprocess(
 }
 
 void GDBRemoteCommunicationServerLLGS::DataAvailableCallback() {
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_COMM));
+  Log *log = GetLog(GDBRLog::Comm);
 
   bool interrupt = false;
   bool done = false;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 1946520d5d6cb..d8ad0b4e4e4be 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -529,7 +529,7 @@ Status ProcessGDBRemote::WillAttachToProcessWithName(const char *process_name,
 }
 
 Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
-  Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+  Log *log = GetLog(GDBRLog::Process);
 
   Status error(WillLaunchOrAttach());
   if (error.Fail())
@@ -606,8 +606,7 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
                 ReadModuleFromMemory(FileSpec(namebuf), standalone_value);
           }
 
-          Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(
-              LIBLLDB_LOG_DYNAMIC_LOADER));
+          Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
           if (module_sp.get()) {
             target.GetImages().AppendIfNeeded(module_sp, false);
 
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
index 40990ef664943..3322f6b8048ab 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
@@ -15,25 +15,29 @@ using namespace lldb_private;
 using namespace lldb_private::process_gdb_remote;
 
 static constexpr Log::Category g_categories[] = {
-    {{"async"}, {"log asynchronous activity"}, GDBR_LOG_ASYNC},
-    {{"break"}, {"log breakpoints"}, GDBR_LOG_BREAKPOINTS},
-    {{"comm"}, {"log communication activity"}, GDBR_LOG_COMM},
-    {{"packets"}, {"log gdb remote packets"}, GDBR_LOG_PACKETS},
-    {{"memory"}, {"log memory reads and writes"}, GDBR_LOG_MEMORY},
+    {{"async"}, {"log asynchronous activity"}, GDBRLog::Async},
+    {{"break"}, {"log breakpoints"}, GDBRLog::Breakpoints},
+    {{"comm"}, {"log communication activity"}, GDBRLog::Comm},
+    {{"packets"}, {"log gdb remote packets"}, GDBRLog::Packets},
+    {{"memory"}, {"log memory reads and writes"}, GDBRLog::Memory},
     {{"data-short"},
      {"log memory bytes for memory reads and writes for short transactions "
       "only"},
-     GDBR_LOG_MEMORY_DATA_SHORT},
+     GDBRLog::MemoryDataShort},
     {{"data-long"},
      {"log memory bytes for memory reads and writes for all transactions"},
-     GDBR_LOG_MEMORY_DATA_LONG},
-    {{"process"}, {"log process events and activities"}, GDBR_LOG_PROCESS},
-    {{"step"}, {"log step related activities"}, GDBR_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, GDBR_LOG_THREAD},
-    {{"watch"}, {"log watchpoint related activities"}, GDBR_LOG_WATCHPOINTS},
+     GDBRLog::MemoryDataLong},
+    {{"process"}, {"log process events and activities"}, GDBRLog::Process},
+    {{"step"}, {"log step related activities"}, GDBRLog::Step},
+    {{"thread"}, {"log thread events and activities"}, GDBRLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, GDBRLog::Watchpoints},
 };
 
-Log::Channel ProcessGDBRemoteLog::g_channel(g_categories, GDBR_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, GDBRLog::Packets);
+
+template <> Log::Channel &lldb_private::LogChannelFor() {
+  return g_channel;
+}
 
 void ProcessGDBRemoteLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
index bd3e993cf72ac..44e390ec8cadb 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
@@ -11,35 +11,52 @@
 
 #include "lldb/Utility/Log.h"
 
-#define GDBR_LOG_PROCESS (1u << 1)
-#define GDBR_LOG_THREAD (1u << 2)
-#define GDBR_LOG_PACKETS (1u << 3)
-#define GDBR_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define GDBR_LOG_MEMORY_DATA_SHORT                                             \
-  (1u << 5) // Log short memory reads/writes bytes
-#define GDBR_LOG_MEMORY_DATA_LONG (1u << 6) // Log all memory reads/writes bytes
-#define GDBR_LOG_BREAKPOINTS (1u << 7)
-#define GDBR_LOG_WATCHPOINTS (1u << 8)
-#define GDBR_LOG_STEP (1u << 9)
-#define GDBR_LOG_COMM (1u << 10)
-#define GDBR_LOG_ASYNC (1u << 11)
-#define GDBR_LOG_ALL (UINT32_MAX)
-#define GDBR_LOG_DEFAULT GDBR_LOG_PACKETS
-
 namespace lldb_private {
 namespace process_gdb_remote {
 
-class ProcessGDBRemoteLog {
-  static Log::Channel g_channel;
+enum class GDBRLog : Log::MaskType {
+  Async = Log::ChannelFlag<0>,
+  Breakpoints = Log::ChannelFlag<1>,
+  Comm = Log::ChannelFlag<2>,
+  Memory = Log::ChannelFlag<3>,          // Log memory reads/writes calls
+  MemoryDataLong = Log::ChannelFlag<4>,  // Log all memory reads/writes bytes
+  MemoryDataShort = Log::ChannelFlag<5>, // Log short memory reads/writes bytes
+  Packets = Log::ChannelFlag<6>,
+  Process = Log::ChannelFlag<7>,
+  Step = Log::ChannelFlag<8>,
+  Thread = Log::ChannelFlag<9>,
+  Watchpoints = Log::ChannelFlag<10>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
 
+#define GDBR_LOG_PROCESS ::lldb_private::process_gdb_remote::GDBRLog::Process
+#define GDBR_LOG_THREAD ::lldb_private::process_gdb_remote::GDBRLog::Thread
+#define GDBR_LOG_PACKETS ::lldb_private::process_gdb_remote::GDBRLog::Packets
+#define GDBR_LOG_MEMORY ::lldb_private::process_gdb_remote::GDBRLog::Memory
+#define GDBR_LOG_MEMORY_DATA_SHORT                                             \
+  ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataShort
+#define GDBR_LOG_MEMORY_DATA_LONG                                              \
+  ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataLong
+#define GDBR_LOG_BREAKPOINTS                                                   \
+  ::lldb_private::process_gdb_remote::GDBRLog::Breakpoints
+#define GDBR_LOG_WATCHPOINTS                                                   \
+  ::lldb_private::process_gdb_remote::GDBRLog::Watchpoints
+#define GDBR_LOG_STEP ::lldb_private::process_gdb_remote::GDBRLog::Step
+#define GDBR_LOG_COMM ::lldb_private::process_gdb_remote::GDBRLog::Comm
+#define GDBR_LOG_ASYNC ::lldb_private::process_gdb_remote::GDBRLog::Async
+
+class ProcessGDBRemoteLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) { return g_channel.GetLogIfAll(mask); }
-  static Log *GetLogIfAnyCategoryIsSet(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAllCategoriesSet(GDBRLog mask) { return GetLog(mask); }
+  static Log *GetLogIfAnyCategoryIsSet(GDBRLog mask) { return GetLog(mask); }
 };
 
 } // namespace process_gdb_remote
+
+template <> Log::Channel &LogChannelFor<process_gdb_remote::GDBRLog>();
+
 } // namespace lldb_private
 
 #endif // LLDB_SOURCE_PLUGINS_PROCESS_GDB_REMOTE_PROCESSGDBREMOTELOG_H
diff --git a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
index 2a9896e41085c..ba73115e4ad60 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
@@ -39,7 +39,7 @@ ThreadGDBRemote::ThreadGDBRemote(Process &process, lldb::tid_t tid)
       m_dispatch_queue_t(LLDB_INVALID_ADDRESS), m_queue_kind(eQueueKindUnknown),
       m_queue_serial_number(LLDB_INVALID_QUEUE_ID),
       m_associated_with_libdispatch_queue(eLazyBoolCalculate) {
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOG(log, "this = {0}, pid = {1}, tid = {2}", this, process.GetID(),
            GetID());
   // At this point we can clone reg_info for architectures supporting
@@ -54,7 +54,7 @@ ThreadGDBRemote::ThreadGDBRemote(Process &process, lldb::tid_t tid)
 
 ThreadGDBRemote::~ThreadGDBRemote() {
   ProcessSP process_sp(GetProcess());
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOG(log, "this = {0}, pid = {1}, tid = {2}", this,
            process_sp ? process_sp->GetID() : LLDB_INVALID_PROCESS_ID, GetID());
   DestroyThread();
@@ -222,7 +222,7 @@ void ThreadGDBRemote::SetAssociatedWithLibdispatchQueue(
 StructuredData::ObjectSP ThreadGDBRemote::FetchThreadExtendedInfo() {
   StructuredData::ObjectSP object_sp;
   const lldb::user_id_t tid = GetProtocolID();
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOGF(log, "Fetching extended information for thread %4.4" PRIx64, tid);
   ProcessSP process_sp(GetProcess());
   if (process_sp) {
@@ -236,7 +236,7 @@ StructuredData::ObjectSP ThreadGDBRemote::FetchThreadExtendedInfo() {
 void ThreadGDBRemote::WillResume(StateType resume_state) {
   int signo = GetResumeSignal();
   const lldb::user_id_t tid = GetProtocolID();
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOGF(log, "Resuming thread: %4.4" PRIx64 " with state: %s.", tid,
             StateAsCString(resume_state));
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
index 3f1d6677bacf2..d2b8fe19db530 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
@@ -13,18 +13,20 @@ using namespace lldb_private;
 static constexpr Log::Category g_categories[] = {
     {{"comp"},
      {"log insertions of object files into DWARF debug maps"},
-     DWARF_LOG_TYPE_COMPLETION},
-    {{"info"}, {"log the parsing of .debug_info"}, DWARF_LOG_DEBUG_INFO},
-    {{"line"}, {"log the parsing of .debug_line"}, DWARF_LOG_DEBUG_LINE},
+     DWARFLog::TypeCompletion},
+    {{"info"}, {"log the parsing of .debug_info"}, DWARFLog::DebugInfo},
+    {{"line"}, {"log the parsing of .debug_line"}, DWARFLog::DebugLine},
     {{"lookups"},
      {"log any lookups that happen by name, regex, or address"},
-     DWARF_LOG_LOOKUPS},
-    {{"map"},
-     {"log struct/unions/class type completions"},
-     DWARF_LOG_DEBUG_MAP},
+     DWARFLog::Lookups},
+    {{"map"}, {"log struct/unions/class type completions"}, DWARFLog::DebugMap},
 };
 
-Log::Channel LogChannelDWARF::g_channel(g_categories, DWARF_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, DWARFLog::DebugInfo);
+
+template <> Log::Channel &lldb_private::LogChannelFor<DWARFLog>() {
+  return g_channel;
+}
 
 void LogChannelDWARF::Initialize() {
   Log::Register("dwarf", g_channel);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
index 2fc23563ef938..8076c719e9c46 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
@@ -11,25 +11,32 @@
 
 #include "lldb/Utility/Log.h"
 
-#define DWARF_LOG_DEBUG_INFO (1u << 1)
-#define DWARF_LOG_DEBUG_LINE (1u << 2)
-#define DWARF_LOG_LOOKUPS (1u << 3)
-#define DWARF_LOG_TYPE_COMPLETION (1u << 4)
-#define DWARF_LOG_DEBUG_MAP (1u << 5)
-#define DWARF_LOG_ALL (UINT32_MAX)
-#define DWARF_LOG_DEFAULT (DWARF_LOG_DEBUG_INFO)
-
 namespace lldb_private {
-class LogChannelDWARF {
-  static Log::Channel g_channel;
 
+enum class DWARFLog : Log::MaskType {
+  DebugInfo = Log::ChannelFlag<0>,
+  DebugLine = Log::ChannelFlag<1>,
+  DebugMap = Log::ChannelFlag<2>,
+  Lookups = Log::ChannelFlag<3>,
+  TypeCompletion = Log::ChannelFlag<4>,
+  LLVM_MARK_AS_BITMASK_ENUM(TypeCompletion)
+};
+#define DWARF_LOG_DEBUG_INFO ::lldb_private::DWARFLog::DebugInfo
+#define DWARF_LOG_DEBUG_LINE ::lldb_private::DWARFLog::DebugLine
+#define DWARF_LOG_LOOKUPS ::lldb_private::DWARFLog::Lookups
+#define DWARF_LOG_TYPE_COMPLETION ::lldb_private::DWARFLog::TypeCompletion
+#define DWARF_LOG_DEBUG_MAP ::lldb_private::DWARFLog::DebugMap
+
+class LogChannelDWARF {
 public:
   static void Initialize();
   static void Terminate();
 
-  static Log *GetLogIfAll(uint32_t mask) { return g_channel.GetLogIfAll(mask); }
-  static Log *GetLogIfAny(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAll(DWARFLog mask) { return GetLog(mask); }
+  static Log *GetLogIfAny(DWARFLog mask) { return GetLog(mask); }
 };
-}
+
+template <> Log::Channel &LogChannelFor<DWARFLog>();
+} // namespace lldb_private
 
 #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_LOGCHANNELDWARF_H
diff --git a/lldb/source/Utility/Log.cpp b/lldb/source/Utility/Log.cpp
index 26070d0740b19..d229538073d10 100644
--- a/lldb/source/Utility/Log.cpp
+++ b/lldb/source/Utility/Log.cpp
@@ -88,7 +88,7 @@ void Log::Enable(const std::shared_ptr &stream_sp,
                  uint32_t options, uint32_t flags) {
   llvm::sys::ScopedWriter lock(m_mutex);
 
-  uint32_t mask = m_mask.fetch_or(flags, std::memory_order_relaxed);
+  MaskType mask = m_mask.fetch_or(flags, std::memory_order_relaxed);
   if (mask | flags) {
     m_options.store(options, std::memory_order_relaxed);
     m_stream_sp = stream_sp;
@@ -99,7 +99,7 @@ void Log::Enable(const std::shared_ptr &stream_sp,
 void Log::Disable(uint32_t flags) {
   llvm::sys::ScopedWriter lock(m_mutex);
 
-  uint32_t mask = m_mask.fetch_and(~flags, std::memory_order_relaxed);
+  MaskType mask = m_mask.fetch_and(~flags, std::memory_order_relaxed);
   if (!(mask & ~flags)) {
     m_stream_sp.reset();
     m_channel.log_ptr.store(nullptr, std::memory_order_relaxed);
diff --git a/lldb/source/Utility/Logging.cpp b/lldb/source/Utility/Logging.cpp
index 4648bec502c54..67d5d3af2640c 100644
--- a/lldb/source/Utility/Logging.cpp
+++ b/lldb/source/Utility/Logging.cpp
@@ -16,49 +16,74 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-  {{"api"}, {"log API calls and return values"}, LIBLLDB_LOG_API},
-  {{"ast"}, {"log AST"}, LIBLLDB_LOG_AST},
-  {{"break"}, {"log breakpoints"}, LIBLLDB_LOG_BREAKPOINTS},
-  {{"commands"}, {"log command argument parsing"}, LIBLLDB_LOG_COMMANDS},
-  {{"comm"}, {"log communication activities"}, LIBLLDB_LOG_COMMUNICATION},
-  {{"conn"}, {"log connection details"}, LIBLLDB_LOG_CONNECTION},
-  {{"demangle"}, {"log mangled names to catch demangler crashes"}, LIBLLDB_LOG_DEMANGLE},
-  {{"dyld"}, {"log shared library related activities"}, LIBLLDB_LOG_DYNAMIC_LOADER},
-  {{"event"}, {"log broadcaster, listener and event queue activities"}, LIBLLDB_LOG_EVENTS},
-  {{"expr"}, {"log expressions"}, LIBLLDB_LOG_EXPRESSIONS},
-  {{"formatters"}, {"log data formatters related activities"}, LIBLLDB_LOG_DATAFORMATTERS},
-  {{"host"}, {"log host activities"}, LIBLLDB_LOG_HOST},
-  {{"jit"}, {"log JIT events in the target"}, LIBLLDB_LOG_JIT_LOADER},
-  {{"language"}, {"log language runtime events"}, LIBLLDB_LOG_LANGUAGE},
-  {{"mmap"}, {"log mmap related activities"}, LIBLLDB_LOG_MMAP},
-  {{"module"}, {"log module activities such as when modules are created, destroyed, replaced, and more"}, LIBLLDB_LOG_MODULES},
-  {{"object"}, {"log object construction/destruction for important objects"}, LIBLLDB_LOG_OBJECT},
-  {{"os"}, {"log OperatingSystem plugin related activities"}, LIBLLDB_LOG_OS},
-  {{"platform"}, {"log platform events and activities"}, LIBLLDB_LOG_PLATFORM},
-  {{"process"}, {"log process events and activities"}, LIBLLDB_LOG_PROCESS},
-  {{"script"}, {"log events about the script interpreter"}, LIBLLDB_LOG_SCRIPT},
-  {{"state"}, {"log private and public process state changes"}, LIBLLDB_LOG_STATE},
-  {{"step"}, {"log step related activities"}, LIBLLDB_LOG_STEP},
-  {{"symbol"}, {"log symbol related issues and warnings"}, LIBLLDB_LOG_SYMBOLS},
-  {{"system-runtime"}, {"log system runtime events"}, LIBLLDB_LOG_SYSTEM_RUNTIME},
-  {{"target"}, {"log target events and activities"}, LIBLLDB_LOG_TARGET},
-  {{"temp"}, {"log internal temporary debug messages"}, LIBLLDB_LOG_TEMPORARY},
-  {{"thread"}, {"log thread events and activities"}, LIBLLDB_LOG_THREAD},
-  {{"types"}, {"log type system related activities"}, LIBLLDB_LOG_TYPES},
-  {{"unwind"}, {"log stack unwind activities"}, LIBLLDB_LOG_UNWIND},
-  {{"watch"}, {"log watchpoint related activities"}, LIBLLDB_LOG_WATCHPOINTS},
+    {{"api"}, {"log API calls and return values"}, LLDBLog::API},
+    {{"ast"}, {"log AST"}, LLDBLog::AST},
+    {{"break"}, {"log breakpoints"}, LLDBLog::Breakpoints},
+    {{"commands"}, {"log command argument parsing"}, LLDBLog::Commands},
+    {{"comm"}, {"log communication activities"}, LLDBLog::Communication},
+    {{"conn"}, {"log connection details"}, LLDBLog::Connection},
+    {{"demangle"},
+     {"log mangled names to catch demangler crashes"},
+     LLDBLog::Demangle},
+    {{"dyld"},
+     {"log shared library related activities"},
+     LLDBLog::DynamicLoader},
+    {{"event"},
+     {"log broadcaster, listener and event queue activities"},
+     LLDBLog::Events},
+    {{"expr"}, {"log expressions"}, LLDBLog::Expressions},
+    {{"formatters"},
+     {"log data formatters related activities"},
+     LLDBLog::DataFormatters},
+    {{"host"}, {"log host activities"}, LLDBLog::Host},
+    {{"jit"}, {"log JIT events in the target"}, LLDBLog::JITLoader},
+    {{"language"}, {"log language runtime events"}, LLDBLog::Language},
+    {{"mmap"}, {"log mmap related activities"}, LLDBLog::MMap},
+    {{"module"},
+     {"log module activities such as when modules are created, destroyed, "
+      "replaced, and more"},
+     LLDBLog::Modules},
+    {{"object"},
+     {"log object construction/destruction for important objects"},
+     LLDBLog::Object},
+    {{"os"}, {"log OperatingSystem plugin related activities"}, LLDBLog::OS},
+    {{"platform"}, {"log platform events and activities"}, LLDBLog::Platform},
+    {{"process"}, {"log process events and activities"}, LLDBLog::Process},
+    {{"script"}, {"log events about the script interpreter"}, LLDBLog::Script},
+    {{"state"},
+     {"log private and public process state changes"},
+     LLDBLog::State},
+    {{"step"}, {"log step related activities"}, LLDBLog::Step},
+    {{"symbol"}, {"log symbol related issues and warnings"}, LLDBLog::Symbols},
+    {{"system-runtime"}, {"log system runtime events"}, LLDBLog::SystemRuntime},
+    {{"target"}, {"log target events and activities"}, LLDBLog::Target},
+    {{"temp"}, {"log internal temporary debug messages"}, LLDBLog::Temporary},
+    {{"thread"}, {"log thread events and activities"}, LLDBLog::Thread},
+    {{"types"}, {"log type system related activities"}, LLDBLog::Types},
+    {{"unwind"}, {"log stack unwind activities"}, LLDBLog::Unwind},
+    {{"watch"}, {"log watchpoint related activities"}, LLDBLog::Watchpoints},
 };
 
-static Log::Channel g_log_channel(g_categories, LIBLLDB_LOG_DEFAULT);
+static Log::Channel g_log_channel(g_categories,
+                                  LLDBLog::Process | LLDBLog::Thread |
+                                      LLDBLog::DynamicLoader |
+                                      LLDBLog::Breakpoints |
+                                      LLDBLog::Watchpoints | LLDBLog::Step |
+                                      LLDBLog::State | LLDBLog::Symbols |
+                                      LLDBLog::Target | LLDBLog::Commands);
+
+template <> Log::Channel &lldb_private::LogChannelFor<LLDBLog>() {
+  return g_log_channel;
+}
 
 void lldb_private::InitializeLldbChannel() {
   Log::Register("lldb", g_log_channel);
 }
 
-Log *lldb_private::GetLogIfAllCategoriesSet(uint32_t mask) {
-  return g_log_channel.GetLogIfAll(mask);
+Log *lldb_private::GetLogIfAllCategoriesSet(LLDBLog mask) {
+  return GetLog(mask);
 }
 
-Log *lldb_private::GetLogIfAnyCategoriesSet(uint32_t mask) {
-  return g_log_channel.GetLogIfAny(mask);
+Log *lldb_private::GetLogIfAnyCategoriesSet(LLDBLog mask) {
+  return GetLog(mask);
 }
diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp
index 906ae4c378b6b..7648a0bb668d0 100644
--- a/lldb/tools/lldb-server/lldb-gdbserver.cpp
+++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp
@@ -27,6 +27,7 @@
 #include "lldb/Host/Socket.h"
 #include "lldb/Host/common/NativeProcessProtocol.h"
 #include "lldb/Target/Process.h"
+#include "lldb/Utility/Logging.h"
 #include "lldb/Utility/Status.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Option/ArgList.h"
diff --git a/lldb/unittests/Utility/LogTest.cpp b/lldb/unittests/Utility/LogTest.cpp
index dfbb1f092146c..b3a5a7af44da3 100644
--- a/lldb/unittests/Utility/LogTest.cpp
+++ b/lldb/unittests/Utility/LogTest.cpp
@@ -18,13 +18,24 @@
 using namespace lldb;
 using namespace lldb_private;
 
-enum { FOO = 1, BAR = 2 };
+enum class TestChannel : Log::MaskType {
+  FOO = Log::ChannelFlag<0>,
+  BAR = Log::ChannelFlag<1>,
+  LLVM_MARK_AS_BITMASK_ENUM(BAR),
+};
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
 static constexpr Log::Category test_categories[] = {
-    {{"foo"}, {"log foo"}, FOO}, {{"bar"}, {"log bar"}, BAR},
+    {{"foo"}, {"log foo"}, TestChannel::FOO},
+    {{"bar"}, {"log bar"}, TestChannel::BAR},
 };
-static constexpr uint32_t default_flags = FOO;
 
-static Log::Channel test_channel(test_categories, default_flags);
+static Log::Channel test_channel(test_categories, TestChannel::FOO);
+
+namespace lldb_private {
+template <> Log::Channel &LogChannelFor<TestChannel>() { return test_channel; }
+} // namespace lldb_private
 
 // Wrap enable, disable and list functions to make them easier to test.
 static bool EnableChannel(std::shared_ptr<llvm::raw_ostream> stream_sp,
@@ -93,7 +104,7 @@ void LogChannelEnabledTest::SetUp() {
   std::string error;
   ASSERT_TRUE(EnableChannel(m_stream_sp, 0, "chan", {}, error));
 
-  m_log = test_channel.GetLogIfAll(FOO);
+  m_log = GetLog(TestChannel::FOO);
   ASSERT_NE(nullptr, m_log);
 }
 
@@ -124,18 +135,18 @@ TEST(LogTest, Register) {
 TEST(LogTest, Unregister) {
   llvm::llvm_shutdown_obj obj;
   Log::Register("chan", test_channel);
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
   EXPECT_TRUE(Log::EnableLogChannel(stream_sp, 0, "chan", {"foo"}, llvm::nulls()));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
   Log::Unregister("chan");
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
 }
 
 TEST_F(LogChannelTest, Enable) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
@@ -144,20 +155,22 @@ TEST_F(LogChannelTest, Enable) {
   EXPECT_EQ("Invalid log channel 'chanchan'.\n", error);
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"baz"}, error));
   EXPECT_NE(std::string::npos, error.find("unrecognized log category 'baz'"))
       << "error: " << error;
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 }
 
 TEST_F(LogChannelTest, EnableOptions) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
@@ -165,32 +178,33 @@ TEST_F(LogChannelTest, EnableOptions) {
   EXPECT_TRUE(
       EnableChannel(stream_sp, LLDB_LOG_OPTION_VERBOSE, "chan", {}, error));
 
-  Log *log = test_channel.GetLogIfAll(FOO);
+  Log *log = GetLog(TestChannel::FOO);
   ASSERT_NE(nullptr, log);
   EXPECT_TRUE(log->GetVerbose());
 }
 
 TEST_F(LogChannelTest, Disable) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
   std::string error;
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"foo", "bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 
   EXPECT_TRUE(DisableChannel("chan", {"bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(DisableChannel("chan", {"baz"}, error));
   EXPECT_NE(std::string::npos, error.find("unrecognized log category 'baz'"))
       << "error: " << error;
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(DisableChannel("chan", {}, error));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO | BAR));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO | TestChannel::BAR));
 }
 
 TEST_F(LogChannelTest, List) {
@@ -309,5 +323,5 @@ TEST_F(LogChannelEnabledTest, LogGetLogThread) {
 
   // The mask should be either zero of "FOO". In either case, we should not trip
   // any undefined behavior (run the test under TSAN to verify this).
-  EXPECT_THAT(mask, testing::AnyOf(0, FOO));
+  EXPECT_THAT(mask, testing::AnyOf(0, Log::MaskType(TestChannel::FOO)));
 }

From ce6903595b7161f881b62834c55b3099853cabd5 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Tue, 25 Jan 2022 12:10:28 +0100
Subject: [PATCH 529/946] [lldb/test] Use abspath when searching for lldb.exe

realpath is too aggressive and does not produce the desired effect if
one's build folder is a symlink farm.
---
 lldb/packages/Python/lldbsuite/test/dotest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index 0815188d6cde8..ce01146055b2c 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -357,7 +357,7 @@ def parseOptionsAndInitTestdirs():
 
     if args.executable:
         # lldb executable is passed explicitly
-        lldbtest_config.lldbExec = os.path.realpath(args.executable)
+        lldbtest_config.lldbExec = os.path.abspath(args.executable)
         if not is_exe(lldbtest_config.lldbExec):
             lldbtest_config.lldbExec = which(args.executable)
         if not is_exe(lldbtest_config.lldbExec):

From 109cc5adccaec4c2264c0db3d54bbec1183bf95d Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson 
Date: Wed, 12 Jan 2022 00:34:11 +0100
Subject: [PATCH 530/946] [DAGCombine] Fold SRA of a load into a narrower
 sign-extending load

An sra is basically sign-extending a narrower value. Fold away the
shift by doing a sextload of a narrower value, when it is legal to
reduce the load width accordingly.

Differential Revision: https://reviews.llvm.org/D116930
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 ++++++++++++-------
 llvm/test/CodeGen/PowerPC/pr13891.ll          |  2 +-
 llvm/test/CodeGen/X86/combine-sra-load.ll     | 20 +++++++---------
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1137f8b16977f..c3d2ed2dcf85c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8964,6 +8964,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
     return MULH;
 
+  // Attempt to convert a sra of a load into a narrower sign-extending load.
+  if (SDValue NarrowLoad = reduceLoadWidth(N))
+    return NarrowLoad;
+
   return SDValue();
 }
 
@@ -12151,10 +12155,10 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
-  } else if (Opc == ISD::SRL) {
-    // Another special-case: SRL is basically zero-extending a narrower value,
-    // or it may be shifting a higher subword, half or byte into the lowest
-    // bits.
+  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+    // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+    // value, or it may be shifting a higher subword, half or byte into the
+    // lowest bits.
 
     // Only handle shift with constant shift amount, and the shiftee must be a
     // load.
@@ -12168,13 +12172,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
     if (MemoryWidth <= ShAmt)
       return SDValue();
-    // Attempt to fold away the SRL by using ZEXTLOAD.
-    ExtType = ISD::ZEXTLOAD;
+    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
     // If original load is a SEXTLOAD then we can't simply replace it by a
     // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
-    // followed by a ZEXT, but that is not handled at the moment).
-    if (LN->getExtensionType() == ISD::SEXTLOAD)
+    // followed by a ZEXT, but that is not handled at the moment). Similarly if
+    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+         LN->getExtensionType() == ISD::ZEXTLOAD) &&
+        LN->getExtensionType() != ExtType)
       return SDValue();
   } else if (Opc == ISD::AND) {
     // An AND with a constant mask is the same as a truncate + zero-extend.
diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll
index f35a0a724bfd4..816166a20fedc 100644
--- a/llvm/test/CodeGen/PowerPC/pr13891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr13891.ll
@@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {
 ; CHECK-LABEL: _Z5check3foos:
 ; CHECK: sth 3, {{[0-9]+}}(1)
-; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1)
 entry:
   %0 = bitcast %struct.foo* %f to i16*
   %1 = load i16, i16* %0, align 2
diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
index 119acaa6a02b5..ba5814f0f160d 100644
--- a/llvm/test/CodeGen/X86/combine-sra-load.ll
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
 
-; FIXME: fold (sra (load i32), 16)) -> (sextload i16)
+; fold (sra (load i32), 16)) -> (sextload i16)
 define i32 @sra_half(i32* %p) {
 ; CHECK-LABEL: sra_half:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    movswl 2(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i32, i32* %p
   %shift = ashr i32 %load, 16
@@ -25,12 +24,11 @@ define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
   ret <4 x i32> %shift
 }
 
-; FIXME: fold (sra (load i64), 48)) -> (sextload i16)
+; fold (sra (load i64), 48)) -> (sextload i16)
 define i64 @sra_large_shift(i64* %r) {
 ; CHECK-LABEL: sra_large_shift:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    movswq 6(%rdi), %rax
 ; CHECK-NEXT:    retq
   %t0 = load i64, i64* %r
   %conv = ashr i64 %t0, 48
@@ -61,12 +59,11 @@ define i32 @sra_of_zextload(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
 define i32 @sra_of_sextload(i16* %p) {
 ; CHECK-LABEL: sra_of_sextload:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movswl (%rdi), %eax
-; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    movsbl 1(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i16, i16* %p
   %sext = sext i16 %load to i32
@@ -89,12 +86,11 @@ define i32 @sra_of_sextload_no_fold(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: Fold even if SRA has multiple uses.
+; Fold even if SRA has multiple uses.
 define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
 ; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %ecx
-; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movswl 2(%rdi), %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    xorl $6, %eax
 ; CHECK-NEXT:    orl %ecx, %eax

From dbbe0109086d8e813fbabf3114da737086250ff9 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Tue, 25 Jan 2022 19:24:49 +0800
Subject: [PATCH 531/946] [MLIR] [AsyncToLLVM] Use llvm.coro.align intrinsic

Use llvm.coro.align to align coroutine frame properly.

Reviewed By: bkramer

Differential Revision: https://reviews.llvm.org/D117978
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  4 +++
 .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp    | 26 +++++++++++--------
 .../AsyncToLLVM/convert-coro-to-llvm.mlir     | 14 +++++-----
 .../test/Target/LLVMIR/llvmir-intrinsics.mlir |  9 +++++++
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 58d8cc0ec7362..946e674fab66f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1542,6 +1542,10 @@ def LLVM_CoroSizeOp : LLVM_IntrOp<"coro.size", [0], [], [], 1> {
   let assemblyFormat = "attr-dict `:` type($res)";
 }
 
+def LLVM_CoroAlignOp : LLVM_IntrOp<"coro.align", [0], [], [], 1> {
+  let assemblyFormat = "attr-dict `:` type($res)";
+}
+
 def LLVM_CoroSaveOp : LLVM_IntrOp<"coro.save", [], [], [], 1> {
   let arguments = (ins LLVM_i8Ptr:$handle);
   let assemblyFormat = "$handle attr-dict `:` type($res)";
diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
index f0342f3660fe6..5e2bc50cc886f 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
+++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
@@ -335,27 +335,31 @@ class CoroBeginOpConversion : public OpConversionPattern {
     // Get coroutine frame size: @llvm.coro.size.i64.
     Value coroSize =
         rewriter.create(loc, rewriter.getI64Type());
-    // The coroutine lowering doesn't properly account for alignment of the
-    // frame, so align everything to 64 bytes which ought to be enough for
-    // everyone. https://llvm.org/PR53148
-    constexpr int64_t coroAlign = 64;
+    // Get coroutine frame alignment: @llvm.coro.align.i64.
+    Value coroAlign =
+        rewriter.create(loc, rewriter.getI64Type());
+
+    // Round up the size to be a multiple of the alignment, since aligned_alloc
+    // requires the size parameter to be an integral multiple of the alignment
+    // parameter.
     auto makeConstant = [&](uint64_t c) {
       return rewriter.create(
           op->getLoc(), rewriter.getI64Type(), rewriter.getI64IntegerAttr(c));
     };
-    // Round up the size to the alignment. This is a requirement of
-    // aligned_alloc.
-    coroSize = rewriter.create(op->getLoc(), coroSize,
-                                            makeConstant(coroAlign - 1));
-    coroSize = rewriter.create(op->getLoc(), coroSize,
-                                            makeConstant(-coroAlign));
+    coroSize = rewriter.create(op->getLoc(), coroSize, coroAlign);
+    coroSize =
+        rewriter.create(op->getLoc(), coroSize, makeConstant(1));
+    Value NegCoroAlign =
+        rewriter.create(op->getLoc(), makeConstant(0), coroAlign);
+    coroSize =
+        rewriter.create(op->getLoc(), coroSize, NegCoroAlign);
 
     // Allocate memory for the coroutine frame.
     auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn(
         op->getParentOfType(), rewriter.getI64Type());
     auto coroAlloc = rewriter.create(
         loc, i8Ptr, SymbolRefAttr::get(allocFuncOp),
-        ValueRange{makeConstant(coroAlign), coroSize});
+        ValueRange{coroAlign, coroSize});
 
     // Begin a coroutine: @llvm.coro.begin.
     auto coroId = CoroBeginOpAdaptor(adaptor.getOperands()).id();
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
index 85623772249f9..f84c5960c4200 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
@@ -14,12 +14,14 @@ func @coro_begin() {
   // CHECK: %[[ID:.*]] = llvm.intr.coro.id
   %0 = async.coro.id
   // CHECK: %[[SIZE:.*]] = llvm.intr.coro.size : i64
-  // CHECK: %[[C63:.*]] = llvm.mlir.constant(63 : i64) : i64
-  // CHECK: %[[SIZE2:.*]] = llvm.add %[[SIZE]], %[[C63]] : i64
-  // CHECK: %[[CN64:.*]] = llvm.mlir.constant(-64 : i64) : i64
-  // CHECK: %[[SIZE3:.*]] = llvm.and %[[SIZE2]], %[[CN64]] : i64
-  // CHECK: %[[ALIGN:.*]] = llvm.mlir.constant(64 : i64) : i64
-  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[SIZE3]])
+  // CHECK: %[[ALIGN:.*]] = llvm.intr.coro.align : i64
+  // CHECK: %[[SIZE_PLUS_ALIGN:.*]] = llvm.add %[[SIZE]], %[[ALIGN]] : i64
+  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[SIZE_PLUS_ALIGN_MINUS_ONE:.*]] = llvm.sub %[[SIZE_PLUS_ALIGN]], %[[C1]] : i64
+  // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[NEGATED_ALIGN:.*]] = llvm.sub %[[C0]], %[[ALIGN]]  : i64
+  // CHECK: %[[ROUNDED_SIZE:.*]] = llvm.and %[[SIZE_PLUS_ALIGN_MINUS_ONE]], %[[NEGATED_ALIGN]] : i64
+  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]]
   %1 = async.coro.begin %0
   return
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 752e9e961bcd1..f2f83d7425f39 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -446,6 +446,15 @@ llvm.func @coro_size() {
   llvm.return
 }
 
+// CHECK-LABEL: @coro_align
+llvm.func @coro_align() {
+  // CHECK: call i64 @llvm.coro.align.i64
+  %0 = llvm.intr.coro.align : i64
+  // CHECK: call i32 @llvm.coro.align.i32
+  %1 = llvm.intr.coro.align : i32
+  llvm.return
+}
+
 // CHECK-LABEL: @coro_save
 llvm.func @coro_save(%arg0: !llvm.ptr) {
   // CHECK: call token @llvm.coro.save

From a83e9266b96c4e5f92611af6f4d95fef6aeef12a Mon Sep 17 00:00:00 2001
From: Adrian Kuegel 
Date: Tue, 25 Jan 2022 12:33:42 +0100
Subject: [PATCH 532/946] [mlir][Bazel] Update BUILD.bazel file

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 03e54e2726f19..dff9389cc766b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -3161,6 +3161,7 @@ cc_library(
         ":Async",
         ":DLTIDialect",
         ":AffineUtils",
+        ":ExecutionEngineUtils",
         ":GPUDialect",
         ":GPUPassIncGen",
         ":MemRefDialect",
@@ -8184,8 +8185,8 @@ cc_library(
     hdrs = glob(["include/mlir/Tools/PDLL/AST/*.h"]),
     includes = ["include"],
     deps = [
+        ":Support",
         "//llvm:Support",
-        "//mlir:Support",
     ],
 )
 

From caff8591eff211c41d8de8505f89754d09ca6fa7 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 12:36:04 +0100
Subject: [PATCH 533/946] [OpenMP] Simplify pointer comparison

Rather than checking ptrdiff(a, b) != 0, directly check a != b.
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp        |  3 +--
 clang/test/OpenMP/declare_mapper_codegen.cpp | 24 ++++----------------
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c8c08060e729e..233656b90095b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10217,8 +10217,7 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
   llvm::Value *Cond;
   if (IsInit) {
     // base != begin?
-    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
-        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
+    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
     // IsPtrAndObj?
     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
         MapType,
diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp
index 053aa9d2cc6df..e114a1315c79f 100644
--- a/clang/test/OpenMP/declare_mapper_codegen.cpp
+++ b/clang/test/OpenMP/declare_mapper_codegen.cpp
@@ -102,11 +102,7 @@ class C {
 // CK0-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
 // CK0-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK0-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
-// CK0-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK0-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK0-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK0-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK0-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK0-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK0-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK0-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK0-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -662,11 +658,7 @@ class C {
 // CK1-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK1-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK1-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK1-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK1-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK1-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK1-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK1-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK1-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK1-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK1-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK1-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -789,11 +781,7 @@ class C {
 // CK2-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK2-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK2-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK2-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK2-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK2-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK2-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK2-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK2-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK2-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK2-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK2-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -999,11 +987,7 @@ class C {
 // CK4-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK4-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK4-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK4-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK4-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK4-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK4-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK4-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK4-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK4-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK4-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK4-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]

From 91a0b464a853821734db8b1c521df03f8e2e56e7 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt 
Date: Tue, 25 Jan 2022 11:40:31 +0000
Subject: [PATCH 534/946] [OpenCL] Make read_write images optional for
 -fdeclare-opencl-builtins

Ensure any use of a `read_write` image is guarded behind the
`__opencl_c_read_write_images` feature macro.

Differential Revision: https://reviews.llvm.org/D117899
---
 clang/lib/Headers/opencl-c-base.h             |   1 +
 clang/lib/Sema/OpenCLBuiltins.td              | 164 +++++++++++-------
 .../SemaOpenCL/fdeclare-opencl-builtins.cl    |   1 +
 3 files changed, 108 insertions(+), 58 deletions(-)

diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 7485386c82346..4f63e7bbdcbba 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -68,6 +68,7 @@
 // For the SPIR and SPIR-V target all features are supported.
 #if defined(__SPIR__) || defined(__SPIRV__)
 #define __opencl_c_atomic_scope_all_devices 1
+#define __opencl_c_read_write_images 1
 #endif // defined(__SPIR__)
 #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
 
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 38debc5aa9fc5..df2f206041c10 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -80,11 +80,14 @@ def FuncExtKhrLocalInt32ExtendedAtomics  : FunctionExtension<"cl_khr_local_int32
 def FuncExtKhrInt64BaseAtomics           : FunctionExtension<"cl_khr_int64_base_atomics">;
 def FuncExtKhrInt64ExtendedAtomics       : FunctionExtension<"cl_khr_int64_extended_atomics">;
 def FuncExtKhrMipmapImage                : FunctionExtension<"cl_khr_mipmap_image">;
+def FuncExtKhrMipmapImageReadWrite       : FunctionExtension<"cl_khr_mipmap_image __opencl_c_read_write_images">;
 def FuncExtKhrMipmapImageWrites          : FunctionExtension<"cl_khr_mipmap_image_writes">;
 def FuncExtKhrGlMsaaSharing              : FunctionExtension<"cl_khr_gl_msaa_sharing">;
+def FuncExtKhrGlMsaaSharingReadWrite     : FunctionExtension<"cl_khr_gl_msaa_sharing __opencl_c_read_write_images">;
 
 def FuncExtOpenCLCPipes                  : FunctionExtension<"__opencl_c_pipes">;
 def FuncExtOpenCLCWGCollectiveFunctions  : FunctionExtension<"__opencl_c_work_group_collective_functions">;
+def FuncExtOpenCLCReadWriteImages        : FunctionExtension<"__opencl_c_read_write_images">;
 def FuncExtFloatAtomicsFp16GlobalLoadStore  : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store">;
 def FuncExtFloatAtomicsFp16LocalLoadStore   : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_load_store">;
 def FuncExtFloatAtomicsFp16GenericLoadStore : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store __opencl_c_ext_fp16_local_atomic_load_store">;
@@ -1390,30 +1393,35 @@ foreach coordTy = [Int, Float] in {
 }
 
 // --- Table 23: Sampler-less Read Functions ---
+multiclass ImageReadSamplerless {
+  foreach imgTy = [Image2d, Image1dArray] in {
+    def : Builtin<"read_imagef", [VectorType, ImageType, VectorType], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType, ImageType, VectorType], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType, ImageType, VectorType], Attr.Pure>;
+  }
+  foreach imgTy = [Image3d, Image2dArray] in {
+    def : Builtin<"read_imagef", [VectorType, ImageType, VectorType], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType, ImageType, VectorType], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType, ImageType, VectorType], Attr.Pure>;
+  }
+  foreach imgTy = [Image1d, Image1dBuffer] in {
+    def : Builtin<"read_imagef", [VectorType, ImageType, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType, ImageType, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType, ImageType, Int], Attr.Pure>;
+  }
+  def : Builtin<"read_imagef", [Float, ImageType, VectorType], Attr.Pure>;
+  def : Builtin<"read_imagef", [Float, ImageType, VectorType], Attr.Pure>;
+}
+
 let MinVersion = CL12 in {
-  foreach aQual = ["RO", "RW"] in {
-    foreach imgTy = [Image2d, Image1dArray] in {
-      def : Builtin<"read_imagef", [VectorType, ImageType, VectorType], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType, ImageType, VectorType], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType, ImageType, VectorType], Attr.Pure>;
-    }
-    foreach imgTy = [Image3d, Image2dArray] in {
-      def : Builtin<"read_imagef", [VectorType, ImageType, VectorType], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType, ImageType, VectorType], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType, ImageType, VectorType], Attr.Pure>;
-    }
-    foreach imgTy = [Image1d, Image1dBuffer] in {
-      def : Builtin<"read_imagef", [VectorType, ImageType, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType, ImageType, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType, ImageType, Int], Attr.Pure>;
-    }
-    def : Builtin<"read_imagef", [Float, ImageType, VectorType], Attr.Pure>;
-    def : Builtin<"read_imagef", [Float, ImageType, VectorType], Attr.Pure>;
+  defm : ImageReadSamplerless<"RO">;
+  let Extension = FuncExtOpenCLCReadWriteImages in {
+    defm : ImageReadSamplerless<"RW">;
   }
 }
 
 // --- Table 24: Image Write Functions ---
-foreach aQual = ["WO", "RW"] in {
+multiclass ImageWrite {
   foreach imgTy = [Image2d] in {
     def : Builtin<"write_imagef", [Void, ImageType, VectorType, VectorType]>;
     def : Builtin<"write_imagei", [Void, ImageType, VectorType, VectorType]>;
@@ -1443,8 +1451,13 @@ foreach aQual = ["WO", "RW"] in {
   def : Builtin<"write_imagef", [Void, ImageType, VectorType, Float]>;
 }
 
+defm : ImageWrite<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageWrite<"RW">;
+}
+
 // --- Table 25: Image Query Functions ---
-foreach aQual = ["RO", "WO", "RW"] in {
+multiclass ImageQuery {
   foreach imgTy = [Image1d, Image1dBuffer, Image2d, Image3d,
                    Image1dArray, Image2dArray, Image2dDepth,
                    Image2dArrayDepth] in {
@@ -1468,6 +1481,12 @@ foreach aQual = ["RO", "WO", "RW"] in {
   }
 }
 
+defm : ImageQuery<"RO">;
+defm : ImageQuery<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageQuery<"RW">;
+}
+
 // OpenCL extension v2.0 s5.1.9: Built-in Image Read Functions
 // --- Table 8 ---
 foreach aQual = ["RO"] in {
@@ -1488,7 +1507,7 @@ foreach aQual = ["RO"] in {
 // OpenCL extension v2.0 s5.1.10: Built-in Image Sampler-less Read Functions
 // --- Table 9 ---
 let MinVersion = CL12 in {
-  foreach aQual = ["RO", "RW"] in {
+  multiclass ImageReadHalf {
     foreach name = ["read_imageh"] in {
       foreach imgTy = [Image2d, Image1dArray] in {
         def : Builtin, ImageType, VectorType], Attr.Pure>;
@@ -1501,10 +1520,14 @@ let MinVersion = CL12 in {
       }
     }
   }
+  defm : ImageReadHalf<"RO">;
+  let Extension = FuncExtOpenCLCReadWriteImages in {
+    defm : ImageReadHalf<"RW">;
+  }
 }
 // OpenCL extension v2.0 s5.1.11: Built-in Image Write Functions
 // --- Table 10 ---
-foreach aQual = ["WO", "RW"] in {
+multiclass ImageWriteHalf {
   foreach name = ["write_imageh"] in {
     def : Builtin, VectorType, VectorType]>;
     def : Builtin, VectorType, VectorType]>;
@@ -1515,6 +1538,12 @@ foreach aQual = ["WO", "RW"] in {
   }
 }
 
+defm : ImageWriteHalf<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageWriteHalf<"RW">;
+}
+
+
 
 //--------------------------------------------------------------------
 // OpenCL v2.0 s6.13.15 - Work-group Functions
@@ -1688,14 +1717,24 @@ let Extension = FuncExtKhrMipmapImage in {
       }
     }
   }
-  // Added to section 6.13.14.5
-  foreach aQual = ["RO", "WO", "RW"] in {
-    foreach imgTy = [Image1d, Image2d, Image3d, Image1dArray, Image2dArray, Image2dDepth, Image2dArrayDepth] in {
-      def : Builtin<"get_image_num_mip_levels", [Int, ImageType]>;
-    }
+}
+
+// Added to section 6.13.14.5
+multiclass ImageQueryNumMipLevels {
+  foreach imgTy = [Image1d, Image2d, Image3d, Image1dArray, Image2dArray, Image2dDepth, Image2dArrayDepth] in {
+    def : Builtin<"get_image_num_mip_levels", [Int, ImageType]>;
   }
 }
 
+let Extension = FuncExtKhrMipmapImage in {
+  defm : ImageQueryNumMipLevels<"RO">;
+  defm : ImageQueryNumMipLevels<"WO">;
+}
+
+let Extension = FuncExtKhrMipmapImageReadWrite in {
+  defm : ImageQueryNumMipLevels<"RW">;
+}
+
 // Write functions are enabled using a separate extension.
 let Extension = FuncExtKhrMipmapImageWrites in {
   // Added to section 6.13.14.4.
@@ -1734,39 +1773,48 @@ let Extension = FuncExtKhrMipmapImageWrites in {
 
 //--------------------------------------------------------------------
 // OpenCL Extension v2.0 s18.3 - Creating OpenCL Memory Objects from OpenGL MSAA Textures
-let Extension = FuncExtKhrGlMsaaSharing in {
-  // --- Table 6.13.14.3 ---
-  foreach aQual = ["RO", "RW"] in {
-    foreach imgTy = [Image2dMsaa] in {
-      def : Builtin<"read_imagef", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-    }
-    foreach imgTy = [Image2dArrayMsaa] in {
-      def : Builtin<"read_imagef", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
-    }
-    foreach name = ["read_imagef"] in {
-      def : Builtin, VectorType, Int], Attr.Pure>;
-      def : Builtin, VectorType, Int], Attr.Pure>;
-    }
-  }
-
-  // --- Table 6.13.14.5 ---
-  foreach aQual = ["RO", "WO", "RW"] in {
-    foreach imgTy = [Image2dMsaa, Image2dArrayMsaa, Image2dMsaaDepth, Image2dArrayMsaaDepth] in {
-      foreach name = ["get_image_width", "get_image_height",
-                      "get_image_channel_data_type", "get_image_channel_order",
-                      "get_image_num_samples"] in {
-        def : Builtin], Attr.Const>;
-      }
-      def : Builtin<"get_image_dim", [VectorType, ImageType], Attr.Const>;
-    }
-    foreach imgTy = [Image2dArrayMsaa, Image2dArrayMsaaDepth] in {
-      def : Builtin<"get_image_array_size", [Size, ImageType], Attr.Const>;
+// --- Table 6.13.14.3 ---
+multiclass ImageReadMsaa {
+  foreach imgTy = [Image2dMsaa] in {
+    def : Builtin<"read_imagef", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+  }
+  foreach imgTy = [Image2dArrayMsaa] in {
+    def : Builtin<"read_imagef", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType, ImageType, VectorType, Int], Attr.Pure>;
+  }
+  foreach name = ["read_imagef"] in {
+    def : Builtin, VectorType, Int], Attr.Pure>;
+    def : Builtin, VectorType, Int], Attr.Pure>;
+  }
+}
+
+// --- Table 6.13.14.5 ---
+multiclass ImageQueryMsaa {
+  foreach imgTy = [Image2dMsaa, Image2dArrayMsaa, Image2dMsaaDepth, Image2dArrayMsaaDepth] in {
+    foreach name = ["get_image_width", "get_image_height",
+                    "get_image_channel_data_type", "get_image_channel_order",
+                    "get_image_num_samples"] in {
+      def : Builtin], Attr.Const>;
     }
+    def : Builtin<"get_image_dim", [VectorType, ImageType], Attr.Const>;
   }
+  foreach imgTy = [Image2dArrayMsaa, Image2dArrayMsaaDepth] in {
+    def : Builtin<"get_image_array_size", [Size, ImageType], Attr.Const>;
+  }
+}
+
+let Extension = FuncExtKhrGlMsaaSharing in {
+  defm : ImageReadMsaa<"RO">;
+  defm : ImageQueryMsaa<"RO">;
+  defm : ImageQueryMsaa<"WO">;
+}
+
+let Extension = FuncExtKhrGlMsaaSharingReadWrite in {
+  defm : ImageReadMsaa<"RW">;
+  defm : ImageQueryMsaa<"RW">;
 }
 
 //--------------------------------------------------------------------
diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
index 5da38c121e393..be6e53a07bdf3 100644
--- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
+++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
@@ -74,6 +74,7 @@ typedef struct {int a;} ndrange_t;
 #define cl_khr_subgroup_ballot 1
 #define cl_khr_subgroup_non_uniform_arithmetic 1
 #define cl_khr_subgroup_clustered_reduce 1
+#define __opencl_c_read_write_images 1
 #endif
 #endif
 

From 30d4a7e2955356c69ae412bfe2de46b92a2202c1 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 12:23:28 +0100
Subject: [PATCH 535/946] [IRBuilder] Require explicit element type in
 CreatePtrDiff()

For opaque pointer compatibility, we cannot derive the element
type from the pointer type.
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp          | 12 +++++++-----
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp       |  5 +++--
 llvm/include/llvm/IR/IRBuilder.h               |  3 ++-
 llvm/lib/IR/Core.cpp                           |  4 +++-
 llvm/lib/IR/IRBuilder.cpp                      |  8 +++++---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  3 ++-
 6 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 233656b90095b..db1c3ca191ca2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -840,7 +840,8 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
-    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+    Size = CGF.Builder.CreatePtrDiff(ElemType,
+                                     OrigAddresses[N].second.getPointer(CGF),
                                      OrigAddresses[N].first.getPointer(CGF));
     Size = CGF.Builder.CreateNUWAdd(
         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
@@ -1006,7 +1007,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                     OriginalBaseLValue);
     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
-        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
+        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
+        SharedAddr.getPointer());
     llvm::Value *PrivatePointer =
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             PrivateAddr.getPointer(), SharedAddr.getType());
@@ -8119,7 +8121,7 @@ class MappableExprsHandler {
                           .getAddress(CGF);
                 }
                 Size = CGF.Builder.CreatePtrDiff(
-                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
+                    CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                     CGF.EmitCastToVoidPtr(LB.getPointer()));
                 break;
               }
@@ -8140,7 +8142,7 @@ class MappableExprsHandler {
           CombinedInfo.BasePointers.push_back(BP.getPointer());
           CombinedInfo.Pointers.push_back(LB.getPointer());
           Size = CGF.Builder.CreatePtrDiff(
-              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
+              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
               CGF.EmitCastToVoidPtr(LB.getPointer()));
           CombinedInfo.Sizes.push_back(
               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
@@ -8966,7 +8968,7 @@ class MappableExprsHandler {
         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
-    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
+    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                   /*isSigned=*/false);
     CombinedInfo.Sizes.push_back(Size);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 7c8e4e6b52a08..e09ea5e01b1a9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1798,8 +1798,9 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
       Ptr = Address(PhiSrc, Ptr.getAlignment());
       ElemPtr = Address(PhiDest, ElemPtr.getAlignment());
       llvm::Value *PtrDiff = Bld.CreatePtrDiff(
-          PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast(
-                                   Ptr.getPointer(), CGF.VoidPtrTy));
+          CGF.Int8Ty, PtrEnd.getPointer(),
+          Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr.getPointer(),
+                                                  CGF.VoidPtrTy));
       Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
                        ThenBB, ExitBB);
       CGF.EmitBlock(ThenBB);
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 32c978f02a4d2..53f517480ca1c 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2413,7 +2413,8 @@ class IRBuilderBase {
   /// This is intended to implement C-style pointer subtraction. As such, the
   /// pointers must be appropriately aligned for their element types and
   /// pointing into the same object.
-  Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "");
+  Value *CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
+                       const Twine &Name = "");
 
   /// Create a launder.invariant.group intrinsic call. If Ptr type is
   /// different from pointer to i8, it's casted to pointer to i8 in the same
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 47ee4cbbf5829..988d2affd9654 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -4019,7 +4019,9 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
 
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name) {
-  return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+  Value *L = unwrap(LHS);
+  Type *ElemTy = L->getType()->getPointerElementType();
+  return wrap(unwrap(B)->CreatePtrDiff(ElemTy, L, unwrap(RHS), Name));
 }
 
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b91885bcfac47..27528a69be219 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -998,15 +998,17 @@ Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False,
   return Insert(Sel, Name);
 }
 
-Value *IRBuilderBase::CreatePtrDiff(Value *LHS, Value *RHS,
+Value *IRBuilderBase::CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
                                     const Twine &Name) {
   assert(LHS->getType() == RHS->getType() &&
          "Pointer subtraction operand types must match!");
-  auto *ArgElemType = LHS->getType()->getPointerElementType();
+  assert(cast(LHS->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTy) &&
+         "Pointer type must match element type");
   Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
   Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
   Value *Difference = CreateSub(LHS_int, RHS_int);
-  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ArgElemType),
+  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ElemTy),
                          Name);
 }
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3122fcd41c8ac..123fb0cfd1cbb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2516,7 +2516,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
       // Handle mismatched pointer types (goes away with typeless pointers?).
       V = B.CreatePointerCast(V, Dest->getType());
-      Value *PtrDiff = B.CreatePtrDiff(V, Dest);
+      Value *PtrDiff = B.CreatePtrDiff(
+          Dest->getType()->getPointerElementType(), V, Dest);
       return B.CreateIntCast(PtrDiff, CI->getType(), false);
     }
 

From d8962b4139a58ba6955cd4e15dc432c674a013c0 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 12:47:50 +0100
Subject: [PATCH 536/946] [llvm-c] Deprecate LLVMBuildPtrDiff()

Deprecate LLVMBuildPtrDiff() in favor of LLVMBuildPtrDiff2(), which accepts
an explicit element type and is compatible with opaque pointers.
---
 llvm/include/llvm-c/Core.h | 9 +++++++--
 llvm/lib/IR/Core.cpp       | 9 ++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index ae2bcb8444b4b..ca3ca24487a51 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -4020,8 +4020,13 @@ LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val,
                              const char *Name);
 LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
                                 const char *Name);
-LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
-                              LLVMValueRef RHS, const char *Name);
+LLVM_ATTRIBUTE_C_DEPRECATED(
+    LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
+                                  LLVMValueRef RHS, const char *Name),
+    "Use LLVMBuildPtrDiff2 instead to support opaque pointers");
+LLVMValueRef LLVMBuildPtrDiff2(LLVMBuilderRef, LLVMTypeRef ElemTy,
+                               LLVMValueRef LHS, LLVMValueRef RHS,
+                               const char *Name);
 LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering,
                             LLVMBool singleThread, const char *Name);
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op,
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 988d2affd9654..43df15e4d9328 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -4020,10 +4020,17 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name) {
   Value *L = unwrap(LHS);
-  Type *ElemTy = L->getType()->getPointerElementType();
+  Type *ElemTy = L->getType()->getNonOpaquePointerElementType();
   return wrap(unwrap(B)->CreatePtrDiff(ElemTy, L, unwrap(RHS), Name));
 }
 
+LLVMValueRef LLVMBuildPtrDiff2(LLVMBuilderRef B, LLVMTypeRef ElemTy,
+                               LLVMValueRef LHS, LLVMValueRef RHS,
+                               const char *Name) {
+  return wrap(unwrap(B)->CreatePtrDiff(unwrap(ElemTy), unwrap(LHS),
+                                       unwrap(RHS), Name));
+}
+
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
                                LLVMValueRef PTR, LLVMValueRef Val,
                                LLVMAtomicOrdering ordering,

From 157f9b68a3722101c3c2e40926dadc4aeb3e4a39 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 11:41:15 +0000
Subject: [PATCH 537/946] [X86] combineVectorSignBitsTruncation - fix
 indentation. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c68774a838a25..b25a170a683ab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48989,7 +48989,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
     // originally concatenated from subvectors.
     SmallVector ConcatOps;
     if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
-    return SDValue();
+      return SDValue();
   }
 
   unsigned NumPackedSignBits = std::min(SVT.getSizeInBits(), 16);

From 15e2be291f7fc8a694f796dd494e4f14d7f5a982 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 11:54:11 +0000
Subject: [PATCH 538/946] [DAG] visitMULHS/MULHU/AND - remove some redundant
 LHS constant checks

Now that we constant fold and canonicalize constants to the RHS, we don't need to check both LHS and RHS for specific constants.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c3d2ed2dcf85c..082e2508aa4ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4470,15 +4470,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
       return FoldedVOp;
 
     // fold (mulhs x, 0) -> 0
-    // do not return N0/N1, because undef node may exist.
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
-        ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+    // do not return N1, because undef node may exist.
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhs x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
+
   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   if (isOneConstant(N1))
     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
@@ -4530,18 +4530,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
       return FoldedVOp;
 
     // fold (mulhu x, 0) -> 0
-    // do not return N0/N1, because undef node may exist.
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
-        ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+    // do not return N1, because undef node may exist.
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhu x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
+
   // fold (mulhu x, 1) -> 0
   if (isOneConstant(N1))
     return DAG.getConstant(0, DL, N0.getValueType());
+
   // fold (mulhu x, undef) -> 0
   if (N0.isUndef() || N1.isUndef())
     return DAG.getConstant(0, DL, VT);
@@ -5815,18 +5816,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return FoldedVOp;
 
     // fold (and x, 0) -> 0, vector edition
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
-      // do not return N0, because undef node may exist in N0
-      return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
-                             SDLoc(N), N0.getValueType());
     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       // do not return N1, because undef node may exist in N1
       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
                              SDLoc(N), N1.getValueType());
 
     // fold (and x, -1) -> x, vector edition
-    if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
-      return N1;
     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
       return N0;
 

From 3e2ae92d3f062f47b7cc8103e9a6c15b815d9018 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 12:55:02 +0100
Subject: [PATCH 539/946] [SCEV] Remove an unnecessary GEP type check

The code already checked that the addrec step size and type alloc
size are the same. The actual pointer element type is irrelevant
here.
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4f2123b4c5fab..07aac1523b478 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7596,10 +7596,6 @@ const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
       auto *ArrSize = dyn_cast(AllocateInst->getArraySize());
       if (!Ty || !ArrSize || !ArrSize->isOne())
         continue;
-      // Also make sure step was increased the same with sizeof allocated
-      // element type.
-      if (Ty->getElementType() != GEP->getType()->getPointerElementType())
-        continue;
 
       // FIXME: Since gep indices are silently zext to the indexing type,
       // we will have a narrow gep index which wraps around rather than

From 4f4d071c909ef142c858a7e938aca9a99173cabe Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 13:07:36 +0100
Subject: [PATCH 540/946] [ObjCArcOpts] Regenerate test checks (NFC)

---
 llvm/test/Transforms/ObjCARC/weak.ll | 43 +++++++++++++++-------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/ObjCARC/weak.ll b/llvm/test/Transforms/ObjCARC/weak.ll
index caaeba7280e80..2009addbbdc56 100644
--- a/llvm/test/Transforms/ObjCARC/weak.ll
+++ b/llvm/test/Transforms/ObjCARC/weak.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -objc-arc -S < %s | FileCheck %s
 
 declare i8* @llvm.objc.initWeak(i8**, i8*)
@@ -10,27 +11,29 @@ declare void @llvm.objc.copyWeak(i8**, i8**)
 
 ; If the pointer-to-weak-pointer is null, it's undefined behavior.
 
-; CHECK-LABEL: define void @test0(
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: ret void
 define void @test0(i8* %p, i8** %q) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   call i8* @llvm.objc.storeWeak(i8** null, i8* %p)
   call i8* @llvm.objc.storeWeak(i8** undef, i8* %p)

From 78e1f70220a57bc7a7554d4240c5f15810959d7c Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 13:05:14 +0100
Subject: [PATCH 541/946] [ObjCARCOpts] Use standard non-terminator unreachable
 pattern

This is what CreateNonTerminatorUnreachable() in InstCombine uses.
The specific choice here doesn't really matter, but we should pick
one that is independent of the pointer element type.
---
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 10 +++---
 llvm/test/Transforms/ObjCARC/weak.ll        | 36 ++++++++++-----------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index caad91867112c..b6dc97f1e43f8 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -979,9 +979,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     CallInst *CI = cast(Inst);
     if (IsNullOrUndef(CI->getArgOperand(0))) {
       Changed = true;
-      Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
-                    Constant::getNullValue(Ty), CI);
+      new StoreInst(ConstantInt::getTrue(CI->getContext()),
+                    UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
@@ -999,9 +998,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     if (IsNullOrUndef(CI->getArgOperand(0)) ||
         IsNullOrUndef(CI->getArgOperand(1))) {
       Changed = true;
-      Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
-                    Constant::getNullValue(Ty), CI);
+      new StoreInst(ConstantInt::getTrue(CI->getContext()),
+                    UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
 
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
diff --git a/llvm/test/Transforms/ObjCARC/weak.ll b/llvm/test/Transforms/ObjCARC/weak.ll
index 2009addbbdc56..00a4e9f681e7d 100644
--- a/llvm/test/Transforms/ObjCARC/weak.ll
+++ b/llvm/test/Transforms/ObjCARC/weak.ll
@@ -14,24 +14,24 @@ declare void @llvm.objc.copyWeak(i8**, i8**)
 define void @test0(i8* %p, i8** %q) {
 ; CHECK-LABEL: @test0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:

From d95cf1f6cf4242ae9f045b8032b9e4c08d41a12f Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Fri, 10 Dec 2021 18:05:38 +0000
Subject: [PATCH 542/946] [SVE] Enable ISD::ABDS/U ISel for scalable vectors.

NOTE: This patch also includes tests that highlight those cases
where the existing DAG combine doesn't yet work well for SVE.

Differential Revision: https://reviews.llvm.org/D117873
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   8 +
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   2 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +
 llvm/test/CodeGen/AArch64/sve-abd.ll          | 267 ++++++++++++++++++
 4 files changed, 281 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-abd.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fd35ab2049e92..fc6e0b865681c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1206,6 +1206,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SRL, VT, Custom);
       setOperationAction(ISD::SRA, VT, Custom);
       setOperationAction(ISD::ABS, VT, Custom);
+      setOperationAction(ISD::ABDS, VT, Custom);
+      setOperationAction(ISD::ABDU, VT, Custom);
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
@@ -1994,6 +1996,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::CSINC)
     MAKE_CASE(AArch64ISD::THREAD_POINTER)
     MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+    MAKE_CASE(AArch64ISD::ABDS_PRED)
+    MAKE_CASE(AArch64ISD::ABDU_PRED)
     MAKE_CASE(AArch64ISD::ADD_PRED)
     MAKE_CASE(AArch64ISD::MUL_PRED)
     MAKE_CASE(AArch64ISD::MULHS_PRED)
@@ -5196,6 +5200,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerFixedLengthVectorSelectToSVE(Op, DAG);
   case ISD::ABS:
     return LowerABS(Op, DAG);
+  case ISD::ABDS:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+  case ISD::ABDU:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
   case ISD::BITREVERSE:
     return LowerBitreverse(Op, DAG);
   case ISD::BSWAP:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9841a4c048632..df19a4729bb49 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -77,6 +77,8 @@ enum NodeType : unsigned {
   SBC, // adc, sbc instructions
 
   // Predicated instructions where inactive lanes produce undefined results.
+  ABDS_PRED,
+  ABDU_PRED,
   ADD_PRED,
   FADD_PRED,
   FDIV_PRED,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63f8f58e76c53..63cd8f476272a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -189,11 +189,13 @@ def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
 def AArch64lsl_p  : SDNode<"AArch64ISD::SHL_PRED",  SDT_AArch64Arith>;
 def AArch64lsr_p  : SDNode<"AArch64ISD::SRL_PRED",  SDT_AArch64Arith>;
 def AArch64mul_p  : SDNode<"AArch64ISD::MUL_PRED",  SDT_AArch64Arith>;
+def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>;
 def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
 def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
 def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
 def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
 def AArch64sub_p  : SDNode<"AArch64ISD::SUB_PRED",  SDT_AArch64Arith>;
+def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
 def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
 def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
 def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
@@ -418,6 +420,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm UMAX_ZPZZ : sve_int_bin_pred_bhsd;
   defm SMIN_ZPZZ : sve_int_bin_pred_bhsd;
   defm UMIN_ZPZZ : sve_int_bin_pred_bhsd;
+  defm SABD_ZPZZ : sve_int_bin_pred_bhsd;
+  defm UABD_ZPZZ : sve_int_bin_pred_bhsd;
 
   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  AArch64frecpe>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>;
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
new file mode 100644
index 0000000000000..affd6d5b15f79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SABD
+;
+
+define  @sabd_b( %a,  %b) #0 {
+; CHECK-LABEL: sabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv16i16( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @sabd_b_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: sabd_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ptrue p2.b
+; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    abs z0.b, p2/m, z0.b
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv16i8( %sub, i1 true)
+  ret  %abs
+}
+
+define  @sabd_h( %a,  %b) #0 {
+; CHECK-LABEL: sabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv8i32( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @sabd_h_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: sabd_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv8i16( %sub, i1 true)
+  ret  %abs
+}
+
+define  @sabd_s( %a,  %b) #0 {
+; CHECK-LABEL: sabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv4i64( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @sabd_s_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: sabd_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv4i32( %sub, i1 true)
+  ret  %abs
+}
+
+define  @sabd_d( %a,  %b) #0 {
+; CHECK-LABEL: sabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv2i128( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @sabd_d_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: sabd_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %a.sext = sext  %a to 
+  %b.sext = sext  %b to 
+  %sub = sub  %a.sext, %b.sext
+  %abs = call  @llvm.abs.nxv2i64( %sub, i1 true)
+  ret  %abs
+}
+
+;
+; UABD
+;
+
+define  @uabd_b( %a,  %b) #0 {
+; CHECK-LABEL: uabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv16i16( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @uabd_b_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: uabd_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ptrue p2.b
+; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    abs z0.b, p2/m, z0.b
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv16i8( %sub, i1 true)
+  ret  %abs
+}
+
+define  @uabd_h( %a,  %b) #0 {
+; CHECK-LABEL: uabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    uabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv8i32( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @uabd_h_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: uabd_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv8i16( %sub, i1 true)
+  ret  %abs
+}
+
+define  @uabd_s( %a,  %b) #0 {
+; CHECK-LABEL: uabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv4i64( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @uabd_s_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: uabd_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    and z1.s, z1.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv4i32( %sub, i1 true)
+  ret  %abs
+}
+
+define  @uabd_d( %a,  %b) #0 {
+; CHECK-LABEL: uabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv2i128( %sub, i1 true)
+  %trunc = trunc  %abs to 
+  ret  %trunc
+}
+
+define  @uabd_d_promoted_ops( %a,  %b) #0 {
+; CHECK-LABEL: uabd_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %a.zext = zext  %a to 
+  %b.zext = zext  %b to 
+  %sub = sub  %a.zext, %b.zext
+  %abs = call  @llvm.abs.nxv2i64( %sub, i1 true)
+  ret  %abs
+}
+
+declare  @llvm.abs.nxv16i8(, i1)
+
+declare  @llvm.abs.nxv8i16(, i1)
+declare  @llvm.abs.nxv16i16(, i1)
+
+declare  @llvm.abs.nxv4i32(, i1)
+declare  @llvm.abs.nxv8i32(, i1)
+
+declare  @llvm.abs.nxv2i64(, i1)
+declare  @llvm.abs.nxv4i64(, i1)
+
+declare  @llvm.abs.nxv2i128(, i1)
+
+attributes #0 = { "target-features"="+neon,+sve" }

From 153b1e3cba1e0469bfa6c72208d91708c219e6a6 Mon Sep 17 00:00:00 2001
From: Danila Malyutin 
Date: Tue, 18 Jan 2022 20:17:22 +0300
Subject: [PATCH 543/946] [AArch64] Add patterns for relaxed atomic ld/st into
 fp registers

Adds patterns to match integer loads/stores bitcasted to fp values

Fixes https://github.com/llvm/llvm-project/issues/52927

Differential Revision: https://reviews.llvm.org/D117573
---
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 60 ++++++++++++
 .../CodeGen/AArch64/relaxed-fp-atomics.ll     | 94 +++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 84573dac7e41f..b220929514f9d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -102,6 +102,34 @@ def : Pat<(relaxed_load
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
           (LDURXi GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit loads
+def : Pat<(f32 (bitconvert (i32 (relaxed_load (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend32:$extend))))),
+          (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend32:$extend))))),
+          (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load (am_indexed32 GPR64sp:$Rn,
+                                                      uimm12s8:$offset))))),
+          (LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit loads
+def : Pat<(f64 (bitconvert (i64 (relaxed_load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend64:$extend))))),
+          (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend64:$extend))))),
+          (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load (am_indexed64 GPR64sp:$Rn,
+                                                      uimm12s8:$offset))))),
+          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Atomic stores
 //===----------------------------------
@@ -196,6 +224,38 @@ def : Pat<(relaxed_store
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
           (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit stores
+def : Pat<(relaxed_store (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store
+              (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit stores
+def : Pat<(relaxed_store (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store
+              (am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
diff --git a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
new file mode 100644
index 0000000000000..1f8ba6da24646
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
@@ -0,0 +1,94 @@
+; PR52927: Relaxed atomics can load to/store from fp regs directly
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+
+define float @atomic_load_relaxed_f32(float* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  %val_unsigned = load atomic float, float* %ptr_unsigned monotonic, align 4
+; CHECK: ldr {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  %val_regoff = load atomic float, float* %ptr_regoff unordered, align 4
+  %tot1 = fadd float %val_unsigned, %val_regoff
+; CHECK: ldr {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  %val_regoff64 = load atomic float, float* %ptr_regoff64 monotonic, align 4
+  %tot2 = fadd float %tot1, %val_regoff64
+; CHECK: ldr {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  %val_unscaled = load atomic float, float* %ptr_unscaled unordered, align 4
+  %tot3 = fadd float %tot2, %val_unscaled
+; CHECK: ldur {{s[0-9]+}}, [x0, #-256]
+
+  ret float %tot3
+}
+
+define double @atomic_load_relaxed_f64(double* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  %val_unsigned = load atomic double, double* %ptr_unsigned monotonic, align 8
+; CHECK: ldr {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  %val_regoff = load atomic double, double* %ptr_regoff unordered, align 8
+  %tot1 = fadd double %val_unsigned, %val_regoff
+; CHECK: ldr {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  %val_regoff64 = load atomic double, double* %ptr_regoff64 monotonic, align 8
+  %tot2 = fadd double %tot1, %val_regoff64
+; CHECK: ldr {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  %val_unscaled = load atomic double, double* %ptr_unscaled unordered, align 8
+  %tot3 = fadd double %tot2, %val_unscaled
+; CHECK: ldur {{d[0-9]+}}, [x0, #-256]
+
+  ret double %tot3
+}
+
+define void @atomic_store_relaxed_f32(float* %p, i32 %off32, i64 %off64, float %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  store atomic float %val, float* %ptr_unsigned monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  store atomic float %val, float* %ptr_regoff unordered, align 4
+; CHECK: str {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  store atomic float %val, float* %ptr_regoff64 monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  store atomic float %val, float* %ptr_unscaled unordered, align 4
+; CHECK: stur {{s[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+define void @atomic_store_relaxed_f64(double* %p, i32 %off32, i64 %off64, double %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  store atomic double %val, double* %ptr_unsigned monotonic, align 8
+; CHECK: str {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  store atomic double %val, double* %ptr_regoff unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  store atomic double %val, double* %ptr_regoff64 unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  store atomic double %val, double* %ptr_unscaled monotonic, align 8
+; CHECK: stur {{d[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+attributes #0 = { nounwind }

From fc15ab7b1b26ebe8ac8435f0da23cc91b735ef4e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 12:35:00 +0000
Subject: [PATCH 544/946] [X86] Add folded load tests to PR46809 tests

---
 llvm/test/CodeGen/X86/select-lea.ll | 198 +++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/select-lea.ll b/llvm/test/CodeGen/X86/select-lea.ll
index 4b50fdc2ca4e0..df4a760eb339e 100644
--- a/llvm/test/CodeGen/X86/select-lea.ll
+++ b/llvm/test/CodeGen/X86/select-lea.ll
@@ -46,6 +46,52 @@ define i32 @sadd_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @sadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: sadd_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: sadd_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    leal (%eax,%edx), %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    addl %edx, %eax
+; CMOV-NEXT:    cmovol %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: sadd_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB1_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB1_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @uadd_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: uadd_add_imm:
 ; X64:       # %bb.0:
@@ -73,11 +119,11 @@ define i32 @uadd_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
 ; NOCMOV-NEXT:    addl %edx, %eax
-; NOCMOV-NEXT:    jae .LBB1_2
+; NOCMOV-NEXT:    jae .LBB2_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB1_2:
+; NOCMOV-NEXT:  .LBB2_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -87,6 +133,52 @@ define i32 @uadd_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @uadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: uadd_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: uadd_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    leal (%eax,%edx), %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    addl %edx, %eax
+; CMOV-NEXT:    cmovbl %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: uadd_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB3_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB3_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @ssub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: ssub_add_imm:
 ; X64:       # %bb.0:
@@ -115,11 +207,11 @@ define i32 @ssub_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl %eax, %ecx
 ; NOCMOV-NEXT:    subl %edx, %ecx
 ; NOCMOV-NEXT:    subl %edx, %eax
-; NOCMOV-NEXT:    jno .LBB2_2
+; NOCMOV-NEXT:    jno .LBB4_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB2_2:
+; NOCMOV-NEXT:  .LBB4_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -129,6 +221,53 @@ define i32 @ssub_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @ssub_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: ssub_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: ssub_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    movl %eax, %esi
+; CMOV-NEXT:    subl %edx, %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    subl %edx, %eax
+; CMOV-NEXT:    cmovol %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: ssub_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB5_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB5_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @usub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: usub_add_imm:
 ; X64:       # %bb.0:
@@ -157,11 +296,11 @@ define i32 @usub_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl %eax, %ecx
 ; NOCMOV-NEXT:    subl %edx, %ecx
 ; NOCMOV-NEXT:    subl %edx, %eax
-; NOCMOV-NEXT:    jae .LBB3_2
+; NOCMOV-NEXT:    jae .LBB6_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB3_2:
+; NOCMOV-NEXT:  .LBB6_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -171,6 +310,53 @@ define i32 @usub_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @usub_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: usub_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: usub_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    movl %eax, %esi
+; CMOV-NEXT:    subl %edx, %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    subl %edx, %eax
+; CMOV-NEXT:    cmovbl %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: usub_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB7_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB7_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)

From 345d85e1240801999ed321eb13a39048a9aa1a06 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Tue, 25 Jan 2022 13:41:09 +0100
Subject: [PATCH 545/946] [lldb] Fix mac build for D117490

This is exactly the kind of API misuse that the patch was meant to
detect.
---
 .../Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp     | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
index abb3b30e175a4..75271ca6abf49 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
@@ -484,10 +484,8 @@ PlatformDarwinKernel::GetKernelsAndKextsInDirectoryHelper(
   ConstString file_spec_extension = file_spec.GetFileNameExtension();
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM));
-  Log *log_verbose(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM | LLDB_LOG_OPTION_VERBOSE));
 
-  LLDB_LOGF(log_verbose, "PlatformDarwinKernel examining '%s'",
-            file_spec.GetPath().c_str());
+  LLDB_LOGV(log, "PlatformDarwinKernel examining '{0}'", file_spec);
 
   PlatformDarwinKernel *thisp = (PlatformDarwinKernel *)baton;
 
@@ -567,9 +565,8 @@ PlatformDarwinKernel::GetKernelsAndKextsInDirectoryHelper(
   if (recurse && file_spec_extension != g_dsym_suffix &&
       file_spec_extension != g_kext_suffix &&
       file_spec_extension != g_bundle_suffix) {
-    LLDB_LOGF(log_verbose,
-              "PlatformDarwinKernel descending into directory '%s'",
-              file_spec.GetPath().c_str());
+    LLDB_LOGV(log, "PlatformDarwinKernel descending into directory '{0}'",
+              file_spec);
     return FileSystem::eEnumerateDirectoryResultEnter;
   } else {
     return FileSystem::eEnumerateDirectoryResultNext;

From 6b67e89b45c1e84a5ddac23f8c9c8c3961577bda Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Tue, 25 Jan 2022 13:51:53 +0100
Subject: [PATCH 546/946] [lldb] Fix windows build for D117490

I forgot to update ProcessWindowsLog to the new API.
---
 .../Windows/Common/ProcessWindowsLog.cpp      | 22 ++++++-----
 .../Windows/Common/ProcessWindowsLog.h        | 37 +++++++++++++------
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
index 6f5e020e812b6..0d7649a98e8c3 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
@@ -11,17 +11,21 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-    {{"break"}, {"log breakpoints"}, WINDOWS_LOG_BREAKPOINTS},
-    {{"event"}, {"log low level debugger events"}, WINDOWS_LOG_EVENT},
-    {{"exception"}, {"log exception information"}, WINDOWS_LOG_EXCEPTION},
-    {{"memory"}, {"log memory reads and writes"}, WINDOWS_LOG_MEMORY},
-    {{"process"}, {"log process events and activities"}, WINDOWS_LOG_PROCESS},
-    {{"registers"}, {"log register read/writes"}, WINDOWS_LOG_REGISTERS},
-    {{"step"}, {"log step related activities"}, WINDOWS_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, WINDOWS_LOG_THREAD},
+    {{"break"}, {"log breakpoints"}, WindowsLog::Breakpoints},
+    {{"event"}, {"log low level debugger events"}, WindowsLog::Event},
+    {{"exception"}, {"log exception information"}, WindowsLog::Exception},
+    {{"memory"}, {"log memory reads and writes"}, WindowsLog::Memory},
+    {{"process"}, {"log process events and activities"}, WindowsLog::Process},
+    {{"registers"}, {"log register read/writes"}, WindowsLog::Registers},
+    {{"step"}, {"log step related activities"}, WindowsLog::Step},
+    {{"thread"}, {"log thread events and activities"}, WindowsLog::Thread},
 };
 
-Log::Channel ProcessWindowsLog::g_channel(g_categories, WINDOWS_LOG_PROCESS);
+static Log::Channel g_channel(g_categories, WindowsLog::Process);
+
+template <> Log::Channel &lldb_private::LogChannelFor<WindowsLog>() {
+  return g_channel;
+}
 
 void ProcessWindowsLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
index 66ba245c9fa88..68a9d767c227d 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
@@ -11,25 +11,38 @@
 
 #include "lldb/Utility/Log.h"
 
-#define WINDOWS_LOG_PROCESS (1u << 1)     // Log process operations
-#define WINDOWS_LOG_EXCEPTION (1u << 1)   // Log exceptions
-#define WINDOWS_LOG_THREAD (1u << 2)      // Log thread operations
-#define WINDOWS_LOG_MEMORY (1u << 3)      // Log memory reads/writes calls
-#define WINDOWS_LOG_BREAKPOINTS (1u << 4) // Log breakpoint operations
-#define WINDOWS_LOG_STEP (1u << 5)        // Log step operations
-#define WINDOWS_LOG_REGISTERS (1u << 6)   // Log register operations
-#define WINDOWS_LOG_EVENT (1u << 7)       // Low level debug events
-
 namespace lldb_private {
-class ProcessWindowsLog {
-  static Log::Channel g_channel;
 
+enum class WindowsLog : Log::MaskType {
+  Breakpoints = Log::ChannelFlag<0>, // Log breakpoint operations
+  Event = Log::ChannelFlag<1>,       // Low level debug events
+  Exception = Log::ChannelFlag<2>,   // Log exceptions
+  Memory = Log::ChannelFlag<3>,      // Log memory reads/writes calls
+  Process = Log::ChannelFlag<4>,     // Log process operations
+  Registers = Log::ChannelFlag<5>,   // Log register operations
+  Step = Log::ChannelFlag<6>,        // Log step operations
+  Thread = Log::ChannelFlag<7>,      // Log thread operations
+  LLVM_MARK_AS_BITMASK_ENUM(Thread)
+};
+
+#define WINDOWS_LOG_PROCESS ::lldb_private::WindowsLog::Process
+#define WINDOWS_LOG_EXCEPTION ::lldb_private::WindowsLog::Exception
+#define WINDOWS_LOG_THREAD ::lldb_private::WindowsLog::Thread
+#define WINDOWS_LOG_MEMORY ::lldb_private::WindowsLog::Memory
+#define WINDOWS_LOG_BREAKPOINTS ::lldb_private::WindowsLog::Breakpoints
+#define WINDOWS_LOG_STEP ::lldb_private::WindowsLog::Step
+#define WINDOWS_LOG_REGISTERS ::lldb_private::WindowsLog::Registers
+#define WINDOWS_LOG_EVENT ::lldb_private::WindowsLog::Event
+
+class ProcessWindowsLog {
 public:
   static void Initialize();
   static void Terminate();
 
-  static Log *GetLogIfAny(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAny(WindowsLog mask) { return GetLog(mask); }
 };
+
+template <> Log::Channel &LogChannelFor<WindowsLog>();
 }
 
 #endif // liblldb_ProcessWindowsLog_h_

From 694df0f0a807e1b0c1c04edd6714955f96b9cae1 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 21:50:02 +0900
Subject: [PATCH 547/946] [mlir][linalg][bufferize] Fix build

This fixes a linker error related to ModuleBufferization.cpp.

Differential Revision: https://reviews.llvm.org/D118127
---
 .../ComprehensiveBufferize/ModuleBufferization.cpp     | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index 0fe79862a69d0..e5eac1fb2765d 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -85,6 +85,16 @@ using namespace tensor;
 using namespace comprehensive_bufferize;
 using namespace mlir::bufferization;
 
+/// Attribute name used to mark the bufferization layout for region
+/// arguments during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
+
+/// Attribute name used to mark region arguments that can be bufferized
+/// in-place during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kInplaceableAttrName;
+
 namespace {
 /// The state of analysis of a FuncOp.
 enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };

From 4100cf2e92594342b53138bdbeeba220c7dd82c0 Mon Sep 17 00:00:00 2001
From: Marek Kurdej 
Date: Tue, 25 Jan 2022 14:12:30 +0100
Subject: [PATCH 548/946] [Visualizers] Fix Optional visualizer.

As discussed in https://reviews.llvm.org/D118105#3268773, OptionalStorage has been changed in commit https://github.com/llvm/llvm-project/commit/fb9730575086b3c2ba38a1aabf3106b01339888b, but the visualizer still tries to use old members.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118117
---
 llvm/utils/LLVMVisualizers/llvm.natvis | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index 6e75ebd6f4bba..108f1912c75e8 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -197,9 +197,9 @@ For later versions of Visual Studio, no setup is required.
   
   
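     <DisplayString Condition="!Storage.hasVal">None</DisplayString>
-    <DisplayString Condition="Storage.hasVal">{*(($T1 *)(unsigned char *)Storage.storage.buffer)}</DisplayString>
+    <DisplayString Condition="Storage.hasVal">{Storage.value}</DisplayString>
     <Expand>
-      <Item Name="[underlying]" Condition="Storage.hasVal">*(($T1 *)(unsigned char *)Storage.storage.buffer)</Item>
+      <Item Name="[underlying]" Condition="Storage.hasVal">Storage.value</Item>
     </Expand>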
   
 

From 6a008de82a8965bd8aac4456e20a51a883e4126b Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 14:18:05 +0100
Subject: [PATCH 549/946] [Evaluator] Simplify handling of bitcasted calls

When fetching the function, strip casts. When casting the result,
use the call result type. Don't actually inspect the bitcast.
---
 .../include/llvm/Transforms/Utils/Evaluator.h |  2 +-
 llvm/lib/Transforms/Utils/Evaluator.cpp       | 29 ++++++-------------
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
index 9346212dd8889..99e826bf855f2 100644
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -127,7 +127,7 @@ class Evaluator {
   }
 
   /// Casts call result to a type of bitcast call expression
-  Constant *castCallResultIfNeeded(Value *CallExpr, Constant *RV);
+  Constant *castCallResultIfNeeded(Type *ReturnType, Constant *RV);
 
   /// Given call site return callee and list of its formal arguments
   Function *getCalleeWithFormalArgs(CallBase &CB,
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 34f56b1a591d9..e73287c060ae4 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -245,17 +245,10 @@ static Function *getFunction(Constant *C) {
 Function *
 Evaluator::getCalleeWithFormalArgs(CallBase &CB,
                                    SmallVectorImpl<Constant *> &Formals) {
-  auto *V = CB.getCalledOperand();
+  auto *V = CB.getCalledOperand()->stripPointerCasts();
   if (auto *Fn = getFunction(getVal(V)))
     return getFormalParams(CB, Fn, Formals) ? Fn : nullptr;
-
-  auto *CE = dyn_cast<ConstantExpr>(V);
-  if (!CE || CE->getOpcode() != Instruction::BitCast ||
-      !getFormalParams(CB, getFunction(CE->getOperand(0)), Formals))
-    return nullptr;
-
-  return dyn_cast<Function>(
-      ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
+  return nullptr;
 }
 
 bool Evaluator::getFormalParams(CallBase &CB, Function *F,
@@ -284,17 +277,13 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F,
 
 /// If call expression contains bitcast then we may need to cast
 /// evaluated return value to a type of the call expression.
-Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
-  ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
-  if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
+Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) {
+  if (!RV || RV->getType() == ReturnType)
     return RV;
 
-  if (auto *FT =
-          dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
-    RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
-    if (!RV)
-      LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
-  }
+  RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL);
+  if (!RV)
+    LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
   return RV;
 }
 
@@ -540,7 +529,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
         if (Callee->isDeclaration()) {
           // If this is a function we can constant fold, do it.
           if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
-            InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
+            InstResult = castCallResultIfNeeded(CB.getType(), C);
             if (!InstResult)
               return false;
             LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
@@ -564,7 +553,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
             return false;
           }
           ValueStack.pop_back();
-          InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
+          InstResult = castCallResultIfNeeded(CB.getType(), RetVal);
           if (RetVal && !InstResult)
             return false;
 

From 71bbb78b8fdc72732e3c21ee6d37f3c3868a7fdc Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 22:05:10 +0900
Subject: [PATCH 550/946] [mlir][linalg][bufferize] Support tensor.generate

This is mostly a copy of the existing tensor.generate bufferization. Once TensorInterfaceImpl.cpp is moved to the tensor dialect, the existing rewrite pattern can be deleted.

Differential Revision: https://reviews.llvm.org/D117770
---
 .../BufferizableOpInterfaceImpl.cpp           | 61 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 20 ++++++
 2 files changed, 81 insertions(+)

diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index ea9d885736f90..aaa89b4ae2242 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"
@@ -228,6 +229,65 @@ struct ExtractOpInterface
   }
 };
 
+/// Bufferization of tensor.generate.
+struct GenerateOpInterface
+    : public BufferizableOpInterface::ExternalModel<GenerateOpInterface,
+                                                    tensor::GenerateOp> {
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto generateOp = cast<tensor::GenerateOp>(op);
+
+    // Allocate memory.
+    Location loc = op->getLoc();
+    MemRefType memrefType =
+        getContiguousMemRefType(generateOp.getType().cast<RankedTensorType>());
+    FailureOr<Value> maybeResult =
+        createAlloc(rewriter, loc, memrefType, generateOp.dynamicExtents(),
+                    /*deallocMemref=*/state.getOptions().createDeallocs,
+                    state.getOptions());
+    if (failed(maybeResult))
+      return failure();
+    Value result = *maybeResult;
+
+    // Collect loop bounds.
+    int64_t rank = memrefType.getRank();
+    Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    SmallVector<Value, 4> lowerBounds(rank, zero);
+    SmallVector<Value, 4> steps(rank, one);
+    SmallVector<Value, 4> upperBounds;
+    int nextDynamicIndex = 0;
+    for (int i = 0; i < rank; i++) {
+      Value upperBound = memrefType.isDynamicDim(i)
+                             ? generateOp.dynamicExtents()[nextDynamicIndex++]
+                             : rewriter.create<arith::ConstantIndexOp>(
+                                   loc, memrefType.getDimSize(i));
+      upperBounds.push_back(upperBound);
+    }
+
+    // Generate tensor elements with a parallel loop that stores into
+    // each element of the resulting memref. We use mergeBlockBefore to "move"
+    // this op's body into the scf.parallel's body.
+    auto parallel =
+        rewriter.create<scf::ParallelOp>(loc, lowerBounds, upperBounds, steps);
+    Block *parallelBody = parallel.getBody();
+    rewriter.mergeBlockBefore(generateOp.getBody(),
+                              parallelBody->getTerminator(),
+                              parallelBody->getArguments());
+    // Replace the inlined yield op with a store op. The scf.parallel's builder
+    // already populated an scf.yield at the end, so we don't need to worry
+    // about creating that.
+    Operation *elementYield = parallelBody->getTerminator()->getPrevNode();
+    rewriter.setInsertionPointAfter(elementYield);
+    rewriter.replaceOpWithNewOp<memref::StoreOp>(
+        elementYield, elementYield->getOperands()[0], result,
+        parallelBody->getArguments());
+
+    replaceOpWithBufferizedValues(rewriter, op, result);
+    return success();
+  }
+};
+
 /// Bufferization of tensor.insert. Replace with memref.store.
 struct InsertOpInterface
     : public BufferizableOpInterface::ExternalModel();
   registry.addOpInterface();
   registry.addOpInterface();
+  registry.addOpInterface();
   registry.addOpInterface();
   registry.addOpInterface();
   registry.addOpInterface();
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 28ee8bea2e9ec..eacb2bb9314dc 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1359,3 +1359,23 @@ func @tensor_rank(%arg0: tensor<*xf32>) -> index {
   // CHECK: return %[[r]] : index
   return %0 : index
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_generate_static_and_dynamic(
+//  CHECK-SAME:     %[[arg0:.*]]: index
+func @tensor_generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
+  // CHECK: %[[alloc:.*]] = memref.alloc(%[[arg0]]) {{.*}} : memref<16x?xindex>
+  // CHECK: scf.parallel (%[[arg1:.*]], %[[arg2:.*]]) = (%[[c0]], %[[c0]]) to (%[[c16]], %[[arg0]]) {{.*}} {
+  %result = tensor.generate %arg0 {
+  ^bb0(%i: index, %j: index):
+    %sum = arith.addi %i, %j : index
+    // CHECK: memref.store {{.*}}, %[[alloc]][%[[arg1]], %[[arg2]]]
+    // CHECK: scf.yield
+    tensor.yield %sum : index
+  } : tensor<16x?xindex>
+  // CHECK: }
+  return %result : tensor<16x?xindex>
+}

From d581c94d6bfbb336b8620ef06e4340b5ea18a23e Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Tue, 25 Jan 2022 22:05:35 +0900
Subject: [PATCH 551/946] [mlir][linalg][bufferize] Support
 tensor.from_elements

This is mostly a copy of the existing tensor.from_elements bufferization. Once TensorInterfaceImpl.cpp is moved to the tensor dialect, the existing rewrite pattern can be deleted.

Differential Revision: https://reviews.llvm.org/D117775
---
 .../BufferizableOpInterfaceImpl.cpp           | 77 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 21 +++++
 2 files changed, 98 insertions(+)

diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index aaa89b4ae2242..1c1226b451688 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -229,6 +229,82 @@ struct ExtractOpInterface
   }
 };
 
+// Implements backtracking to traverse indices of the output buffer while
+// iterating over op.elements().
+static void createStores(RewriterBase &rewriter, Location loc, int dim,
+                         Value buffer, ArrayRef shape,
+                         ArrayRef constants,
+                         OperandRange::iterator &elementIt,
+                         SmallVectorImpl &indices) {
+  if (dim == static_cast(shape.size()) - 1) {
+    for (int i = 0; i < shape.back(); ++i) {
+      indices.back() = constants[i];
+      rewriter.create(loc, *elementIt, buffer, indices);
+      ++elementIt;
+    }
+    return;
+  }
+  for (int i = 0; i < shape[dim]; ++i) {
+    indices[dim] = constants[i];
+    createStores(rewriter, loc, dim + 1, buffer, shape, constants, elementIt,
+                 indices);
+  }
+}
+
+/// Bufferization of tensor.from_elements.
+struct FromElementsOpInterface
+    : public BufferizableOpInterface::ExternalModel {
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto fromElementsOp = cast(op);
+
+    // Allocate a buffer for the result.
+    Location loc = op->getLoc();
+    auto tensorType = fromElementsOp.getType().cast();
+    auto shape = tensorType.getShape();
+    MemRefType resultType =
+        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+    FailureOr maybeBuffer =
+        createAlloc(rewriter, loc, resultType, {},
+                    /*deallocMemref=*/state.getOptions().createDeallocs,
+                    state.getOptions());
+    if (failed(maybeBuffer))
+      return failure();
+    Value buffer = *maybeBuffer;
+
+    // Case: tensor<0xelem_type>.
+    if (fromElementsOp.elements().empty()) {
+      replaceOpWithBufferizedValues(rewriter, op, buffer);
+      return success();
+    }
+
+    // Case: tensor.
+    if (shape.empty()) {
+      rewriter.create(loc, fromElementsOp.elements().front(),
+                                       buffer);
+      replaceOpWithBufferizedValues(rewriter, op, buffer);
+      return success();
+    }
+
+    // Create constants for the range of possible indices [0, max{shape_i}).
+    auto maxDim = *std::max_element(shape.begin(), shape.end());
+    SmallVector constants;
+    constants.reserve(maxDim);
+    for (int i = 0; i < maxDim; ++i)
+      constants.push_back(rewriter.create(loc, i));
+
+    // Traverse all `elements` and create `memref.store` ops.
+    auto elementIt = fromElementsOp.elements().begin();
+    SmallVector indices(tensorType.getRank(), constants[0]);
+    createStores(rewriter, loc, /*dim=*/0, buffer, shape, constants, elementIt,
+                 indices);
+
+    replaceOpWithBufferizedValues(rewriter, op, buffer);
+    return success();
+  }
+};
+
 /// Bufferization of tensor.generate.
 struct GenerateOpInterface
     : public BufferizableOpInterface::ExternalModel();
   registry.addOpInterface();
   registry.addOpInterface();
+  registry.addOpInterface();
   registry.addOpInterface();
   registry.addOpInterface();
   registry.addOpInterface();
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index eacb2bb9314dc..c4ea9a48b8ece 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1379,3 +1379,24 @@ func @tensor_generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
   // CHECK: }
   return %result : tensor<16x?xindex>
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_from_elements_2d(
+//  CHECK-SAME:     %[[ELEM0:.*]]: index, %[[ELEM1:.*]]: index
+func @tensor_from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+  // CHECK-DAG: %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<3x2xindex>
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C0]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C0]], %[[C1]]]
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C1]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C1]], %[[C1]]]
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C2]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C2]], %[[C1]]]
+  %0 = tensor.from_elements %arg0, %arg1, %arg0, %arg1, %arg0, %arg1
+         : tensor<3x2xindex>
+  //     CHECK: return %[[MEMREF]]
+  return %0 : tensor<3x2xindex>
+}

From c0e3c893aa095c78156900ef11f68042aff83839 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=BChnel?= 
Date: Mon, 13 Dec 2021 15:14:31 +0000
Subject: [PATCH 552/946] [NFC][clangd] cleaning up llvm-qualified-auto

This is a cleanup of all llvm-qualified-auto findings.
This patch was created by automatically applying the fixes from
clang-tidy.

Differential Revision: https://reviews.llvm.org/D113898
---
 clang-tools-extra/clangd/AST.cpp              |  2 +-
 clang-tools-extra/clangd/CodeComplete.cpp     |  4 +-
 clang-tools-extra/clangd/ExpectedTypes.cpp    |  2 +-
 clang-tools-extra/clangd/FindSymbols.cpp      |  2 +-
 .../clangd/HeaderSourceSwitch.cpp             |  4 +-
 clang-tools-extra/clangd/Hover.cpp            |  2 +-
 clang-tools-extra/clangd/TUScheduler.cpp      |  2 +-
 .../clangd/index/IndexAction.cpp              |  2 +-
 .../clangd/index/SymbolCollector.cpp          |  2 +-
 .../clangd/index/dex/Iterator.cpp             |  4 +-
 .../refactor/tweaks/AnnotateHighlightings.cpp |  2 +-
 .../clangd/refactor/tweaks/DefineInline.cpp   |  4 +-
 .../clangd/refactor/tweaks/DumpAST.cpp        |  2 +-
 .../clangd/refactor/tweaks/ExpandMacro.cpp    |  2 +-
 .../clangd/unittests/ClangdTests.cpp          | 26 +++++------
 .../clangd/unittests/FileIndexTests.cpp       |  2 +-
 .../clangd/unittests/HoverTests.cpp           |  2 +-
 .../clangd/unittests/TUSchedulerTests.cpp     | 10 ++---
 .../unittests/tweaks/DefineInlineTests.cpp    | 44 +++++++++----------
 19 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp
index 53b55e1579ec4..b970325098c65 100644
--- a/clang-tools-extra/clangd/AST.cpp
+++ b/clang-tools-extra/clangd/AST.cpp
@@ -456,7 +456,7 @@ class DeducedTypeVisitor : public RecursiveASTVisitor {
     const AutoType *AT = D->getReturnType()->getContainedAutoType();
     if (AT && !AT->getDeducedType().isNull()) {
       DeducedType = AT->getDeducedType();
-    } else if (auto DT = dyn_cast(D->getReturnType())) {
+    } else if (auto *DT = dyn_cast(D->getReturnType())) {
       // auto in a trailing return type just points to a DecltypeType and
       // getContainedAutoType does not unwrap it.
       if (!DT->getUnderlyingType().isNull())
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index d9165599fb9ab..43f35232270a3 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -453,7 +453,7 @@ struct CodeCompletionBuilder {
   template 
   const std::string *onlyValue() const {
     auto B = Bundled.begin(), E = Bundled.end();
-    for (auto I = B + 1; I != E; ++I)
+    for (auto *I = B + 1; I != E; ++I)
       if (I->*Member != B->*Member)
         return nullptr;
     return &(B->*Member);
@@ -461,7 +461,7 @@ struct CodeCompletionBuilder {
 
   template  const bool *onlyValue() const {
     auto B = Bundled.begin(), E = Bundled.end();
-    for (auto I = B + 1; I != E; ++I)
+    for (auto *I = B + 1; I != E; ++I)
       if (I->*Member != B->*Member)
         return nullptr;
     return &(B->*Member);
diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp
index e0a4e472f3496..01a08c8589c18 100644
--- a/clang-tools-extra/clangd/ExpectedTypes.cpp
+++ b/clang-tools-extra/clangd/ExpectedTypes.cpp
@@ -53,7 +53,7 @@ typeOfCompletion(const CodeCompletionResult &R) {
   auto T = VD->getType();
   if (T.isNull())
     return llvm::None;
-  if (auto FuncT = T->getAs()) {
+  if (auto *FuncT = T->getAs()) {
     // Functions are a special case. They are completed as 'foo()' and we want
     // to match their return type rather than the function type itself.
     // FIXME(ibiryukov): in some cases, we might want to avoid completing `()`
diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index edbeeed9e2ca6..75961d3a6ea17 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -483,7 +483,7 @@ class DocumentOutline {
     if (!llvm::isa(D))
       return VisitKind::No;
 
-    if (auto Func = llvm::dyn_cast(D)) {
+    if (auto *Func = llvm::dyn_cast(D)) {
       // Some functions are implicit template instantiations, those should be
       // ignored.
       if (auto *Info = Func->getTemplateSpecializationInfo()) {
diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
index e3e2ab3ea8694..ed86c2eb0d12d 100644
--- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
+++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
@@ -25,13 +25,13 @@ llvm::Optional getCorrespondingHeaderOrSource(
   llvm::StringRef PathExt = llvm::sys::path::extension(OriginalFile);
 
   // Lookup in a list of known extensions.
-  auto SourceIter =
+  auto *SourceIter =
       llvm::find_if(SourceExtensions, [&PathExt](PathRef SourceExt) {
         return SourceExt.equals_insensitive(PathExt);
       });
   bool IsSource = SourceIter != std::end(SourceExtensions);
 
-  auto HeaderIter =
+  auto *HeaderIter =
       llvm::find_if(HeaderExtensions, [&PathExt](PathRef HeaderExt) {
         return HeaderExt.equals_insensitive(PathExt);
       });
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 58ef2e3feb99d..c5cb5cd3df53a 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -86,7 +86,7 @@ std::string getLocalScope(const Decl *D) {
       Policy.SuppressScope = true;
       return declaredType(D).getAsString(Policy);
     }
-    if (auto RD = dyn_cast(D))
+    if (auto *RD = dyn_cast(D))
       return ("(anonymous " + RD->getKindName() + ")").str();
     return std::string("");
   };
diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 43b4d8ca8881f..9b98791e748c3 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -1714,7 +1714,7 @@ DebouncePolicy::compute(llvm::ArrayRef History) const {
   // nth_element needs a mutable array, take the chance to bound the data size.
   History = History.take_back(15);
   llvm::SmallVector Recent(History.begin(), History.end());
-  auto Median = Recent.begin() + Recent.size() / 2;
+  auto *Median = Recent.begin() + Recent.size() / 2;
   std::nth_element(Recent.begin(), Median, Recent.end());
 
   clock::duration Target =
diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp
index 0d8ae93efa4ae..cd0c6dbd5ec69 100644
--- a/clang-tools-extra/clangd/index/IndexAction.cpp
+++ b/clang-tools-extra/clangd/index/IndexAction.cpp
@@ -61,7 +61,7 @@ struct IncludeGraphCollector : public PPCallbacks {
       return;
 
     const auto FileID = SM.getFileID(Loc);
-    const auto File = SM.getFileEntryForID(FileID);
+    const auto *File = SM.getFileEntryForID(FileID);
     auto URI = toURI(File);
     if (!URI)
       return;
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index cccc2c4e8f3d4..3257041ffa0e3 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -43,7 +43,7 @@ namespace {
 /// If \p ND is a template specialization, returns the described template.
 /// Otherwise, returns \p ND.
 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
-  if (auto T = ND.getDescribedTemplate())
+  if (auto *T = ND.getDescribedTemplate())
     return *T;
   return ND;
 }
diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp
index 8b37403ff406f..8b5e5244d3111 100644
--- a/clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ b/clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -77,7 +77,7 @@ class AndIterator : public Iterator {
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
     OS << "(& ";
-    auto Separator = "";
+    auto *Separator = "";
     for (const auto &Child : Children) {
       OS << Separator << *Child;
       Separator = " ";
@@ -206,7 +206,7 @@ class OrIterator : public Iterator {
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
     OS << "(| ";
-    auto Separator = "";
+    auto *Separator = "";
     for (const auto &Child : Children) {
       OS << Separator << *Child;
       Separator = " ";
diff --git a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
index cb53b3bedf9d0..1383560ad4656 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
@@ -38,7 +38,7 @@ REGISTER_TWEAK(AnnotateHighlightings)
 
 Expected AnnotateHighlightings::apply(const Selection &Inputs) {
   const Decl *CommonDecl = nullptr;
-  for (auto N = Inputs.ASTSelection.commonAncestor(); N && !CommonDecl;
+  for (auto *N = Inputs.ASTSelection.commonAncestor(); N && !CommonDecl;
        N = N->Parent)
     CommonDecl = N->ASTNode.get();
 
diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
index 14d2d082ce0f5..ae1ceaf12cb86 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
@@ -342,13 +342,13 @@ renameParameters(const FunctionDecl *Dest, const FunctionDecl *Source,
 // Because canonical declaration points to template decl instead of
 // specialization.
 const FunctionDecl *findTarget(const FunctionDecl *FD) {
-  auto CanonDecl = FD->getCanonicalDecl();
+  auto *CanonDecl = FD->getCanonicalDecl();
   if (!FD->isFunctionTemplateSpecialization() || CanonDecl == FD)
     return CanonDecl;
   // For specializations CanonicalDecl is the TemplatedDecl, which is not the
   // target we want to inline into. Instead we traverse previous decls to find
   // the first forward decl for this specialization.
-  auto PrevDecl = FD;
+  auto *PrevDecl = FD;
   while (PrevDecl->getPreviousDecl() != CanonDecl) {
     PrevDecl = PrevDecl->getPreviousDecl();
     assert(PrevDecl && "Found specialization without template decl");
diff --git a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
index d7a7852925db5..3f340fbd2a3fd 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
@@ -34,7 +34,7 @@ class DumpAST : public Tweak {
   const char *id() const override final;
 
   bool prepare(const Selection &Inputs) override {
-    for (auto N = Inputs.ASTSelection.commonAncestor(); N && !Node;
+    for (auto *N = Inputs.ASTSelection.commonAncestor(); N && !Node;
          N = N->Parent)
       if (dumpable(N->ASTNode))
         Node = N->ASTNode;
diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
index a7e2dddf4cbaf..fad3a3c3d20df 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
@@ -52,7 +52,7 @@ findTokenUnderCursor(const SourceManager &SM,
                      llvm::ArrayRef Spelled,
                      unsigned CursorOffset) {
   // Find the token that strats after the offset, then look at a previous one.
-  auto It = llvm::partition_point(Spelled, [&](const syntax::Token &T) {
+  auto *It = llvm::partition_point(Spelled, [&](const syntax::Token &T) {
     assert(T.location().isFileID());
     return SM.getFileOffset(T.location()) <= CursorOffset;
   });
diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
index 8295275835177..e6a2e377c59d2 100644
--- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
@@ -212,7 +212,7 @@ TEST(ClangdServerTest, ParseWithHeader) {
   parseSourceAndDumpAST("foo.cpp", "#include \"foo.h\"", {{"foo.h", ""}},
                         /*ExpectErrors=*/false);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -228,7 +228,7 @@ TEST(ClangdServerTest, Reparse) {
   MockCompilationDatabase CDB;
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -263,7 +263,7 @@ TEST(ClangdServerTest, ReparseOnHeaderChange) {
   MockCompilationDatabase CDB;
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -420,7 +420,7 @@ TEST(ClangdServerTest, SearchLibDir) {
   FS.Files[StringPath] = "class mock_string {};";
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include 
 mock_string x;
 )cpp";
@@ -429,7 +429,7 @@ mock_string x;
   runAddDocument(Server, FooCpp, SourceContents);
   EXPECT_FALSE(DiagConsumer.hadErrorInLastDiags());
 
-  const auto SourceContentsWithError = R"cpp(
+  const auto *SourceContentsWithError = R"cpp(
 #include 
 std::string x;
 )cpp";
@@ -445,11 +445,11 @@ TEST(ClangdServerTest, ForceReparseCompileCommand) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents1 = R"cpp(
+  const auto *SourceContents1 = R"cpp(
 template 
 struct foo { T x; };
 )cpp";
-  const auto SourceContents2 = R"cpp(
+  const auto *SourceContents2 = R"cpp(
 template 
 struct bar { T x; };
 )cpp";
@@ -481,7 +481,7 @@ TEST(ClangdServerTest, ForceReparseCompileCommandDefines) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #ifdef WITH_ERROR
 this
 #endif
@@ -585,7 +585,7 @@ TEST(ClangdServerTest, FileStats) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   Path FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 struct Something {
   int method();
 };
@@ -652,14 +652,14 @@ TEST(ClangdThreadingTest, StressTest) {
   // BlockingRequestInterval-request will be a blocking one.
   const unsigned BlockingRequestInterval = 40;
 
-  const auto SourceContentsWithoutErrors = R"cpp(
+  const auto *SourceContentsWithoutErrors = R"cpp(
 int a;
 int b;
 int c;
 int d;
 )cpp";
 
-  const auto SourceContentsWithErrors = R"cpp(
+  const auto *SourceContentsWithErrors = R"cpp(
 int a = x;
 int b;
 int c;
@@ -892,14 +892,14 @@ TEST(ClangdThreadingTest, NoConcurrentDiagnostics) {
     std::promise StartSecondReparse;
   };
 
-  const auto SourceContentsWithoutErrors = R"cpp(
+  const auto *SourceContentsWithoutErrors = R"cpp(
 int a;
 int b;
 int c;
 int d;
 )cpp";
 
-  const auto SourceContentsWithErrors = R"cpp(
+  const auto *SourceContentsWithErrors = R"cpp(
 int a = x;
 int b;
 int c;
diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
index 7ce47e5dbed07..fe2f1c395c382 100644
--- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -249,7 +249,7 @@ TEST(FileIndexTest, HasSystemHeaderMappingsInPreamble) {
 }
 
 TEST(FileIndexTest, TemplateParamsInLabel) {
-  auto Source = R"cpp(
+  auto *Source = R"cpp(
 template 
 class vector {
 };
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 740c907d7e4e4..6f5f6ba7b2669 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -2695,7 +2695,7 @@ TEST(Hover, DocsFromMostSpecial) {
 
   TestTU TU = TestTU::withCode(T.code());
   auto AST = TU.build();
-  for (auto Comment : {"doc1", "doc2", "doc3"}) {
+  for (const auto *Comment : {"doc1", "doc2", "doc3"}) {
     for (const auto &P : T.points(Comment)) {
       auto H = getHover(AST, P, format::getLLVMStyle(), nullptr);
       ASSERT_TRUE(H);
diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
index 605f398a3fd56..b63db4b8ccd87 100644
--- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
@@ -132,7 +132,7 @@ class TUSchedulerTests : public ::testing::Test {
     private:
       void reportDiagnostics(PathRef File, llvm::ArrayRef Diags,
                              PublishFn Publish) {
-        auto D = Context::current().get(DiagsCallbackKey);
+        auto *D = Context::current().get(DiagsCallbackKey);
         if (!D)
           return;
         Publish([&]() {
@@ -671,11 +671,11 @@ TEST_F(TUSchedulerTests, EmptyPreamble) {
 
   FS.Files[Header] = "void foo()";
   FS.Timestamps[Header] = time_t(0);
-  auto WithPreamble = R"cpp(
+  auto *WithPreamble = R"cpp(
     #include "foo.h"
     int main() {}
   )cpp";
-  auto WithEmptyPreamble = R"cpp(int main() {})cpp";
+  auto *WithEmptyPreamble = R"cpp(int main() {})cpp";
   S.update(Foo, getInputs(Foo, WithPreamble), WantDiagnostics::Auto);
   S.runWithPreamble(
       "getNonEmptyPreamble", Foo, TUScheduler::Stale,
@@ -748,7 +748,7 @@ TEST_F(TUSchedulerTests, RunWaitsForPreamble) {
   // the same time. All reads should get the same non-null preamble.
   TUScheduler S(CDB, optsForTest());
   auto Foo = testPath("foo.cpp");
-  auto NonEmptyPreamble = R"cpp(
+  auto *NonEmptyPreamble = R"cpp(
     #define FOO 1
     #define BAR 2
 
@@ -844,7 +844,7 @@ TEST_F(TUSchedulerTests, MissingHeader) {
   auto HeaderA = testPath("a/foo.h");
   auto HeaderB = testPath("b/foo.h");
 
-  auto SourceContents = R"cpp(
+  auto *SourceContents = R"cpp(
       #include "foo.h"
       int c = b;
     )cpp";
diff --git a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
index 4c1bd3ab55567..37d7459b16a0f 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
@@ -192,7 +192,7 @@ TEST_F(DefineInlineTest, UsingShadowDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformNestedNamespaces) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       void bar();
       namespace b {
@@ -220,7 +220,7 @@ TEST_F(DefineInlineTest, TransformNestedNamespaces) {
       b::c::aux();
       a::b::c::aux();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       void bar();
       namespace b {
@@ -252,7 +252,7 @@ TEST_F(DefineInlineTest, TransformNestedNamespaces) {
 }
 
 TEST_F(DefineInlineTest, TransformUsings) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a { namespace b { namespace c { void aux(); } } }
 
     void foo();
@@ -263,7 +263,7 @@ TEST_F(DefineInlineTest, TransformUsings) {
       using c::aux;
       namespace d = c;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a { namespace b { namespace c { void aux(); } } }
 
     void foo(){
@@ -278,7 +278,7 @@ TEST_F(DefineInlineTest, TransformUsings) {
 }
 
 TEST_F(DefineInlineTest, TransformDecls) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     void foo();
     void f^oo() {
       class Foo {
@@ -293,7 +293,7 @@ TEST_F(DefineInlineTest, TransformDecls) {
       enum class EnClass { Zero, One };
       EnClass y = EnClass::Zero;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     void foo(){
       class Foo {
       public:
@@ -312,7 +312,7 @@ TEST_F(DefineInlineTest, TransformDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformTemplDecls) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -329,7 +329,7 @@ TEST_F(DefineInlineTest, TransformTemplDecls) {
       bar>.bar();
       aux>();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -350,7 +350,7 @@ TEST_F(DefineInlineTest, TransformTemplDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformMembers) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     class Foo {
       void foo();
     };
@@ -358,7 +358,7 @@ TEST_F(DefineInlineTest, TransformMembers) {
     void Foo::f^oo() {
       return;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     class Foo {
       void foo(){
       return;
@@ -395,7 +395,7 @@ TEST_F(DefineInlineTest, TransformMembers) {
 }
 
 TEST_F(DefineInlineTest, TransformDependentTypes) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template  class Bar {};
     }
@@ -409,7 +409,7 @@ TEST_F(DefineInlineTest, TransformDependentTypes) {
       Bar B;
       Bar> q;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template  class Bar {};
     }
@@ -511,7 +511,7 @@ TEST_F(DefineInlineTest, TransformFunctionTempls) {
 }
 
 TEST_F(DefineInlineTest, TransformTypeLocs) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -528,7 +528,7 @@ TEST_F(DefineInlineTest, TransformTypeLocs) {
       Foo foo;
       a::Bar>::Baz> q;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -549,7 +549,7 @@ TEST_F(DefineInlineTest, TransformTypeLocs) {
 }
 
 TEST_F(DefineInlineTest, TransformDeclRefs) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -575,7 +575,7 @@ TEST_F(DefineInlineTest, TransformDeclRefs) {
       bar();
       a::test();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template  class Bar {
       public:
@@ -605,12 +605,12 @@ TEST_F(DefineInlineTest, TransformDeclRefs) {
 }
 
 TEST_F(DefineInlineTest, StaticMembers) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace ns { class X { static void foo(); void bar(); }; }
     void ns::X::b^ar() {
       foo();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace ns { class X { static void foo(); void bar(){
       foo();
     } }; }
@@ -654,7 +654,7 @@ est);
 }
 
 TEST_F(DefineInlineTest, TransformTemplParamNames) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     struct Foo {
       struct Bar {
         template  class V, template class W,
               int X, int Y>
     void Foo::Bar::f^oo(U, W, int Q) {})cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     struct Foo {
       struct Bar {
         template 
Date: Tue, 25 Jan 2022 14:25:48 +0100
Subject: [PATCH 553/946] [GlobalISel] Avoid pointer element type access during
 InlineAsm lowering

Same change as has been made for the SDAG lowering.
---
 llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0503b1ed2050..dfcf1b80392af 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -298,7 +298,7 @@ bool InlineAsmLowering::lowerInlineAsm(
 
     // Compute the value type for each operand.
     if (OpInfo.hasArg()) {
-      OpInfo.CallOperandVal = const_cast(Call.getArgOperand(ArgNo++));
+      OpInfo.CallOperandVal = const_cast(Call.getArgOperand(ArgNo));
 
       if (isa(OpInfo.CallOperandVal)) {
         LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n");
@@ -310,10 +310,8 @@ bool InlineAsmLowering::lowerInlineAsm(
       // If this is an indirect operand, the operand is a pointer to the
       // accessed type.
       if (OpInfo.isIndirect) {
-        PointerType *PtrTy = dyn_cast(OpTy);
-        if (!PtrTy)
-          report_fatal_error("Indirect operand for inline asm not a pointer!");
-        OpTy = PtrTy->getPointerElementType();
+        OpTy = Call.getAttributes().getParamElementType(ArgNo);
+        assert(OpTy && "Indirect operand must have elementtype attribute");
       }
 
       // FIXME: Support aggregate input operands
@@ -325,7 +323,7 @@ bool InlineAsmLowering::lowerInlineAsm(
 
       OpInfo.ConstraintVT =
           TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
-
+      ++ArgNo;
     } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
       assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
       if (StructType *STy = dyn_cast(Call.getType())) {

From 475927d04606433f4ad70b9e41bbe731994ba9b6 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 14:31:51 +0100
Subject: [PATCH 554/946] [AsmParserTest] Avoid pointer element type accesses
 (NFC)

Use isOpaqueOrPointeeTypeMatches() instead.
---
 llvm/unittests/AsmParser/AsmParserTest.cpp | 26 +++++-----------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 7639ff50571e6..9d2333b49bc31 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -252,9 +252,7 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PointerType *PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(Type::getIntNTy(Ctx, 32)));
 
   // Two indirections.
   Ty = parseType("i32**", Error, M, &Mapping);
@@ -262,13 +260,8 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isPointerTy());
-
-  PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  Type *ExpectedElemTy = PointerType::getUnqual(Type::getIntNTy(Ctx, 32));
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(ExpectedElemTy));
 
   // Check that we reject types with garbage.
   Ty = parseType("i32 garbage", Error, M, &Mapping);
@@ -386,9 +379,7 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 4);
 
   PointerType *PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(Type::getIntNTy(Ctx, 32)));
 
   // Two indirections.
   Ty = parseTypeAtBeginning("i32**", Read, Error, M, &Mapping);
@@ -397,13 +388,8 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 5);
 
   PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isPointerTy());
-
-  PT = cast(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  Type *ExpectedElemTy = PointerType::getUnqual(Type::getIntNTy(Ctx, 32));
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(ExpectedElemTy));
 
   // Check that we reject types with garbage.
   Ty = parseTypeAtBeginning("i32 garbage", Read, Error, M, &Mapping);

From bf00f7a64e3a37b1b9cc59a152da6ddb0accdbd9 Mon Sep 17 00:00:00 2001
From: Hans Wennborg 
Date: Mon, 24 Jan 2022 15:25:17 +0100
Subject: [PATCH 555/946] Add llvm-dwp to LLVM_TOOLCHAIN_TOOLS

since it qualifies as a toolchain tool rather than "internal llvm tool".
This will make it part of builds which set the
LLVM_INSTALL_TOOLCHAIN_ONLY cmake option, such as the Windows installer.

Differential revision: https://reviews.llvm.org/D118042
---
 llvm/cmake/modules/AddLLVM.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index fed1fec7d72e8..a262d55ddfa42 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1210,6 +1210,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS)
     llvm-ar
     llvm-cov
     llvm-cxxfilt
+    llvm-dwp
     llvm-ranlib
     llvm-lib
     llvm-ml

From 8e3e772f84e590ed9ba7182c7b01844afdb2dbff Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 14:39:22 +0100
Subject: [PATCH 556/946] [OpenMPIRBuilderTest] Avoid some pointer element type
 accesses (NFC)

Use isOpaqueOrPointeeTypeMatches() instead, where possible.
---
 .../Frontend/OpenMPIRBuilderTest.cpp          | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index bc2d3ec8e7abe..a4d82e07dc139 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1096,14 +1096,15 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
 
   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
   EXPECT_TRUE(Arg2Type->isPointerTy());
-  EXPECT_EQ(Arg2Type->getPointerElementType(), I32Ty);
+  EXPECT_TRUE(cast(Arg2Type)->isOpaqueOrPointeeTypeMatches(I32Ty));
 
   // Arguments that need to be passed through pointers and reloaded will get
   // used earlier in the functions and therefore will appear first in the
   // argument list after outlining.
   Type *Arg3Type = OutlinedFn->getArg(3)->getType();
   EXPECT_TRUE(Arg3Type->isPointerTy());
-  EXPECT_EQ(Arg3Type->getPointerElementType(), StructTy);
+  EXPECT_TRUE(
+      cast(Arg3Type)->isOpaqueOrPointeeTypeMatches(StructTy));
 
   Type *Arg4Type = OutlinedFn->getArg(4)->getType();
   EXPECT_EQ(Arg4Type, I32PtrTy);
@@ -3814,10 +3815,10 @@ TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
                   ->getArrayElementType()
                   ->isPointerTy());
-  EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
-                  ->getArrayElementType()
-                  ->getPointerElementType()
-                  ->isIntegerTy(8));
+  EXPECT_TRUE(
+      cast(
+          MapperAllocas.ArgsBase->getAllocatedType()->getArrayElementType())
+          ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
 
   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
   ArrType = dyn_cast(MapperAllocas.Args->getAllocatedType());
@@ -3825,10 +3826,9 @@ TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
                   ->getArrayElementType()
                   ->isPointerTy());
-  EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
-                  ->getArrayElementType()
-                  ->getPointerElementType()
-                  ->isIntegerTy(8));
+  EXPECT_TRUE(cast(
+                  MapperAllocas.Args->getAllocatedType()->getArrayElementType())
+                  ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
 
   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
   ArrType = dyn_cast(MapperAllocas.ArgSizes->getAllocatedType());

From 7cc3e141d7106eb753b73cb8ad7251c67c738e9f Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 14:52:01 +0100
Subject: [PATCH 557/946] [MemProf] Avoid pointer element type access

Determine the masked load/store access type from the value type
of the intrinsics, rather than the pointer element type. For
cleanliness, include the access type in InterestingMemoryAccess.
---
 .../Instrumentation/MemProfiler.cpp           | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 92ea007691b27..8fedefccf0e16 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -156,6 +156,7 @@ struct InterestingMemoryAccess {
   Value *Addr = nullptr;
   bool IsWrite;
   unsigned Alignment;
+  Type *AccessTy;
   uint64_t TypeSize;
   Value *MaybeMask = nullptr;
 };
@@ -181,7 +182,7 @@ class MemProfiler {
                          Value *Addr, uint32_t TypeSize, bool IsWrite);
   void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                    Instruction *I, Value *Addr,
-                                   unsigned Alignment, uint32_t TypeSize,
+                                   unsigned Alignment, Type *AccessTy,
                                    bool IsWrite);
   void instrumentMemIntrinsic(MemIntrinsic *MI);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
@@ -334,36 +335,32 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
 
   InterestingMemoryAccess Access;
 
-  const DataLayout &DL = I->getModule()->getDataLayout();
   if (LoadInst *LI = dyn_cast(I)) {
     if (!ClInstrumentReads)
       return None;
     Access.IsWrite = false;
-    Access.TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+    Access.AccessTy = LI->getType();
     Access.Alignment = LI->getAlignment();
     Access.Addr = LI->getPointerOperand();
   } else if (StoreInst *SI = dyn_cast(I)) {
     if (!ClInstrumentWrites)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+    Access.AccessTy = SI->getValueOperand()->getType();
     Access.Alignment = SI->getAlignment();
     Access.Addr = SI->getPointerOperand();
   } else if (AtomicRMWInst *RMW = dyn_cast(I)) {
     if (!ClInstrumentAtomics)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+    Access.AccessTy = RMW->getValOperand()->getType();
     Access.Alignment = 0;
     Access.Addr = RMW->getPointerOperand();
   } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) {
     if (!ClInstrumentAtomics)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+    Access.AccessTy = XCHG->getCompareOperand()->getType();
     Access.Alignment = 0;
     Access.Addr = XCHG->getPointerOperand();
   } else if (auto *CI = dyn_cast(I)) {
@@ -376,16 +373,16 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
           return None;
         // Masked store has an initial operand for the value.
         OpOffset = 1;
+        Access.AccessTy = CI->getArgOperand(0)->getType();
         Access.IsWrite = true;
       } else {
         if (!ClInstrumentReads)
           return None;
+        Access.AccessTy = CI->getType();
         Access.IsWrite = false;
       }
 
       auto *BasePtr = CI->getOperand(0 + OpOffset);
-      auto *Ty = BasePtr->getType()->getPointerElementType();
-      Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
       if (auto *AlignmentConstant =
               dyn_cast(CI->getOperand(1 + OpOffset)))
         Access.Alignment = (unsigned)AlignmentConstant->getZExtValue();
@@ -412,14 +409,16 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
   if (Access.Addr->isSwiftError())
     return None;
 
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy);
   return Access;
 }
 
 void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                               Instruction *I, Value *Addr,
                                               unsigned Alignment,
-                                              uint32_t TypeSize, bool IsWrite) {
-  auto *VTy = cast(Addr->getType()->getPointerElementType());
+                                              Type *AccessTy, bool IsWrite) {
+  auto *VTy = cast(AccessTy);
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto *Zero = ConstantInt::get(IntptrTy, 0);
@@ -468,7 +467,7 @@ void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
 
   if (Access.MaybeMask) {
     instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
-                                Access.Alignment, Access.TypeSize,
+                                Access.Alignment, Access.AccessTy,
                                 Access.IsWrite);
   } else {
     // Since the access counts will be accumulated across the entire allocation,

From 4ed7c6eec97925281f4c2a02ec7030dab750ba34 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer 
Date: Mon, 24 Jan 2022 18:46:33 +0100
Subject: [PATCH 558/946] [AMDGPU] Only match correct type for a16

Addresses are floats when a sampler is present and unsigned integers
when no sampler is present.

Therefore, only zext instructions, not sext instructions should match.

Also match integer constants that can be truncated.

Differential Revision: https://reviews.llvm.org/D118043
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 45 ++++++---
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 99 +++++++++++++++++++
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index c3a326945557e..4f1d700bcd842 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -58,24 +58,37 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
 
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
-static bool canSafelyConvertTo16Bit(Value &V) {
+// The value is expected to be either a float (IsFloat = true) or an unsigned
+// integer (IsFloat = false).
+static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
   Type *VTy = V.getType();
   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
     // The value is already 16-bit, so we don't want to convert to 16-bit again!
     return false;
   }
-  if (ConstantFP *ConstFloat = dyn_cast(&V)) {
-    // We need to check that if we cast the index down to a half, we do not lose
-    // precision.
-    APFloat FloatValue(ConstFloat->getValueAPF());
-    bool LosesInfo = true;
-    FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
-    return !LosesInfo;
+  if (IsFloat) {
+    if (ConstantFP *ConstFloat = dyn_cast(&V)) {
+      // We need to check that if we cast the index down to a half, we do not
+      // lose precision.
+      APFloat FloatValue(ConstFloat->getValueAPF());
+      bool LosesInfo = true;
+      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
+                         &LosesInfo);
+      return !LosesInfo;
+    }
+  } else {
+    if (ConstantInt *ConstInt = dyn_cast(&V)) {
+      // We need to check that if we cast the index down to an i16, we do not
+      // lose precision.
+      APInt IntValue(ConstInt->getValue());
+      return IntValue.getActiveBits() <= 16;
+    }
   }
+
   Value *CastSrc;
-  if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
-      match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
-      match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
+  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
+                       : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
+  if (IsExt) {
     Type *CastSrcTy = CastSrc->getType();
     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
       return true;
@@ -203,6 +216,10 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   if (!ST->hasA16() && !ST->hasG16())
     return None;
 
+  // Address is interpreted as float if the instruction has a sampler or as
+  // unsigned int if there is no sampler.
+  bool HasSampler =
+      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
   bool FloatCoord = false;
   // true means derivatives can be converted to 16 bit, coordinates not
   bool OnlyDerivatives = false;
@@ -211,7 +228,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
        OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
     Value *Coord = II.getOperand(OperandIndex);
     // If the values are not derived from 16-bit values, we cannot optimize.
-    if (!canSafelyConvertTo16Bit(*Coord)) {
+    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
       if (OperandIndex < ImageDimIntr->CoordStart ||
           ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
         return None;
@@ -232,7 +249,9 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   // Check if there is a bias parameter and if it can be converted to f16
   if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
     Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
-    if (!canSafelyConvertTo16Bit(*Bias))
+    assert(HasSampler &&
+           "Only image instructions with a sampler can have a bias");
+    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
       OnlyDerivatives = true;
   }
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 709d531b8cee6..894e0ef860646 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -3667,6 +3667,105 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspa
   ret void
 }
 
+define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
+; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const(
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %dsdh32 = fpext half %dsdh to float
+  %dtdh32 = fpext half %dtdh to float
+  %dsdv32 = fpext half %dsdv to float
+  %dtdv32 = fpext half %dtdv to float
+  %s32 = fpext half %s to float
+  %slice32 = fpext half %slice to float
+  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
+; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %dsdh32 = fpext half %dsdh to float
+  %dtdh32 = fpext half %dtdh to float
+  %dsdv32 = fpext half %dsdv to float
+  %dtdv32 = fpext half %dtdv to float
+  %s32 = fpext half %s to float
+  %slice32 = fpext half %slice to float
+  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_1d_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_1d_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = sext i16 [[S:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = sext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) {
+; CHECK-LABEL: @image_load_a16_mip_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %t32 = zext i16 %t to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d_const(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_2d_const(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = zext i16 [[S:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.image.sample g16
 ; --------------------------------------------------------------------

From 2c8a77ab21ff3a41829a5d67e0b838cc7a9f5f21 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Tue, 25 Jan 2022 15:17:07 +0100
Subject: [PATCH 559/946] [mlir] Move duplicated
 BufferizableOpInterface::kBufferLayoutAttrName defs to a single place

---
 .../Bufferization/IR/BufferizableOpInterface.cpp       | 10 ++++++++++
 .../ComprehensiveBufferize/ModuleBufferization.cpp     | 10 ----------
 mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp           | 10 ----------
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 9cb99db16d6a7..7d91229625cca 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -32,6 +32,16 @@ namespace bufferization {
 using namespace mlir;
 using namespace bufferization;
 
+/// Attribute name used to mark the bufferization layout for region
+/// arguments during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
+
+/// Attribute name used to mark region arguments that can be bufferized
+/// in-place during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kInplaceableAttrName;
+
 //===----------------------------------------------------------------------===//
 // BufferizationOptions
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index e5eac1fb2765d..0fe79862a69d0 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -85,16 +85,6 @@ using namespace tensor;
 using namespace comprehensive_bufferize;
 using namespace mlir::bufferization;
 
-/// Attribute name used to mark the bufferization layout for region
-/// arguments during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
-
-/// Attribute name used to mark region arguments that can be bufferized
-/// in-place during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kInplaceableAttrName;
-
 namespace {
 /// The state of analysis of a FuncOp.
 enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
index ccab0bbd6cb2c..623dd5085363d 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
@@ -62,16 +62,6 @@ struct LinalgInlinerInterface : public DialectInlinerInterface {
 constexpr const ::llvm::StringLiteral
     LinalgDialect::kMemoizedIndexingMapsAttrName;
 
-/// Attribute name used to mark the bufferization layout for region
-/// arguments during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
-
-/// Attribute name used to mark region arguments that can be bufferized
-/// in-place during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kInplaceableAttrName;
-
 /// Trait to check if T provides a `regionBuilder` method.
 template 
 using has_region_builder = decltype(T::regionBuilder);

From 98db33349bcc4d12a56313c406c1f40038258f10 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 15:20:32 +0100
Subject: [PATCH 560/946] [SLC] Fix pointer diff type in sprintf() optimization

We should always be calculating a byte-wise difference here.
Previously this calculated the pointer difference while taking
the pointer element type into account, which is incorrect.
---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  6 +++---
 llvm/test/Transforms/InstCombine/stpcpy-1.ll   | 14 +++++---------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 123fb0cfd1cbb..e02d02a057521 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2515,9 +2515,9 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
     } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
       // Handle mismatched pointer types (goes away with typeless pointers?).
-      V = B.CreatePointerCast(V, Dest->getType());
-      Value *PtrDiff = B.CreatePtrDiff(
-          Dest->getType()->getPointerElementType(), V, Dest);
+      V = B.CreatePointerCast(V, B.getInt8PtrTy());
+      Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
+      Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
       return B.CreateIntCast(PtrDiff, CI->getType(), false);
     }
 
diff --git a/llvm/test/Transforms/InstCombine/stpcpy-1.ll b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
index 5c59edc639c91..79587102af8a0 100644
--- a/llvm/test/Transforms/InstCombine/stpcpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
@@ -59,8 +59,8 @@ define i8* @test_no_simplify1() {
 
 define i8* @test_no_simplify2(i8* %dst, i8* %src) {
 ; CHECK-LABEL: @test_no_simplify2(
-; CHECK-NEXT:    %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src)
-; CHECK-NEXT:    ret i8* %ret
+; CHECK-NEXT:    [[RET:%.*]] = musttail call i8* @stpcpy(i8* [[DST:%.*]], i8* [[SRC:%.*]])
+; CHECK-NEXT:    ret i8* [[RET]]
 ;
   %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src)
   ret i8* %ret
@@ -87,13 +87,9 @@ define i32 @PR51200(i8** %p, i32* %p2) {
 ; CHECK-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[P2:%.*]] to i8*
 ; CHECK-NEXT:    [[STPCPY:%.*]] = call i8* @stpcpy(i8* [[CSTR]], i8* [[CSTR1]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i8* [[STPCPY]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i8** [[P]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2
-; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
-; CHECK-NEXT:    ret i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8** [[P]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %call = call i32 (i8**, i32*, ...) @sprintf(i8** %p, i32* bitcast ([3 x i8]* @percent_s to i32*), i32* %p2)
   ret i32 %call

From f3314e3747873fdf026a28742a30f372503baf32 Mon Sep 17 00:00:00 2001
From: Jim Lin 
Date: Tue, 25 Jan 2022 15:05:43 +0800
Subject: [PATCH 561/946] [clang-tidy] Pop Files only if FileChangeReason is
 ExitFile

The enum FileChangeReason has four possible values: EnterFile, ExitFile,
SystemHeaderPragma and RenameFile.
The back element of Files should be popped only if the reason is ExitFile.
---
 .../clang-tidy/readability/DuplicateIncludeCheck.cpp            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
index 681b8399154a7..a6e49439c8434 100644
--- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
@@ -71,7 +71,7 @@ void DuplicateIncludeCallbacks::FileChanged(SourceLocation Loc,
                                             FileID PrevFID) {
   if (Reason == EnterFile)
     Files.emplace_back();
-  else
+  else if (Reason == ExitFile)
     Files.pop_back();
 }
 

From b0956a9acf73a9eef9cb426d00b8dee775682f35 Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Tue, 25 Jan 2022 14:49:17 +0000
Subject: [PATCH 562/946] [GVN] Add tests for loop load PRE through select.

---
 .../GVN/PRE/pre-loop-load-through-select.ll   | 841 ++++++++++++++++++
 1 file changed, 841 insertions(+)
 create mode 100644 llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll

diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
new file mode 100644
index 0000000000000..58f3d64fc8f6f
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
@@ -0,0 +1,841 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='require<domtree>,loop(loop-simplifycfg),gvn' -S %s | FileCheck %s
+
+define i32 @test_pointer_phi_select_same_object(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_lcssa(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_lcssa(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_different_objects(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_different_objects(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_multiple_loads_1(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_multiple_loads_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_multiple_loads_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_multiple_loads_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %ptr.iv, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_load_after(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_after(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_2_PRE:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L_2:%.*]] = phi i32 [ [[L_2_PRE]], [[ENTRY:%.*]] ], [ [[L_3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[L_3]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[L_3]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.select, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_split_edge(i32* %ptr, i32* %end, i1 %c)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_split_edge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ], [ [[START_PTR]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[MIN_SELECT:%.*]], [[LOOP]] ], [ [[PTR]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
+; CHECK:       loop.exit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LCSSA_PHI_2:%.*]] = phi i32* [ [[END]], [[ENTRY:%.*]] ], [ [[MIN_SELECT]], [[LOOP_EXIT]] ]
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[LCSSA_PHI_2]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br i1 %c, label %exit, label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %loop.exit, label %loop
+
+loop.exit:
+  %lcssa.phi.1 = phi i32* [ %min.select, %loop ]
+  br label %exit
+
+exit:
+  %lcssa.phi.2 = phi i32* [ %end, %entry ], [ %lcssa.phi.1, %loop.exit ]
+  %res = load i32, i32* %lcssa.phi.2, align 4
+  ret i32 %res
+}
+
+
+declare void @may_throw() readonly
+
+define i32 @test_pointer_phi_select_load_may_not_execute_1(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_may_not_execute_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @may_throw()
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  call void @may_throw()
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_load_may_not_execute_2(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_may_not_execute_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    call void @may_throw()
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  call void @may_throw()
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_store(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  store i32 0, i32* %min.ptr
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+declare void @may_write()
+
+define i32 @test_pointer_phi_select_same_object_may_write_call(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_may_write_call(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    call void @may_write()
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  call void @may_write()
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_header_exit(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_header_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop.latch ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop.latch ]
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop.latch
+
+loop.latch:
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  br label %loop.header
+
+exit:
+  %res = load i32, i32* %min.ptr, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_ptr_use_cycle(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_ptr_use_cycle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_PREHEADER:%.*]], label [[LOOP]]
+; CHECK:       exit.preheader:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[P_NEXT:%.*]], [[EXIT]] ], [ [[MIN_SELECT]], [[EXIT_PREHEADER]] ]
+; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
+; CHECK-NEXT:    [[P_NEXT]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    br label [[EXIT]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %p = phi i32* [ %min.select, %loop ], [ %p.next, %exit ]
+  store i32 0, i32* %p
+  %p.next = getelementptr inbounds i32, i32* %p, i64 1
+  br label %exit
+}
+
+define i32 @test_pointer_phi_select_same_object_maybe_clobbered_in_exit(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_maybe_clobbered_in_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  store i32 0, i32* %start.ptr
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_maybe_clobbered_in_exit_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_maybe_clobbered_in_exit_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_1:%.*]], label [[LOOP]]
+; CHECK:       exit.1:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit.1, label %loop
+
+exit.1:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  store i32 0, i32* %start.ptr
+  br label %exit.2
+
+exit.2:
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+define i32 @test_pointer_phi_select_same_object_invoke_in_chain(i32* %ptr, i32* %end)  personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_invoke_in_chain(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_1:%.*]], label [[LOOP]]
+; CHECK:       exit.1:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    invoke void @may_throw()
+; CHECK-NEXT:    to label [[EXIT_2:%.*]] unwind label [[CATCH_OBJECT:%.*]]
+; CHECK:       exit.2:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       catch.object:
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    catch i8* null
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit.1, label %loop
+
+exit.1:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  store i32 0, i32* %start.ptr
+  invoke void @may_throw()
+  to label %exit.2 unwind label %catch.object
+
+exit.2:
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+
+catch.object:
+  %lp = landingpad { i8*, i32 }
+  catch i8* null
+  unreachable
+}
+
+define i32 @test_pointer_phi_select_used_by_others_in_loop(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_used_by_others_in_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[L_2_PRE:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L_2:%.*]] = phi i32 [ [[L_2_PRE]], [[ENTRY:%.*]] ], [ [[L_3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[L_3]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[L_3]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.select, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_used_by_others_in_loop_1(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_used_by_others_in_loop_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_2]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_used_by_others_in_loop_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_used_by_others_in_loop_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[GEP_MIN_PTR:%.*]] = getelementptr inbounds i32, i32* [[MIN_PTR]], i32 1
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_MIN_PTR]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %gep.min.ptr = getelementptr inbounds i32, i32* %min.ptr, i32 1
+  %l.3 = load i32, i32* %gep.min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_no_iter_load(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_no_iter_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 10, [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 10, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}

From 9d8c3ad94fad5dd5fb511af89c9e7c3679922ce0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <1.int32@gmail.com>
Date: Tue, 25 Jan 2022 15:09:31 +0100
Subject: [PATCH 563/946] [clang-tidy] Change code of SignalHandlerCheck (NFC).

Using clang::CallGraph to get the called functions.
This makes a better foundation to improve support for
C++ and print the call chain.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D118016
---
 .../bugprone/SignalHandlerCheck.cpp           | 122 +++++++++---------
 .../clang-tidy/bugprone/SignalHandlerCheck.h  |   6 +-
 .../checkers/bugprone-signal-handler.c        |  35 +++++
 3 files changed, 98 insertions(+), 65 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
index 63478a9540589..1fc19a8652e3e 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
@@ -7,15 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SignalHandlerCheck.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/Analysis/CallGraph.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include 
-#include 
 
 using namespace clang::ast_matchers;
 
@@ -42,7 +36,9 @@ struct OptionEnumMapping<
 
 namespace bugprone {
 
-static bool isSystemCall(const FunctionDecl *FD) {
+namespace {
+
+bool isSystemCall(const FunctionDecl *FD) {
   // Find a possible redeclaration in system header.
   // FIXME: Looking at the canonical declaration is not the most exact way
   // to do this.
@@ -73,6 +69,22 @@ static bool isSystemCall(const FunctionDecl *FD) {
       FD->getCanonicalDecl()->getLocation());
 }
 
+/// Given a call graph node of a function and another one that is called from
+/// this function, get a CallExpr of the corresponding function call.
+/// It is unspecified which call is found if multiple calls exist, but the order
+/// should be deterministic (depend only on the AST).
+Expr *findCallExpr(const CallGraphNode *Caller, const CallGraphNode *Callee) {
+  auto FoundCallee = llvm::find_if(
+      Caller->callees(), [Callee](const CallGraphNode::CallRecord &Call) {
+        return Call.Callee == Callee;
+      });
+  assert(FoundCallee != Caller->end() &&
+         "Callee should be called from the caller function here.");
+  return FoundCallee->CallExpr;
+}
+
+} // namespace
+
 AST_MATCHER(FunctionDecl, isSystemCall) { return isSystemCall(&Node); }
 
 SignalHandlerCheck::SignalHandlerCheck(StringRef Name,
@@ -117,68 +129,50 @@ void SignalHandlerCheck::check(const MatchFinder::MatchResult &Result) {
   const auto *HandlerDecl =
       Result.Nodes.getNodeAs<FunctionDecl>("handler_decl");
   const auto *HandlerExpr = Result.Nodes.getNodeAs<DeclRefExpr>("handler_expr");
+  assert(SignalCall && HandlerDecl && HandlerExpr &&
+         "All of these should exist in a match here.");
+
+  if (CG.size() <= 1) {
+    // Call graph must be populated with the entire TU at the beginning.
+    // (It is possible to add a single function but the functions called from it
+    // are not analysed in this case.)
+    CG.addToCallGraph(const_cast<TranslationUnitDecl *>(
+        HandlerDecl->getTranslationUnitDecl()));
+    assert(CG.size() > 1 &&
+           "There should be at least one function added to call graph.");
+  }
 
-  // Visit each function encountered in the callgraph only once.
-  llvm::DenseSet<const FunctionDecl *> SeenFunctions;
-
-  // The worklist of the callgraph visitation algorithm.
-  std::deque<const CallExpr *> CalledFunctions;
-
-  auto ProcessFunction = [&](const FunctionDecl *F, const Expr *CallOrRef) {
-    // Ensure that canonical declaration is used.
-    F = F->getCanonicalDecl();
-
-    // Do not visit function if already encountered.
-    if (!SeenFunctions.insert(F).second)
-      return true;
-
-    // Check if the call is allowed.
-    // Non-system calls are not considered.
-    if (isSystemCall(F)) {
-      if (isSystemCallAllowed(F))
-        return true;
-
-      reportBug(F, CallOrRef, SignalCall, HandlerDecl);
-
-      return false;
-    }
-
-    // Get the body of the encountered non-system call function.
-    const FunctionDecl *FBody;
-    if (!F->hasBody(FBody)) {
-      reportBug(F, CallOrRef, SignalCall, HandlerDecl);
-      return false;
-    }
-
-    // Collect all called functions.
-    auto Matches = match(decl(forEachDescendant(callExpr().bind("call"))),
-                         *FBody, FBody->getASTContext());
-    for (const auto &Match : Matches) {
-      const auto *CE = Match.getNodeAs<CallExpr>("call");
-      if (isa<FunctionDecl>(CE->getCalleeDecl()))
-        CalledFunctions.push_back(CE);
-    }
-
-    return true;
-  };
-
-  if (!ProcessFunction(HandlerDecl, HandlerExpr))
+  // Check for special case when the signal handler itself is an unsafe external
+  // function.
+  if (!isFunctionAsyncSafe(HandlerDecl)) {
+    reportBug(HandlerDecl, HandlerExpr, SignalCall, HandlerDecl);
     return;
+  }
 
-  // Visit the definition of every function referenced by the handler function.
-  // Check for allowed function calls.
-  while (!CalledFunctions.empty()) {
-    const CallExpr *FunctionCall = CalledFunctions.front();
-    CalledFunctions.pop_front();
-    // At insertion we have already ensured that only function calls are there.
-    const auto *F = cast<FunctionDecl>(FunctionCall->getCalleeDecl());
-
-    if (!ProcessFunction(F, FunctionCall))
-      break;
+  CallGraphNode *HandlerNode = CG.getNode(HandlerDecl);
+  // Signal handler can be external but not unsafe, no call graph in this case.
+  if (!HandlerNode)
+    return;
+  // Start from signal handler and visit every function call.
+  for (auto Itr = llvm::df_begin(HandlerNode), ItrE = llvm::df_end(HandlerNode);
+       Itr != ItrE; ++Itr) {
+    const auto *CallF = dyn_cast<FunctionDecl>((*Itr)->getDecl());
+    if (CallF && !isFunctionAsyncSafe(CallF)) {
+      assert(Itr.getPathLength() >= 2);
+      reportBug(CallF, findCallExpr(Itr.getPath(Itr.getPathLength() - 2), *Itr),
+                SignalCall, HandlerDecl);
+    }
   }
 }
 
-bool SignalHandlerCheck::isSystemCallAllowed(const FunctionDecl *FD) const {
+bool SignalHandlerCheck::isFunctionAsyncSafe(const FunctionDecl *FD) const {
+  if (isSystemCall(FD))
+    return isSystemCallAsyncSafe(FD);
+  // For external (not checkable) functions assume that these are unsafe.
+  return FD->hasBody();
+}
+
+bool SignalHandlerCheck::isSystemCallAsyncSafe(const FunctionDecl *FD) const {
   const IdentifierInfo *II = FD->getIdentifier();
   // Unnamed functions are not explicitly allowed.
   if (!II)
diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
index 51f273ffa51ec..9ac77a5cb323f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNALHANDLERCHECK_H
 
 #include "../ClangTidyCheck.h"
+#include "clang/Analysis/CallGraph.h"
 #include "llvm/ADT/StringSet.h"
 
 namespace clang {
@@ -31,9 +32,12 @@ class SignalHandlerCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
+  bool isFunctionAsyncSafe(const FunctionDecl *FD) const;
+  bool isSystemCallAsyncSafe(const FunctionDecl *FD) const;
   void reportBug(const FunctionDecl *CalledFunction, const Expr *CallOrRef,
                  const CallExpr *SignalCall, const FunctionDecl *HandlerDecl);
-  bool isSystemCallAllowed(const FunctionDecl *FD) const;
+
+  CallGraph CG;
 
   AsyncSafeFunctionSetType AsyncSafeFunctionSet;
   llvm::StringSet<> &ConformingFunctions;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
index e0b2c24f44ed3..ac9b731cdc7a4 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
@@ -51,6 +51,37 @@ void handler_extern(int) {
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
 }
 
+void test_false_condition(int) {
+  if (0)
+    printf("1234");
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+}
+
+void test_multiple_calls(int) {
+  f_extern();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  printf("1234");
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  f_extern();
+  // first 'f_extern' call found only
+}
+
+void f_recursive();
+
+void test_recursive(int) {
+  f_recursive();
+  printf("");
+  // first 'printf' call (in other function) found only
+}
+
+void f_recursive() {
+  f_extern();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  printf("");
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  f_recursive(2);
+}
+
 void test() {
   signal(SIGINT, handler_abort);
   signal(SIGINT, handler_signal);
@@ -66,4 +97,8 @@ void test() {
 
   signal(SIGINT, SIG_IGN);
   signal(SIGINT, SIG_DFL);
+
+  signal(SIGINT, test_false_condition);
+  signal(SIGINT, test_multiple_calls);
+  signal(SIGINT, test_recursive);
 }

From ea4b0489f5caf136aefe04869d650aa8d966041c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 14:58:20 +0000
Subject: [PATCH 564/946] [X86][AVX] Add PR47194 shuffle test case

---
 .../CodeGen/X86/vector-shuffle-256-v32.ll     | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index d280580f55f1b..d6731f851d0bf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -4785,6 +4785,60 @@ define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_
   ret <32 x i8> %shuffle
 }
 
+; PR47194
+define <32 x i8> @shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-SLOW-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
+; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX2-SLOW-NEXT:    retq
+;
+; AVX2-FAST-ALL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-FAST-ALL:       # %bb.0:
+; AVX2-FAST-ALL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; AVX2-FAST-ALL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
+; AVX2-FAST-ALL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-FAST-ALL-NEXT:    retq
+;
+; AVX2-FAST-PERLANE-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-FAST-PERLANE:       # %bb.0:
+; AVX2-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
+; AVX2-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX2-FAST-PERLANE-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; XOPAVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; XOPAVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; XOPAVX2-NEXT:    retq
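+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31>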
+  ret <32 x i8> %shuffle
+}
+
 define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
 ; AVX1:       # %bb.0:

From a2505bd063e7949e2f0d892d85dec360fb28c702 Mon Sep 17 00:00:00 2001
From: Sean Fertile 
Date: Thu, 20 Jan 2022 13:30:42 -0500
Subject: [PATCH 565/946] [PowerPC][AIX] Override markFunctionEnd()

During fast-isel calling 'markFunctionEnd' in the base class will call
tidyLandingPads. This can cause an issue where we have determined that
we need ehinfo and emitted a traceback table with the bits set to
indicate that we will be emitting the ehinfo, but the tidying deletes
all landing pads. In this case we end up emitting a reference to
__ehinfo.N symbol, but not emitting a definition to said symbol and the
resulting file fails to assemble.

Differential Revision: https://reviews.llvm.org/D117040
---
 llvm/lib/CodeGen/AsmPrinter/AIXException.cpp |  2 +
 llvm/lib/CodeGen/AsmPrinter/DwarfException.h |  2 +
 llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll  | 50 ++++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 964cef75d164f..03e63321e3c4d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -23,6 +23,8 @@ namespace llvm {
 
 AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
 
+void AIXException::markFunctionEnd() { endFragment(); }
+
 void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
                                           const MCSymbol *PerSym) {
   // Generate EH Info Table.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 40898c9fc8551..4defa8a30855e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -98,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
 public:
   AIXException(AsmPrinter *A);
 
+  void markFunctionEnd() override;
+
   void endModule() override {}
   void beginFunction(const MachineFunction *MF) override {}
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll b/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll
new file mode 100644
index 0000000000000..24ead04072f14
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple powerpc64-ibm-aix -verify-machineinstrs < %s | \
+; RUN:  FileCheck %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix -fast-isel -verify-machineinstrs < %s | \
+; RUN:  FileCheck %s
+
+; Function Attrs: nounwind
+declare i32 @func1() #0
+
+declare i32 @__xlcxx_personality_v1(...)
+
+; Function Attrs: mustprogress noinline optnone
+define linkonce_odr void @func2() #1 align 2 personality i8* bitcast (i32 (...)* @__xlcxx_personality_v1 to i8*) {
+entry:
+  %0 = alloca i8*, align 8
+  %1 = alloca i32, align 4
+  br label %2
+
+2:                                                ; preds = %3, %entry
+  br i1 false, label %3, label %8
+
+3:                                                ; preds = %2
+  %4 = invoke i32 @func1()
+          to label %2 unwind label %lpad
+
+lpad:                                                ; preds = %3
+  %5 = landingpad { i8*, i32 }
+          cleanup
+  %6 = extractvalue { i8*, i32 } %5, 0
+  store i8* %6, i8** %0, align 8
+  %7 = extractvalue { i8*, i32 } %5, 1
+  store i32 %7, i32* %1, align 4
+  br label %eh.resume
+
+8:                                               ; preds = %2
+  ret void
+
+eh.resume:                                               ; preds = %lpad
+  %9 = load i8*, i8** %0, align 8
+  %10 = load i32, i32* %1, align 4
+  %11 = insertvalue { i8*, i32 } undef, i8* %9, 0
+  %12 = insertvalue { i8*, i32 } %11, i32 %10, 1
+  resume { i8*, i32 } %12
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { mustprogress noinline optnone }
+
+; CHECK: __ehinfo.0:
+; CHECK: .tc __ehinfo.0[TC],__ehinfo.0

From e581841e8cf46109acea92e1acb661c404fa62b9 Mon Sep 17 00:00:00 2001
From: Tue Ly 
Date: Mon, 24 Jan 2022 21:35:25 -0500
Subject: [PATCH 566/946] [libc] Implement log10f correctly rounded for all
 rounding modes.

Based on RLIBM implementation similar to logf and log2f.  Most of the exceptional inputs are the exact powers of 10.

Reviewed By: sivachandra, zimmermann6, santoshn, jpl169

Differential Revision: https://reviews.llvm.org/D118093
---
 libc/config/linux/aarch64/entrypoints.txt     |   1 +
 libc/config/linux/x86_64/entrypoints.txt      |   1 +
 libc/spec/stdc.td                             |   2 +
 libc/src/math/CMakeLists.txt                  |   2 +
 libc/src/math/generic/CMakeLists.txt          |  14 ++
 libc/src/math/generic/log10f.cpp              | 182 ++++++++++++++++++
 libc/src/math/log10f.h                        |  18 ++
 libc/test/src/math/CMakeLists.txt             |  13 ++
 .../math/differential_testing/CMakeLists.txt  |  11 ++
 .../math/differential_testing/log10f_perf.cpp |  16 ++
 libc/test/src/math/exhaustive/CMakeLists.txt  |  17 ++
 libc/test/src/math/exhaustive/log10f_test.cpp |  55 ++++++
 libc/test/src/math/log10f_test.cpp            |  74 +++++++
 libc/utils/MPFRWrapper/MPFRUtils.cpp          |   8 +
 libc/utils/MPFRWrapper/MPFRUtils.h            |   1 +
 15 files changed, 415 insertions(+)
 create mode 100644 libc/src/math/generic/log10f.cpp
 create mode 100644 libc/src/math/log10f.h
 create mode 100644 libc/test/src/math/differential_testing/log10f_perf.cpp
 create mode 100644 libc/test/src/math/exhaustive/log10f_test.cpp
 create mode 100644 libc/test/src/math/log10f_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 89d3fabc9f815..65dacdef31222 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -145,6 +145,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
+    libc.src.math.log10f
     libc.src.math.log2f
     libc.src.math.logf
     libc.src.math.logb
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 578fa2dd30d36..1e63ed0390092 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -144,6 +144,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.llround
     libc.src.math.llroundf
     libc.src.math.llroundl
+    libc.src.math.log10f
     libc.src.math.log2f
     libc.src.math.logf
     libc.src.math.logb
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index ac8dfe425c771..5cae1ddfe0ad0 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -396,6 +396,8 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
           FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
 
+          FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+
           FunctionSpec<"log2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
           FunctionSpec<"logf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index aa4019d63b813..0ff088e6061ea 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -116,6 +116,8 @@ add_math_entrypoint_object(ldexp)
 add_math_entrypoint_object(ldexpf)
 add_math_entrypoint_object(ldexpl)
 
+add_math_entrypoint_object(log10f)
+
 add_math_entrypoint_object(log2f)
 
 add_math_entrypoint_object(logf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c3914b8c45af3..88c29eb4ba0bb 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -666,6 +666,20 @@ add_object_library(
     -Wno-c++17-extensions
 )
 
+add_entrypoint_object(
+  log10f
+  SRCS
+    log10f.cpp
+  HDRS
+    ../log10f.h
+  DEPENDS
+    .common_constants
+    libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O3
+    -Wno-c++17-extensions
+)
+
 add_entrypoint_object(
   log2f
   SRCS
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp
new file mode 100644
index 0000000000000..c7dbbfd494c7d
--- /dev/null
+++ b/libc/src/math/generic/log10f.cpp
@@ -0,0 +1,182 @@
+//===-- Single-precision log10(x) function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log10f.h"
+#include "common_constants.h" // Lookup table for (1/f)
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvUtils.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/common.h"
+
+// This is an algorithm for log10(x) in single precision which is
+// correctly rounded for all rounding modes, based on the implementation of
+// log10(x) from the RLIBM project at:
+// https://people.cs.rutgers.edu/~sn349/rlibm
+
+// Step 1 - Range reduction:
+//   For x = 2^m * 1.mant, log10(x) = m * log10(2) + log10(1.mant)
+//   If x is denormal, we normalize it by multiplying x by 2^23 and subtracting
+//   23 from m.
+
+// Step 2 - Another range reduction:
+//   To compute log10(1.mant), let f be the highest 8 bits including the hidden
+// bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the
+// mantissa. Then we have the following approximation formula:
+//   log10(1.mant) = log10(f) + log10(1.mant / f)
+//                 = log10(f) + log10(1 + d/f)
+//                 ~ log10(f) + P(d/f)
+// since d/f is sufficiently small.
+//   log10(f) and 1/f are then stored in two 128-entry (2^7) look-up tables.
+
+// Step 3 - Polynomial approximation:
+//   To compute P(d/f), we use a single degree-5 polynomial in double precision
+// which provides correct rounding for all but few exception values.
+//   For more detail about how this polynomial is obtained, please refer to the
+// papers:
+//   Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce
+// Correctly Rounded Results of an Elementary Function for Multiple
+// Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN
+// Symposium on Principles of Programming Languages (POPL-2022), Philadelphia,
+// USA, Jan. 16-22, 2022.
+// https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf
+//   Aanjaneya, M., Lim, J., and Nagarakatte, S., "RLibm-Prog: Progressive
+// Polynomial Approximations for Fast Correctly Rounded Math Libraries",
+// Dept. of Comp. Sci., Rutgers U., Technical Report DCS-TR-758, Nov. 2021.
+// https://arxiv.org/pdf/2111.12852.pdf.
+
+namespace __llvm_libc {
+
+// Exact power of 10 in float:
+
+// Lookup table for log10(f) = log10(1 + n*2^(-7)) where n = 0..127.
+static constexpr double LOG10_F[128] = {
+    0x0.0000000000000p+0, 0x1.bafd47221ed26p-9, 0x1.b9476a4fcd10fp-8,
+    0x1.49b0851443684p-7, 0x1.b5e908eb13790p-7, 0x1.10a83a8446c78p-6,
+    0x1.45f4f5acb8be0p-6, 0x1.7adc3df3b1ff8p-6, 0x1.af5f92b00e610p-6,
+    0x1.e3806acbd058fp-6, 0x1.0ba01a8170000p-5, 0x1.25502c0fc314cp-5,
+    0x1.3ed1199a5e425p-5, 0x1.58238eeb353dap-5, 0x1.71483427d2a99p-5,
+    0x1.8a3fadeb847f4p-5, 0x1.a30a9d609efeap-5, 0x1.bba9a058dfd84p-5,
+    0x1.d41d5164facb4p-5, 0x1.ec6647eb58808p-5, 0x1.02428c1f08016p-4,
+    0x1.0e3d29d81165ep-4, 0x1.1a23445501816p-4, 0x1.25f5215eb594ap-4,
+    0x1.31b3055c47118p-4, 0x1.3d5d335c53179p-4, 0x1.48f3ed1df48fbp-4,
+    0x1.5477731973e85p-4, 0x1.5fe80488af4fdp-4, 0x1.6b45df6f3e2c9p-4,
+    0x1.769140a2526fdp-4, 0x1.81ca63d05a44ap-4, 0x1.8cf183886480dp-4,
+    0x1.9806d9414a209p-4, 0x1.a30a9d609efeap-4, 0x1.adfd07416be07p-4,
+    0x1.b8de4d3ab3d98p-4, 0x1.c3aea4a5c6effp-4, 0x1.ce6e41e463da5p-4,
+    0x1.d91d5866aa99cp-4, 0x1.e3bc1ab0e19fep-4, 0x1.ee4aba610f204p-4,
+    0x1.f8c9683468191p-4, 0x1.019c2a064b486p-3, 0x1.06cbd67a6c3b6p-3,
+    0x1.0bf3d0937c41cp-3, 0x1.11142f0811357p-3, 0x1.162d082ac9d10p-3,
+    0x1.1b3e71ec94f7bp-3, 0x1.204881dee8777p-3, 0x1.254b4d35e7d3cp-3,
+    0x1.2a46e8ca7ba2ap-3, 0x1.2f3b691c5a001p-3, 0x1.3428e2540096dp-3,
+    0x1.390f6844a0b83p-3, 0x1.3def0e6dfdf85p-3, 0x1.42c7e7fe3fc02p-3,
+    0x1.479a07d3b6411p-3, 0x1.4c65807e93338p-3, 0x1.512a644296c3dp-3,
+    0x1.55e8c518b10f8p-3, 0x1.5aa0b4b0988fap-3, 0x1.5f52447255c92p-3,
+    0x1.63fd857fc49bbp-3, 0x1.68a288b60b7fcp-3, 0x1.6d415eaf0906bp-3,
+    0x1.71da17c2b7e80p-3, 0x1.766cc40889e85p-3, 0x1.7af97358b9e04p-3,
+    0x1.7f80354d952a0p-3, 0x1.84011944bcb75p-3, 0x1.887c2e605e119p-3,
+    0x1.8cf183886480dp-3, 0x1.9161276ba2978p-3, 0x1.95cb2880f45bap-3,
+    0x1.9a2f95085a45cp-3, 0x1.9e8e7b0c0d4bep-3, 0x1.a2e7e8618c2d2p-3,
+    0x1.a73beaaaa22f4p-3, 0x1.ab8a8f56677fcp-3, 0x1.afd3e3a23b680p-3,
+    0x1.b417f49ab8807p-3, 0x1.b856cf1ca3105p-3, 0x1.bc907fd5d1c40p-3,
+    0x1.c0c5134610e26p-3, 0x1.c4f495c0002a2p-3, 0x1.c91f1369eb7cap-3,
+    0x1.cd44983e9e7bdp-3, 0x1.d165300e333f7p-3, 0x1.d580e67edc43dp-3,
+    0x1.d997c70da9b47p-3, 0x1.dda9dd0f4a329p-3, 0x1.e1b733b0c7381p-3,
+    0x1.e5bfd5f83d342p-3, 0x1.e9c3cec58f807p-3, 0x1.edc328d3184afp-3,
+    0x1.f1bdeeb654901p-3, 0x1.f5b42ae08c407p-3, 0x1.f9a5e79f76ac5p-3,
+    0x1.fd932f1ddb4d6p-3, 0x1.00be05b217844p-2, 0x1.02b0432c96ff0p-2,
+    0x1.04a054e139004p-2, 0x1.068e3fa282e3dp-2, 0x1.087a0832fa7acp-2,
+    0x1.0a63b3456c819p-2, 0x1.0c4b457d3193dp-2, 0x1.0e30c36e71a7fp-2,
+    0x1.1014319e661bdp-2, 0x1.11f594839a5bdp-2, 0x1.13d4f0862b2e1p-2,
+    0x1.15b24a0004a92p-2, 0x1.178da53d1ee01p-2, 0x1.1967067bb94b8p-2,
+    0x1.1b3e71ec94f7bp-2, 0x1.1d13ebb32d7f9p-2, 0x1.1ee777e5f0dc3p-2,
+    0x1.20b91a8e76105p-2, 0x1.2288d7a9b2b64p-2, 0x1.2456b3282f786p-2,
+    0x1.2622b0ee3b79dp-2, 0x1.27ecd4d41eb67p-2, 0x1.29b522a64b609p-2,
+    0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2,
+    0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2};
+
+INLINE_FMA
+LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
+  constexpr double LOG10_2 = 0x1.34413509f79ffp-2;
+
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+  double m = 0.0;
+
+  // Exact powers of 10 and other hard-to-round cases.
+  switch (xbits.uintval()) {
+  case 0x4120'0000U: // x = 10
+    return 1.0f;
+  case 0x42c8'0000U: // x = 100
+    return 2.0f;
+  case 0x447a'0000U: // x = 1,000
+    return 3.0f;
+  case 0x461c'4000U: // x = 10,000
+    return 4.0f;
+  case 0x47c3'5000U: // x = 100,000
+    return 5.0f;
+  case 0x4974'2400U: // x = 1,000,000
+    return 6.0f;
+  case 0x4b18'9680U: // x = 10,000,000
+    return 7.0f;
+  case 0x4cbe'bc20U: // x = 100,000,000
+    return 8.0f;
+  case 0x4e6e'6b28U: // x = 1,000,000,000
+    return 9.0f;
+  case 0x5015'02f9U: // x = 10,000,000,000
+    return 10.0f;
+  case 0x4f13'4f83U: // x = 2471461632.0
+    if (fputil::get_round() == FE_UPWARD)
+      return 0x1.2c9314p+3f;
+    break;
+  case 0x7956'ba5eU: { // x = 69683218960000541503257137270226944.0
+    int round_mode = fputil::get_round();
+    if (round_mode == FE_DOWNWARD || round_mode == FE_TOWARDZERO)
+      return 0x1.16bebap+5f;
+    break;
+  }
+  }
+
+  if (xbits.uintval() < FPBits::MIN_NORMAL ||
+      xbits.uintval() > FPBits::MAX_NORMAL) {
+    if (xbits.is_zero()) {
+      return static_cast<float>(FPBits::neg_inf());
+    }
+    if (xbits.get_sign() && !xbits.is_nan()) {
+      return FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+    }
+    if (xbits.is_inf_or_nan()) {
+      return x;
+    }
+    // Normalize denormal inputs.
+    xbits.val *= 0x1.0p23f;
+    m -= 23.0;
+  }
+
+  m += static_cast<double>(xbits.get_exponent());
+  // Set bits to 1.m
+  xbits.set_unbiased_exponent(0x7F);
+  int f_index = xbits.get_mantissa() >> 16;
+
+  FPBits f(xbits.val);
+  f.bits &= ~0x0000'FFFF;
+
+  double d = static_cast<float>(xbits) - static_cast<float>(f);
+  d *= ONE_OVER_F[f_index];
+
+  double extra_factor = fputil::fma(m, LOG10_2, LOG10_F[f_index]);
+
+  double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2,
+                              -0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3,
+                              -0x1.bcadb40b85565p-4, 0x1.5e0bc97f97e22p-4);
+
+  return static_cast<float>(r);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/log10f.h b/libc/src/math/log10f.h
new file mode 100644
index 0000000000000..d544ab5a55daa
--- /dev/null
+++ b/libc/src/math/log10f.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for log10f ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOG10F_H
+#define LLVM_LIBC_SRC_MATH_LOG10F_H
+
+namespace __llvm_libc {
+
+float log10f(float x);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LOG10F_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 7c51dc00d49db..73ecef959aba0 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1211,6 +1211,19 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fputil
 )
 
+add_fp_unittest(
+  log10f_test
+  NEED_MPFR
+  SUITE
+    libc_math_unittests
+  SRCS
+    log10f_test.cpp
+  DEPENDS
+    libc.include.math
+    libc.src.math.log10f
+    libc.src.__support.FPUtil.fputil
+)
+
 add_subdirectory(generic)
 add_subdirectory(exhaustive)
 add_subdirectory(differential_testing)
diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt
index f2ac818fbd3b3..56d4ff4edf749 100644
--- a/libc/test/src/math/differential_testing/CMakeLists.txt
+++ b/libc/test/src/math/differential_testing/CMakeLists.txt
@@ -233,6 +233,17 @@ add_diff_binary(
     -fno-builtin
 )
 
+add_diff_binary(
+  log10f_perf
+  SRCS
+    log10f_perf.cpp
+  DEPENDS
+    .single_input_single_output_diff
+    libc.src.math.log10f
+  COMPILE_OPTIONS
+    -fno-builtin
+)
+
 add_diff_binary(
   log2f_diff
   SRCS
diff --git a/libc/test/src/math/differential_testing/log10f_perf.cpp b/libc/test/src/math/differential_testing/log10f_perf.cpp
new file mode 100644
index 0000000000000..e890d0393e0af
--- /dev/null
+++ b/libc/test/src/math/differential_testing/log10f_perf.cpp
@@ -0,0 +1,16 @@
+//===-- Differential test for log10f --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SingleInputSingleOutputDiff.h"
+
+#include "src/math/log10f.h"
+
+#include <math.h>
+
+SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, __llvm_libc::log10f, ::log10f,
+                                "log10f_perf.log")
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 772276bb01e16..f089bfb112e1a 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -79,6 +79,23 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  log10f_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    log10f_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.include.math
+    libc.src.math.log10f
+    libc.src.__support.FPUtil.fputil
+  LINK_OPTIONS
+    -lpthread
+)
+
 add_fp_unittest(
   log2f_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/log10f_test.cpp b/libc/test/src/math/exhaustive/log10f_test.cpp
new file mode 100644
index 0000000000000..f8739ef0de7ca
--- /dev/null
+++ b/libc/test/src/math/exhaustive/log10f_test.cpp
@@ -0,0 +1,55 @@
+//===-- Exhaustive test for log10f ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/log10f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+struct LlvmLibcLog10fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+  void check(uint32_t start, uint32_t stop,
+             mpfr::RoundingMode rounding) override {
+    mpfr::ForceRoundingMode r(rounding);
+    uint32_t bits = start;
+    do {
+      FPBits xbits(bits);
+      float x = float(xbits);
+      EXPECT_MPFR_MATCH(mpfr::Operation::Log10, x, __llvm_libc::log10f(x), 0.5,
+                        rounding);
+    } while (bits++ < stop);
+  }
+};
+
+// Range: All non-negative;
+static constexpr uint32_t START = 0x0000'0000U;
+static constexpr uint32_t STOP = 0x7f80'0000U;
+// Range: [1, 10];
+// static constexpr uint32_t START = 0x3f80'0000U;
+// static constexpr uint32_t STOP  = 0x41c0'0000U;
+static constexpr int NUM_THREADS = 16;
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundNearestTieToEven) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundUp) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundDown) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundTowardZero) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp
new file mode 100644
index 0000000000000..a43290b858894
--- /dev/null
+++ b/libc/test/src/math/log10f_test.cpp
@@ -0,0 +1,74 @@
+//===-- Unittests for log10f ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/log10f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+#include "utils/UnitTest/Test.h"
+#include <math.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+DECLARE_SPECIAL_CONSTANTS(float)
+
+TEST(LlvmLibcLog10fTest, SpecialNumbers) {
+  EXPECT_FP_EQ(aNaN, __llvm_libc::log10f(aNaN));
+  EXPECT_FP_EQ(inf, __llvm_libc::log10f(inf));
+  EXPECT_TRUE(FPBits(__llvm_libc::log10f(neg_inf)).is_nan());
+  EXPECT_FP_EQ(neg_inf, __llvm_libc::log10f(0.0f));
+  EXPECT_FP_EQ(neg_inf, __llvm_libc::log10f(-0.0f));
+  EXPECT_TRUE(FPBits(__llvm_libc::log10f(-1.0f)).is_nan());
+  EXPECT_FP_EQ(zero, __llvm_libc::log10f(1.0f));
+}
+
+TEST(LlvmLibcLog10fTest, TrickyInputs) {
+  constexpr int N = 12;
+  constexpr uint32_t INPUTS[N] = {
+      0x41200000U /*10.0f*/,
+      0x42c80000U /*100.0f*/,
+      0x447a0000U /*1,000.0f*/,
+      0x461c4000U /*10,000.0f*/,
+      0x47c35000U /*100,000.0f*/,
+      0x49742400U /*1,000,000.0f*/,
+      0x4b189680U /*10,000,000.0f*/,
+      0x4cbebc20U /*100,000,000.0f*/,
+      0x4e6e6b28U /*1,000,000,000.0f*/,
+      0x501502f9U /*10,000,000,000.0f*/,
+      0x4f134f83U /*2471461632.0f*/,
+      0x7956ba5eU /*69683218960000541503257137270226944.0f*/};
+
+  for (int i = 0; i < N; ++i) {
+    float x = float(FPBits(INPUTS[i]));
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x,
+                                   __llvm_libc::log10f(x), 0.5);
+  }
+}
+
+TEST(LlvmLibcLog10fTest, InFloatRange) {
+  constexpr uint32_t COUNT = 1000000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = float(FPBits(v));
+    if (isnan(x) || isinf(x))
+      continue;
+    errno = 0;
+    float result = __llvm_libc::log10f(x);
+    // If the computation resulted in an error or did not produce valid result
+    // in the single-precision floating point range, then ignore comparing with
+    // MPFR result as MPFR can still produce valid results because of its
+    // wider precision.
+    if (isnan(result) || isinf(result) || errno != 0)
+      continue;
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x,
+                                   __llvm_libc::log10f(x), 0.5);
+  }
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 4f293837bf4e8..c6470e23451e0 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -273,6 +273,12 @@ class MPFRNumber {
     return result;
   }
 
+  MPFRNumber log10() const {
+    MPFRNumber result(*this);
+    mpfr_log10(result.value, value, mpfr_rounding);
+    return result;
+  }
+
   MPFRNumber remquo(const MPFRNumber &divisor, int &quotient) {
     MPFRNumber remainder(*this);
     long q;
@@ -502,6 +508,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
     return mpfrInput.log();
   case Operation::Log2:
     return mpfrInput.log2();
+  case Operation::Log10:
+    return mpfrInput.log10();
   case Operation::Mod2PI:
     return mpfrInput.mod_2pi();
   case Operation::ModPIOver2:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 5094f08b9003e..2a13ab1d11608 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -32,6 +32,7 @@ enum class Operation : int {
   Floor,
   Log,
   Log2,
+  Log10,
   Mod2PI,
   ModPIOver2,
   ModPIOver4,

From 519810d63eb16ff6f367d8e9784a804805c30c44 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Tue, 25 Jan 2022 09:52:15 -0500
Subject: [PATCH 567/946] [NFC] Refine header dependencies of llvm/ADT/Any.h

---
 llvm/include/llvm/ADT/Any.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index e513586845a1e..1b4f2c2fa985f 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -15,7 +15,8 @@
 #ifndef LLVM_ADT_ANY_H
 #define LLVM_ADT_ANY_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/Support/Compiler.h"
 
 #include 
 #include 

From adc9a346d842a61c8ce97627d2928dd2384bee5c Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Tue, 25 Jan 2022 09:53:09 -0500
Subject: [PATCH 568/946] Always use df_iterator_default_set as default set
 type for [i]df_ext_iterator

This is consistent with other default values in this file.
---
 llvm/include/llvm/ADT/DepthFirstIterator.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index d4f173ca7caa8..42ac61d7cf52a 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -38,7 +38,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/iterator_range.h"
 #include 
-#include 
 #include 
 #include 
 
@@ -231,7 +230,7 @@ iterator_range> depth_first(const T& G) {
 }
 
 // Provide global definitions of external depth first iterators...
-template ::NodeRef>>
+template ::NodeRef>>
 struct df_ext_iterator : public df_iterator {
   df_ext_iterator(const df_iterator &V)
     : df_iterator(V) {}
@@ -280,7 +279,7 @@ iterator_range> inverse_depth_first(const T& G) {
 }
 
 // Provide global definitions of external inverse depth first iterators...
-template ::NodeRef>>
+template ::NodeRef>>
 struct idf_ext_iterator : public idf_iterator {
   idf_ext_iterator(const idf_iterator &V)
     : idf_iterator(V) {}

From 2f02c7e1f2580c7faf9032922c5a8d3e957d0bd5 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Tue, 25 Jan 2022 17:20:26 +0100
Subject: [PATCH 569/946] [SanitizerCoverage] Avoid pointer element type access

Use the load/store type instead.
---
 llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 387ea5243265d..d3b60c7add34c 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -917,8 +917,7 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
 
 void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
     Function &, ArrayRef Loads, ArrayRef Stores) {
-  auto CallbackIdx = [&](const Value *Ptr) -> int {
-    auto *ElementTy = Ptr->getType()->getPointerElementType();
+  auto CallbackIdx = [&](Type *ElementTy) -> int {
     uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
     return TypeSize == 8     ? 0
            : TypeSize == 16  ? 1
@@ -932,7 +931,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
   for (auto LI : Loads) {
     IRBuilder<> IRB(LI);
     auto Ptr = LI->getPointerOperand();
-    int Idx = CallbackIdx(Ptr);
+    int Idx = CallbackIdx(LI->getType());
     if (Idx < 0)
       continue;
     IRB.CreateCall(SanCovLoadFunction[Idx],
@@ -941,7 +940,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
   for (auto SI : Stores) {
     IRBuilder<> IRB(SI);
     auto Ptr = SI->getPointerOperand();
-    int Idx = CallbackIdx(Ptr);
+    int Idx = CallbackIdx(SI->getValueOperand()->getType());
     if (Idx < 0)
       continue;
     IRB.CreateCall(SanCovStoreFunction[Idx],

From ef0d90f682b169b3148ff66cb1f592d78adec2f3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 25 Jan 2022 16:23:10 +0000
Subject: [PATCH 570/946] [X86] Regenerate avx-vbroadcast.ll

Remove '' around mattr to stop update script crash and use X86 prefixes instead of X32
---
 llvm/test/CodeGen/X86/avx-vbroadcast.ll | 576 ++++++++++++------------
 1 file changed, 288 insertions(+), 288 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 5ebe6709a2fe4..df131f030baa3 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X64
 
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: A:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: A:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: A:
 ; X64:       ## %bb.0: ## %entry
@@ -23,20 +23,20 @@ entry:
 }
 
 define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: A2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %esi, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %edx
-; X32-NEXT:    movl 4(%ecx), %esi
-; X32-NEXT:    vbroadcastsd (%ecx), %ymm0
-; X32-NEXT:    movl %edx, (%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: A2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    vbroadcastsd (%ecx), %ymm0
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: A2:
 ; X64:       ## %bb.0: ## %entry
@@ -57,11 +57,11 @@ entry:
 }
 
 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: B:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B:
 ; X64:       ## %bb.0: ## %entry
@@ -77,11 +77,11 @@ entry:
 }
 
 define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: B2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B2:
 ; X64:       ## %bb.0: ## %entry
@@ -101,16 +101,16 @@ entry:
 }
 
 define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: B3:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %ecx
-; X32-NEXT:    movl %ecx, (%eax)
-; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B3:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    vmovd %ecx, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B3:
 ; X64:       ## %bb.0: ## %entry
@@ -135,11 +135,11 @@ entry:
 }
 
 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: C:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: C:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: C:
 ; X64:       ## %bb.0: ## %entry
@@ -155,13 +155,13 @@ entry:
 }
 
 define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: C2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastsd (%ecx), %ymm0
-; X32-NEXT:    vmovlps %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: C2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastsd (%ecx), %ymm0
+; X86-NEXT:    vmovlps %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: C2:
 ; X64:       ## %bb.0: ## %entry
@@ -179,11 +179,11 @@ entry:
 }
 
 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: D:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: D:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D:
 ; X64:       ## %bb.0: ## %entry
@@ -199,11 +199,11 @@ entry:
 }
 
 define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: D2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: D2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D2:
 ; X64:       ## %bb.0: ## %entry
@@ -223,13 +223,13 @@ entry:
 }
 
 define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: D3:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %ymm0
-; X32-NEXT:    vmovss %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: D3:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %ymm0
+; X86-NEXT:    vmovss %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D3:
 ; X64:       ## %bb.0: ## %entry
@@ -253,11 +253,11 @@ entry:
 ;;;; 128-bit versions
 
 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: e:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: e:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: e:
 ; X64:       ## %bb.0: ## %entry
@@ -273,13 +273,13 @@ entry:
 }
 
 define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: e2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %xmm0
-; X32-NEXT:    vmovss %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: e2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %xmm0
+; X86-NEXT:    vmovss %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: e2:
 ; X64:       ## %bb.0: ## %entry
@@ -298,10 +298,10 @@ entry:
 
 ; Don't broadcast constants on pre-AVX2 hardware.
 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: _e2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
-; X32-NEXT:    retl
+; X86-LABEL: _e2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _e2:
 ; X64:       ## %bb.0: ## %entry
@@ -317,11 +317,11 @@ entry:
 
 
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: F:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: F:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: F:
 ; X64:       ## %bb.0: ## %entry
@@ -337,15 +337,15 @@ entry:
 }
 
 define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: F2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %ecx
-; X32-NEXT:    movl %ecx, (%eax)
-; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X32-NEXT:    retl
+; X86-LABEL: F2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    vmovd %ecx, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: F2:
 ; X64:       ## %bb.0: ## %entry
@@ -367,11 +367,11 @@ entry:
 ; FIXME: Pointer adjusted broadcasts
 
 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i32_4i32_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i32_4i32_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i32_4i32_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -384,11 +384,11 @@ entry:
 }
 
 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8i32_4i32_33333333:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8i32_4i32_33333333:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 12(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_4i32_33333333:
 ; X64:       ## %bb.0: ## %entry
@@ -401,11 +401,11 @@ entry:
 }
 
 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8i32_8i32_55555555:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8i32_8i32_55555555:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 20(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_8i32_55555555:
 ; X64:       ## %bb.0: ## %entry
@@ -418,11 +418,11 @@ entry:
 }
 
 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f32_4f32_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f32_4f32_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 4(%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f32_4f32_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -435,11 +435,11 @@ entry:
 }
 
 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8f32_4f32_33333333:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8f32_4f32_33333333:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 12(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8f32_4f32_33333333:
 ; X64:       ## %bb.0: ## %entry
@@ -452,11 +452,11 @@ entry:
 }
 
 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8f32_8f32_55555555:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8f32_8f32_55555555:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 20(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8f32_8f32_55555555:
 ; X64:       ## %bb.0: ## %entry
@@ -469,11 +469,11 @@ entry:
 }
 
 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_2i64_2i64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_2i64_2i64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_2i64_2i64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -486,11 +486,11 @@ entry:
 }
 
 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i64_2i64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i64_2i64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 8(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_2i64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -503,11 +503,11 @@ entry:
 }
 
 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i64_4i64_2222:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i64_4i64_2222:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_4i64_2222:
 ; X64:       ## %bb.0: ## %entry
@@ -520,11 +520,11 @@ entry:
 }
 
 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_2f64_2f64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_2f64_2f64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_2f64_2f64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -537,11 +537,11 @@ entry:
 }
 
 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f64_2f64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f64_2f64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 8(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f64_2f64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -554,11 +554,11 @@ entry:
 }
 
 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f64_4f64_2222:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f64_4f64_2222:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f64_4f64_2222:
 ; X64:       ## %bb.0: ## %entry
@@ -573,11 +573,11 @@ entry:
 ; Unsupported vbroadcasts
 
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: G:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: G:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: G:
 ; X64:       ## %bb.0: ## %entry
@@ -591,20 +591,20 @@ entry:
 }
 
 define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: G2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %esi, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %edx
-; X32-NEXT:    movl 4(%ecx), %esi
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    movl %edx, (%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: G2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: G2:
 ; X64:       ## %bb.0: ## %entry
@@ -622,10 +622,10 @@ entry:
 }
 
 define <4 x i32> @H(<4 x i32> %a) {
-; X32-LABEL: H:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-NEXT:    retl
+; X86-LABEL: H:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: H:
 ; X64:       ## %bb.0: ## %entry
@@ -637,11 +637,11 @@ entry:
 }
 
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: I:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: I:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: I:
 ; X64:       ## %bb.0: ## %entry
@@ -655,13 +655,13 @@ entry:
 }
 
 define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: I2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vmovlps %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: I2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    vmovlps %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: I2:
 ; X64:       ## %bb.0: ## %entry
@@ -677,14 +677,14 @@ entry:
 }
 
 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; X32-LABEL: _RR:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %xmm0
-; X32-NEXT:    movl (%eax), %eax
-; X32-NEXT:    movl %eax, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: _RR:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %xmm0
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movl %eax, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _RR:
 ; X64:       ## %bb.0: ## %entry
@@ -705,11 +705,11 @@ entry:
 }
 
 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; X32-LABEL: _RR2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: _RR2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _RR2:
 ; X64:       ## %bb.0: ## %entry
@@ -727,11 +727,11 @@ entry:
 ; (via the insertelements).
 
 define <8 x float> @splat_concat1(float* %p) {
-; X32-LABEL: splat_concat1:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat1:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat1:
 ; X64:       ## %bb.0:
@@ -747,11 +747,11 @@ define <8 x float> @splat_concat1(float* %p) {
 }
 
 define <8 x float> @splat_concat2(float* %p) {
-; X32-LABEL: splat_concat2:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat2:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat2:
 ; X64:       ## %bb.0:
@@ -771,11 +771,11 @@ define <8 x float> @splat_concat2(float* %p) {
 }
 
 define <4 x double> @splat_concat3(double* %p) {
-; X32-LABEL: splat_concat3:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat3:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat3:
 ; X64:       ## %bb.0:
@@ -789,11 +789,11 @@ define <4 x double> @splat_concat3(double* %p) {
 }
 
 define <4 x double> @splat_concat4(double* %p) {
-; X32-LABEL: splat_concat4:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat4:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat4:
 ; X64:       ## %bb.0:
@@ -810,11 +810,11 @@ define <4 x double> @splat_concat4(double* %p) {
 
 ; PR34041
 define <4 x double> @broadcast_shuffle_1000(double* %p) {
-; X32-LABEL: broadcast_shuffle_1000:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_shuffle_1000:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle_1000:
 ; X64:       ## %bb.0:
@@ -827,11 +827,11 @@ define <4 x double> @broadcast_shuffle_1000(double* %p) {
 }
 
 define <4 x double> @broadcast_shuffle1032(double* %p) {
-; X32-LABEL: broadcast_shuffle1032:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_shuffle1032:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64:       ## %bb.0:
@@ -845,15 +845,15 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
 }
 
 define void @broadcast_v16i32(i32* %a, <16 x i32>* %b) {
-; X32-LABEL: broadcast_v16i32:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %ymm0
-; X32-NEXT:    vmovups %ymm0, 32(%eax)
-; X32-NEXT:    vmovups %ymm0, (%eax)
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_v16i32:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %ymm0
+; X86-NEXT:    vmovups %ymm0, 32(%eax)
+; X86-NEXT:    vmovups %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_v16i32:
 ; X64:       ## %bb.0:
@@ -874,21 +874,21 @@ define void @broadcast_v16i32(i32* %a, <16 x i32>* %b) {
 ; Broadcast scale factor for xyz vector - slp will have vectorized xy.
 ;
 define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture readonly) nounwind {
-; X32-LABEL: broadcast_scale_xyz:
-; X32:       ## %bb.0:
-; X32-NEXT:    subl $12, %esp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vmulpd (%eax), %xmm0, %xmm1
-; X32-NEXT:    vmulsd 16(%eax), %xmm0, %xmm0
-; X32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; X32-NEXT:    vaddsd %xmm2, %xmm1, %xmm1
-; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
-; X32-NEXT:    vmovsd %xmm0, (%esp)
-; X32-NEXT:    fldl (%esp)
-; X32-NEXT:    addl $12, %esp
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_scale_xyz:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    vmulpd (%eax), %xmm0, %xmm1
+; X86-NEXT:    vmulsd 16(%eax), %xmm0, %xmm0
+; X86-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
+; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm1
+; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_scale_xyz:
 ; X64:       ## %bb.0:
@@ -919,24 +919,24 @@ define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture
 ; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
 ;
 define float @broadcast_lifetime() nounwind {
-; X32-LABEL: broadcast_lifetime:
-; X32:       ## %bb.0:
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    subl $40, %esp
-; X32-NEXT:    leal {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl %esi, (%esp)
-; X32-NEXT:    calll _gfunc
-; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X32-NEXT:    movl %esi, (%esp)
-; X32-NEXT:    calll _gfunc
-; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vsubss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 4-byte Folded Reload
-; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-NEXT:    addl $40, %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_lifetime:
+; X86:       ## %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $40, %esp
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, (%esp)
+; X86-NEXT:    calll _gfunc
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT:    movl %esi, (%esp)
+; X86-NEXT:    calll _gfunc
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vsubss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 4-byte Folded Reload
+; X86-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    addl $40, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_lifetime:
 ; X64:       ## %bb.0:
@@ -973,14 +973,14 @@ define float @broadcast_lifetime() nounwind {
 }
 
 define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
-; X32-LABEL: broadcast_x86_mmx:
-; X32:       ## %bb.0: ## %bb
-; X32-NEXT:    subl $12, %esp
-; X32-NEXT:    movq %mm0, (%esp)
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X32-NEXT:    addl $12, %esp
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_x86_mmx:
+; X86:       ## %bb.0: ## %bb
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movq %mm0, (%esp)
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_x86_mmx:
 ; X64:       ## %bb.0: ## %bb

From 0e5ea403e8deeb5374a9072aaa12292b9c0bed30 Mon Sep 17 00:00:00 2001
From: Ben Langmuir 
Date: Mon, 24 Jan 2022 16:05:49 -0800
Subject: [PATCH 571/946] Fix running orc-rt tests with
 LLVM_BUILD_EXTERNAL_COMPILER_RT

Add missing dependency on llvm-jitlink when building compiler-rt with
LLVM_BUILD_EXTERNAL_COMPILER_RT. Previously we would
non-deterministically fail the tests due to the missing binary.

rdar://87247681

Differential Revision: https://reviews.llvm.org/D118087
---
 clang/runtime/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt
index 0388008792511..ca7e17927ee1f 100644
--- a/clang/runtime/CMakeLists.txt
+++ b/clang/runtime/CMakeLists.txt
@@ -132,7 +132,7 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/)
   if(LLVM_INCLUDE_TESTS)
     # Add binaries that compiler-rt tests depend on.
     set(COMPILER_RT_TEST_DEPENDENCIES
-      FileCheck count not llvm-nm llvm-objdump llvm-symbolizer)
+      FileCheck count not llvm-nm llvm-objdump llvm-symbolizer llvm-jitlink)
 
     # Add top-level targets for various compiler-rt test suites.
     set(COMPILER_RT_TEST_SUITES check-fuzzer check-asan check-hwasan check-asan-dynamic check-dfsan

From 64ba462b6e398bdb33464963f7d6274320f84370 Mon Sep 17 00:00:00 2001
From: Stanislav Gatev 
Date: Mon, 24 Jan 2022 16:17:22 +0000
Subject: [PATCH 572/946] [clang][dataflow] Add a transfer function for
 InitListExpr

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118119
---
 .../clang/Analysis/FlowSensitive/Value.h      |  7 +-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 29 +++++++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 87 +++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h
index d1de2b64fd95a..47e4d31c832bb 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Value.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Value.h
@@ -100,15 +100,18 @@ class StructValue final : public Value {
     return Val->getKind() == Kind::Struct;
   }
 
-  /// Returns the child value for `D`.
+  /// Returns the child value that is assigned for `D`.
   Value &getChild(const ValueDecl &D) const {
     auto It = Children.find(&D);
     assert(It != Children.end());
     return *It->second;
   }
 
+  /// Assigns `Val` as the child value for `D`.
+  void setChild(const ValueDecl &D, Value &Val) { Children[&D] = &Val; }
+
 private:
-  const llvm::DenseMap<const ValueDecl *, Value *> Children;
+  llvm::DenseMap<const ValueDecl *, Value *> Children;
 };
 
 } // namespace dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 32a05333923f5..7c5e063278d78 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -22,9 +22,11 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include 
 #include 
+#include <tuple>
 
 namespace clang {
 namespace dataflow {
@@ -414,6 +416,33 @@ class TransferVisitor : public ConstStmtVisitor {
       Env.setValue(Loc, *Val);
   }
 
+  void VisitInitListExpr(const InitListExpr *S) {
+    QualType Type = S->getType();
+
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+
+    auto *Val = Env.createValue(Type);
+    if (Val == nullptr)
+      return;
+
+    Env.setValue(Loc, *Val);
+
+    if (Type->isStructureOrClassType()) {
+      for (auto IT : llvm::zip(Type->getAsRecordDecl()->fields(), S->inits())) {
+        const FieldDecl *Field = std::get<0>(IT);
+        assert(Field != nullptr);
+
+        const Expr *Init = std::get<1>(IT);
+        assert(Init != nullptr);
+
+        if (Value *InitVal = Env.getValue(*Init, SkipPast::None))
+          cast<StructValue>(Val)->setChild(*Field, *InitVal);
+      }
+    }
+    // FIXME: Implement array initialization.
+  }
+
   // FIXME: Add support for:
   // - CXXBoolLiteralExpr
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index c1eaf281ddc49..cd3e58207680a 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1865,4 +1865,91 @@ TEST_F(TransferTest, VarDeclInDoWhile) {
               });
 }
 
+TEST_F(TransferTest, AggregateInitialization) {
+  std::string BracesCode = R"(
+    struct A {
+      int Foo;
+    };
+
+    struct B {
+      int Bar;
+      A Baz;
+      int Qux;
+    };
+
+    void target(int BarArg, int FooArg, int QuxArg) {
+      B Quux{BarArg, {FooArg}, QuxArg};
+      /*[[p]]*/
+    }
+  )";
+  std::string BraceEllisionCode = R"(
+    struct A {
+      int Foo;
+    };
+
+    struct B {
+      int Bar;
+      A Baz;
+      int Qux;
+    };
+
+    void target(int BarArg, int FooArg, int QuxArg) {
+      B Quux = {BarArg, FooArg, QuxArg};
+      /*[[p]]*/
+    }
+  )";
+  for (const std::string &Code : {BracesCode, BraceEllisionCode}) {
+    runDataflow(
+        Code, [](llvm::ArrayRef<
+                     std::pair>>
+                     Results,
+                 ASTContext &ASTCtx) {
+          ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+          const Environment &Env = Results[0].second.Env;
+
+          const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+          ASSERT_THAT(FooDecl, NotNull());
+
+          const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
+          ASSERT_THAT(BarDecl, NotNull());
+
+          const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
+          ASSERT_THAT(BazDecl, NotNull());
+
+          const ValueDecl *QuxDecl = findValueDecl(ASTCtx, "Qux");
+          ASSERT_THAT(QuxDecl, NotNull());
+
+          const ValueDecl *FooArgDecl = findValueDecl(ASTCtx, "FooArg");
+          ASSERT_THAT(FooArgDecl, NotNull());
+
+          const ValueDecl *BarArgDecl = findValueDecl(ASTCtx, "BarArg");
+          ASSERT_THAT(BarArgDecl, NotNull());
+
+          const ValueDecl *QuxArgDecl = findValueDecl(ASTCtx, "QuxArg");
+          ASSERT_THAT(QuxArgDecl, NotNull());
+
+          const ValueDecl *QuuxDecl = findValueDecl(ASTCtx, "Quux");
+          ASSERT_THAT(QuuxDecl, NotNull());
+
+          const auto *FooArgVal =
+              cast(Env.getValue(*FooArgDecl, SkipPast::None));
+          const auto *BarArgVal =
+              cast(Env.getValue(*BarArgDecl, SkipPast::None));
+          const auto *QuxArgVal =
+              cast(Env.getValue(*QuxArgDecl, SkipPast::None));
+
+          const auto *QuuxVal =
+              cast<StructValue>(Env.getValue(*QuuxDecl, SkipPast::None));
+          ASSERT_THAT(QuuxVal, NotNull());
+
+          const auto *BazVal = cast<StructValue>(&QuuxVal->getChild(*BazDecl));
+          ASSERT_THAT(BazVal, NotNull());
+
+          EXPECT_EQ(&QuuxVal->getChild(*BarDecl), BarArgVal);
+          EXPECT_EQ(&BazVal->getChild(*FooDecl), FooArgVal);
+          EXPECT_EQ(&QuuxVal->getChild(*QuxDecl), QuxArgVal);
+        });
+  }
+}
+
 } // namespace

From ce368e1aa51f3d9f0a5f6ff0be3c02a9cf1e2d2e Mon Sep 17 00:00:00 2001
From: Alex Brachet 
Date: Tue, 25 Jan 2022 16:37:43 +0000
Subject: [PATCH 573/946] [libc][NFC] Workaround clang assertion in inline asm

The clobber list "cc" is added to inline assembly to work around a clang assertion that triggers when building with a clang built with assertions enabled. See bug [53391](https://github.com/llvm/llvm-project/issues/53391).

See https://godbolt.org/z/z3bc6a9PM showing functionally same output assembly.

Reviewed By: sivachandra, lntue

Differential Revision: https://reviews.llvm.org/D118099
---
 libc/src/math/x86_64/cos.cpp | 2 +-
 libc/src/math/x86_64/sin.cpp | 2 +-
 libc/src/math/x86_64/tan.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/math/x86_64/cos.cpp b/libc/src/math/x86_64/cos.cpp
index 9e7e7227c6b1d..3b785a2f78cdf 100644
--- a/libc/src/math/x86_64/cos.cpp
+++ b/libc/src/math/x86_64/cos.cpp
@@ -13,7 +13,7 @@ namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(double, cos, (double x)) {
   double result;
-  __asm__ __volatile__("fcos" : "=t"(result) : "f"(x));
+  __asm__ __volatile__("fcos" : "=t"(result) : "f"(x) : "cc");
   return result;
 }
 
diff --git a/libc/src/math/x86_64/sin.cpp b/libc/src/math/x86_64/sin.cpp
index 22774f278956e..e94aa1a3f0925 100644
--- a/libc/src/math/x86_64/sin.cpp
+++ b/libc/src/math/x86_64/sin.cpp
@@ -13,7 +13,7 @@ namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(double, sin, (double x)) {
   double result;
-  __asm__ __volatile__("fsin" : "=t"(result) : "f"(x));
+  __asm__ __volatile__("fsin" : "=t"(result) : "f"(x) : "cc");
   return result;
 }
 
diff --git a/libc/src/math/x86_64/tan.cpp b/libc/src/math/x86_64/tan.cpp
index 9146473741f95..0503af7a16dde 100644
--- a/libc/src/math/x86_64/tan.cpp
+++ b/libc/src/math/x86_64/tan.cpp
@@ -16,7 +16,7 @@ LLVM_LIBC_FUNCTION(double, tan, (double x)) {
   double one;
   // The fptan instruction pushes the number 1 on to the FP stack after
   // computing tan. So, we read out the one before popping the actual result.
-  __asm__ __volatile__("fptan" : "=t"(one) : "f"(x));
+  __asm__ __volatile__("fptan" : "=t"(one) : "f"(x) : "cc");
   __asm__ __volatile__("fstpl %0" : "=m"(result));
   return result;
 }

From a22d870a4e96eb6b7c481fb119f283ba56f735fa Mon Sep 17 00:00:00 2001
From: Adrian Prantl 
Date: Tue, 25 Jan 2022 09:08:13 -0800
Subject: [PATCH 574/946] Add missing include diagnosed by modules build.

---
 llvm/include/llvm/Support/FormatVariadicDetails.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h
index 08f8fc61f69bd..2cafc120c1d73 100644
--- a/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -10,6 +10,7 @@
 #define LLVM_SUPPORT_FORMATVARIADICDETAILS_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include 

From f23d57a63266354902f36b79f2ad8557e201b3bc Mon Sep 17 00:00:00 2001
From: Leonard Grey 
Date: Mon, 24 Jan 2022 16:51:51 -0500
Subject: [PATCH 575/946] [lld-macho] Rename CallGraphSort.{h,cpp} to
 SectionPriorities

This is in preparation for moving the code that parses and processes
order files into this file.

See https://reviews.llvm.org/D117354 for context and discussion.
---
 lld/MachO/CMakeLists.txt                               | 2 +-
 lld/MachO/{CallGraphSort.cpp => SectionPriorities.cpp} | 4 ++--
 lld/MachO/{CallGraphSort.h => SectionPriorities.h}     | 6 +++---
 lld/MachO/Writer.cpp                                   | 2 +-
 llvm/utils/gn/secondary/lld/MachO/BUILD.gn             | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)
 rename lld/MachO/{CallGraphSort.cpp => SectionPriorities.cpp} (98%)
 rename lld/MachO/{CallGraphSort.h => SectionPriorities.h} (78%)

diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 61f5c70005b9f..4bd0816bca66f 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -10,7 +10,6 @@ add_lld_library(lldMachO
   Arch/ARM64Common.cpp
   Arch/ARM64_32.cpp
   Arch/X86_64.cpp
-  CallGraphSort.cpp
   ConcatOutputSection.cpp
   Driver.cpp
   DriverUtils.cpp
@@ -26,6 +25,7 @@ add_lld_library(lldMachO
   OutputSection.cpp
   OutputSegment.cpp
   Relocations.cpp
+  SectionPriorities.cpp
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
diff --git a/lld/MachO/CallGraphSort.cpp b/lld/MachO/SectionPriorities.cpp
similarity index 98%
rename from lld/MachO/CallGraphSort.cpp
rename to lld/MachO/SectionPriorities.cpp
index 7a0192ea691e0..5e63ceb34d110 100644
--- a/lld/MachO/CallGraphSort.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -1,4 +1,4 @@
-//===- CallGraphSort.cpp --------------------------------------------------===//
+//===- SectionPriorities.cpp ----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "CallGraphSort.h"
+#include "SectionPriorities.h"
 #include "Config.h"
 #include "InputFiles.h"
 #include "Symbols.h"
diff --git a/lld/MachO/CallGraphSort.h b/lld/MachO/SectionPriorities.h
similarity index 78%
rename from lld/MachO/CallGraphSort.h
rename to lld/MachO/SectionPriorities.h
index 034f54a260899..f510d315e2c45 100644
--- a/lld/MachO/CallGraphSort.h
+++ b/lld/MachO/SectionPriorities.h
@@ -1,4 +1,4 @@
-//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//===- SectionPriorities.h --------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLD_MACHO_CALL_GRAPH_SORT_H
-#define LLD_MACHO_CALL_GRAPH_SORT_H
+#ifndef LLD_MACHO_SECTION_PRIORITIES_H
+#define LLD_MACHO_SECTION_PRIORITIES_H
 
 #include "InputSection.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c76dc691346e6..1fafda99e39d2 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Writer.h"
-#include "CallGraphSort.h"
 #include "ConcatOutputSection.h"
 #include "Config.h"
 #include "InputFiles.h"
@@ -15,6 +14,7 @@
 #include "MapFile.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
+#include "SectionPriorities.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 4d8a050dcd8c6..6b457a97f9981 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -27,7 +27,6 @@ static_library("MachO") {
     "Arch/ARM64Common.cpp",
     "Arch/ARM64_32.cpp",
     "Arch/X86_64.cpp",
-    "CallGraphSort.cpp",
     "ConcatOutputSection.cpp",
     "Driver.cpp",
     "DriverUtils.cpp",
@@ -43,6 +42,7 @@ static_library("MachO") {
     "OutputSection.cpp",
     "OutputSegment.cpp",
     "Relocations.cpp",
+    "SectionPriorities.cpp",
     "SymbolTable.cpp",
     "Symbols.cpp",
     "SyntheticSections.cpp",

From a5c9d717807f2801b35f5d0e9501d6398efff42d Mon Sep 17 00:00:00 2001
From: Leonard Grey 
Date: Mon, 24 Jan 2022 17:27:56 -0500
Subject: [PATCH 576/946] [lld-macho] Move order file and call graph sorting
 into SectionPriorities

See https://reviews.llvm.org/D117354 for context and discussion.
---
 lld/MachO/Driver.cpp            |  74 +-----------------
 lld/MachO/SectionPriorities.cpp | 129 +++++++++++++++++++++++++++++++-
 lld/MachO/SectionPriorities.h   |  35 ++++++++-
 lld/MachO/Writer.cpp            |  48 ------------
 4 files changed, 163 insertions(+), 123 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 50f5c96c61f35..e4c9f4dd6024a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -15,6 +15,7 @@
 #include "ObjC.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
+#include "SectionPriorities.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
@@ -460,60 +461,6 @@ static void addFileList(StringRef path, bool isLazy) {
 // entry (the one nearest to the front of the list.)
 //
 // The file can also have line comments that start with '#'.
-static void parseOrderFile(StringRef path) {
-  Optional buffer = readFile(path);
-  if (!buffer) {
-    error("Could not read order file at " + path);
-    return;
-  }
-
-  MemoryBufferRef mbref = *buffer;
-  size_t priority = std::numeric_limits::max();
-  for (StringRef line : args::getLines(mbref)) {
-    StringRef objectFile, symbol;
-    line = line.take_until([](char c) { return c == '#'; }); // ignore comments
-    line = line.ltrim();
-
-    CPUType cpuType = StringSwitch(line)
-                          .StartsWith("i386:", CPU_TYPE_I386)
-                          .StartsWith("x86_64:", CPU_TYPE_X86_64)
-                          .StartsWith("arm:", CPU_TYPE_ARM)
-                          .StartsWith("arm64:", CPU_TYPE_ARM64)
-                          .StartsWith("ppc:", CPU_TYPE_POWERPC)
-                          .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
-                          .Default(CPU_TYPE_ANY);
-
-    if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
-      continue;
-
-    // Drop the CPU type as well as the colon
-    if (cpuType != CPU_TYPE_ANY)
-      line = line.drop_until([](char c) { return c == ':'; }).drop_front();
-
-    constexpr std::array fileEnds = {".o:", ".o):"};
-    for (StringRef fileEnd : fileEnds) {
-      size_t pos = line.find(fileEnd);
-      if (pos != StringRef::npos) {
-        // Split the string around the colon
-        objectFile = line.take_front(pos + fileEnd.size() - 1);
-        line = line.drop_front(pos + fileEnd.size());
-        break;
-      }
-    }
-    symbol = line.trim();
-
-    if (!symbol.empty()) {
-      SymbolPriorityEntry &entry = config->priorities[symbol];
-      if (!objectFile.empty())
-        entry.objectFiles.insert(std::make_pair(objectFile, priority));
-      else
-        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
-    }
-
-    --priority;
-  }
-}
-
 // We expect sub-library names of the form "libfoo", which will match a dylib
 // with a path of .*/libfoo.{dylib, tbd}.
 // XXX ld64 seems to ignore the extension entirely when matching sub-libraries;
@@ -1081,25 +1028,6 @@ static void gatherInputSections() {
   assert(inputOrder <= UnspecifiedInputOrder);
 }
 
-static void extractCallGraphProfile() {
-  TimeTraceScope timeScope("Extract call graph profile");
-  for (const InputFile *file : inputFiles) {
-    auto *obj = dyn_cast_or_null(file);
-    if (!obj)
-      continue;
-    for (const CallGraphEntry &entry : obj->callGraph) {
-      assert(entry.fromIndex < obj->symbols.size() &&
-             entry.toIndex < obj->symbols.size());
-      auto *fromSym = dyn_cast_or_null(obj->symbols[entry.fromIndex]);
-      auto *toSym = dyn_cast_or_null(obj->symbols[entry.toIndex]);
-
-      if (!fromSym || !toSym)
-        continue;
-      config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
-    }
-  }
-}
-
 static void foldIdenticalLiterals() {
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 5e63ceb34d110..35510d7338e89 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -16,14 +16,20 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "Target.h"
+#include "lld/Common/Args.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
 
 using namespace llvm;
+using namespace llvm::MachO;
+using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
@@ -241,12 +247,133 @@ DenseMap CallGraphSort::run() {
   return orderMap;
 }
 
+static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
+                                const InputFile *f) {
+  // We don't use toString(InputFile *) here because it returns the full path
+  // for object files, and we only want the basename.
+  StringRef filename;
+  if (f->archiveName.empty())
+    filename = path::filename(f->getName());
+  else
+    filename = saver().save(path::filename(f->archiveName) + "(" +
+                            path::filename(f->getName()) + ")");
+  return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
+}
+
+void macho::extractCallGraphProfile() {
+  TimeTraceScope timeScope("Extract call graph profile");
+  for (const InputFile *file : inputFiles) {
+    auto *obj = dyn_cast_or_null(file);
+    if (!obj)
+      continue;
+    for (const CallGraphEntry &entry : obj->callGraph) {
+      assert(entry.fromIndex < obj->symbols.size() &&
+             entry.toIndex < obj->symbols.size());
+      auto *fromSym = dyn_cast_or_null(obj->symbols[entry.fromIndex]);
+      auto *toSym = dyn_cast_or_null(obj->symbols[entry.toIndex]);
+
+      if (!fromSym || !toSym)
+        continue;
+      config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
+    }
+  }
+}
+
+void macho::parseOrderFile(StringRef path) {
+  Optional buffer = readFile(path);
+  if (!buffer) {
+    error("Could not read order file at " + path);
+    return;
+  }
+
+  MemoryBufferRef mbref = *buffer;
+  size_t priority = std::numeric_limits::max();
+  for (StringRef line : args::getLines(mbref)) {
+    StringRef objectFile, symbol;
+    line = line.take_until([](char c) { return c == '#'; }); // ignore comments
+    line = line.ltrim();
+
+    CPUType cpuType = StringSwitch(line)
+                          .StartsWith("i386:", CPU_TYPE_I386)
+                          .StartsWith("x86_64:", CPU_TYPE_X86_64)
+                          .StartsWith("arm:", CPU_TYPE_ARM)
+                          .StartsWith("arm64:", CPU_TYPE_ARM64)
+                          .StartsWith("ppc:", CPU_TYPE_POWERPC)
+                          .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
+                          .Default(CPU_TYPE_ANY);
+
+    if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
+      continue;
+
+    // Drop the CPU type as well as the colon
+    if (cpuType != CPU_TYPE_ANY)
+      line = line.drop_until([](char c) { return c == ':'; }).drop_front();
+
+    constexpr std::array fileEnds = {".o:", ".o):"};
+    for (StringRef fileEnd : fileEnds) {
+      size_t pos = line.find(fileEnd);
+      if (pos != StringRef::npos) {
+        // Split the string around the colon
+        objectFile = line.take_front(pos + fileEnd.size() - 1);
+        line = line.drop_front(pos + fileEnd.size());
+        break;
+      }
+    }
+    symbol = line.trim();
+
+    if (!symbol.empty()) {
+      SymbolPriorityEntry &entry = config->priorities[symbol];
+      if (!objectFile.empty())
+        entry.objectFiles.insert(std::make_pair(objectFile, priority));
+      else
+        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
+    }
+
+    --priority;
+  }
+}
+
 // Sort sections by the profile data provided by __LLVM,__cg_profile sections.
 //
 // This first builds a call graph based on the profile data then merges sections
 // according to the C³ heuristic. All clusters are then sorted by a density
 // metric to further improve locality.
-DenseMap macho::computeCallGraphProfileOrder() {
+static DenseMap computeCallGraphProfileOrder() {
   TimeTraceScope timeScope("Call graph profile sort");
   return CallGraphSort().run();
 }
+
+// Each section gets assigned the priority of the highest-priority symbol it
+// contains.
+DenseMap macho::buildInputSectionPriorities() {
+  if (config->callGraphProfileSort)
+    return computeCallGraphProfileOrder();
+  DenseMap sectionPriorities;
+
+  if (config->priorities.empty())
+    return sectionPriorities;
+
+  auto addSym = [&](Defined &sym) {
+    if (sym.isAbsolute())
+      return;
+
+    auto it = config->priorities.find(sym.getName());
+    if (it == config->priorities.end())
+      return;
+
+    SymbolPriorityEntry &entry = it->second;
+    size_t &priority = sectionPriorities[sym.isec];
+    priority =
+        std::max(priority, getSymbolPriority(entry, sym.isec->getFile()));
+  };
+
+  // TODO: Make sure this handles weak symbols correctly.
+  for (const InputFile *file : inputFiles) {
+    if (isa(file))
+      for (Symbol *sym : file->symbols)
+        if (auto *d = dyn_cast_or_null(sym))
+          addSym(*d);
+  }
+
+  return sectionPriorities;
+}
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index f510d315e2c45..9cc4eff958cd7 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -15,7 +15,40 @@
 namespace lld {
 namespace macho {
 
-llvm::DenseMap computeCallGraphProfileOrder();
+// Reads every input section's call graph profile, and combines them into
+// config->callGraphProfile. If an order file is present, any edges where one
+// or both of the vertices are specified in the order file are discarded.
+void extractCallGraphProfile();
+
+// Reads the order file at `path` into config->priorities.
+//
+// An order file has one entry per line, in the following format:
+//
+//   <cpu>:<object file>:<symbol name>
+//
+// <cpu> and <object file> are optional. If not specified, then that entry
+// matches any symbol of that name. Parsing this format is not quite
+// straightforward because the symbol name itself can contain colons, so when
+// encountering a colon, we consider the preceding characters to decide if it
+// can be a valid CPU type or file path.
+//
+// If a symbol is matched by multiple entries, then it takes the lowest-ordered
+// entry (the one nearest to the front of the list.)
+//
+// The file can also have line comments that start with '#'.
+void parseOrderFile(StringRef path);
+
+// Returns layout priorities for some or all input sections. Sections are laid
+// out in decreasing order; that is, a higher priority section will be closer
+// to the beginning of its output section.
+//
+// If either an order file or a call graph profile are present, this is used
+// as the source of priorities. If both are present, the order file takes
+// precedence. If neither is present, an empty map is returned.
+//
+// Each section gets assigned the priority of the highest-priority symbol it
+// contains.
+llvm::DenseMap buildInputSectionPriorities();
 } // namespace macho
 } // namespace lld
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 1fafda99e39d2..2c0794e08ae3e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -849,54 +849,6 @@ template  void Writer::createLoadCommands() {
                               : 0));
 }
 
-static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
-                                const InputFile *f) {
-  // We don't use toString(InputFile *) here because it returns the full path
-  // for object files, and we only want the basename.
-  StringRef filename;
-  if (f->archiveName.empty())
-    filename = path::filename(f->getName());
-  else
-    filename = saver().save(path::filename(f->archiveName) + "(" +
-                            path::filename(f->getName()) + ")");
-  return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
-}
-
-// Each section gets assigned the priority of the highest-priority symbol it
-// contains.
-static DenseMap buildInputSectionPriorities() {
-  if (config->callGraphProfileSort)
-    return computeCallGraphProfileOrder();
-  DenseMap sectionPriorities;
-
-  if (config->priorities.empty())
-    return sectionPriorities;
-
-  auto addSym = [&](Defined &sym) {
-    if (sym.isAbsolute())
-      return;
-
-    auto it = config->priorities.find(sym.getName());
-    if (it == config->priorities.end())
-      return;
-
-    SymbolPriorityEntry &entry = it->second;
-    size_t &priority = sectionPriorities[sym.isec];
-    priority =
-        std::max(priority, getSymbolPriority(entry, sym.isec->getFile()));
-  };
-
-  // TODO: Make sure this handles weak symbols correctly.
-  for (const InputFile *file : inputFiles) {
-    if (isa(file))
-      for (Symbol *sym : file->symbols)
-        if (auto *d = dyn_cast_or_null(sym))
-          addSym(*d);
-  }
-
-  return sectionPriorities;
-}
-
 // Sorting only can happen once all outputs have been collected. Here we sort
 // segments, output sections within each segment, and input sections within each
 // output segment.

From bd1fac2fafd7a1afacce05cd53a3741a2214f5f1 Mon Sep 17 00:00:00 2001
From: Ashley Hedberg 
Date: Tue, 25 Jan 2022 15:42:49 +0000
Subject: [PATCH 577/946] Add assert on End iteration distance to
 Rewriter::getRewrittenText.

I currently have code that is crashing in the second std::advance call,
and it was not straightforward to identify the problem, as the first line
of the stacktrace is in RopePieceBTreeIterator::operator++:

```

*** SIGILL; stack trace: ***
PC: clang/include/clang/Rewrite/Core/RewriteRope.h:119 clang::RopePieceBTreeIterator::operator++()
    ../include/c++/v1/__iterator/advance.h:35 std::__u::__advance<>()
    ../include/c++/v1/__iterator/advance.h:65 std::__u::advance<>()
    clang/lib/Rewrite/Rewriter.cpp:228 clang::Rewriter::getRewrittenText()
    clang/include/clang/Rewrite/Core/Rewriter.h:106 clang::Rewriter::getRewrittenText()
```

Adding an assertion produces a friendlier error message for the caller.

Reviewed By: gribozavr2

Differential Revision: https://reviews.llvm.org/D117579
---
 clang/lib/Rewrite/Rewriter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Rewrite/Rewriter.cpp b/clang/lib/Rewrite/Rewriter.cpp
index 3b06afc76e16e..8950bfb7c4dcc 100644
--- a/clang/lib/Rewrite/Rewriter.cpp
+++ b/clang/lib/Rewrite/Rewriter.cpp
@@ -223,6 +223,7 @@ std::string Rewriter::getRewrittenText(CharSourceRange Range) const {
   RewriteBuffer::iterator Start = RB.begin();
   std::advance(Start, StartOff);
   RewriteBuffer::iterator End = Start;
+  assert(EndOff >= StartOff && "Invalid iteration distance");
   std::advance(End, EndOff-StartOff);
 
   return std::string(Start, End);

From dcc3e728ca018de785991d4ecb9efe4f6a18ca75 Mon Sep 17 00:00:00 2001
From: Andrew Litteken 
Date: Wed, 22 Dec 2021 15:37:48 -0600
Subject: [PATCH 578/946] [IROutliner] Allowing Phi Nodes in exit blocks

In addition to having multiple exit locations, there can be multiple blocks leading to the same exit location, which results in a potential phi node. If we find that multiple blocks within the region branch to the same block outside the region, resulting in a phi node, the code extractor pulls this phi node into the function and uses it as an output.

We make sure that this phi node is given an output slot, and that the two values are removed from the outputs if they are not used anywhere else outside of the region. Across the extracted regions, the phi nodes are combined into a single block for each potential output block, similar to the previous patch.

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D106995
---
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  13 +-
 llvm/lib/Transforms/IPO/IROutliner.cpp        | 643 ++++++++++++++++--
 .../IROutliner/gvn-output-set-overload.ll     | 122 ++++
 ...matched-phi-exits-not-in-first-outlined.ll |  85 +++
 .../IROutliner/mismatched-phi-exits.ll        |  85 +++
 .../mismatched-phi-outputs-ordering.ll        | 150 ++++
 .../outlining-branches-phi-nodes.ll           | 165 +++++
 .../IROutliner/outlining-exits-to-phi-node.ll |  48 +-
 .../IROutliner/phi-nodes-output-overload.ll   | 112 +++
 .../IROutliner/region-inputs-in-phi-nodes.ll  | 104 +++
 10 files changed, 1456 insertions(+), 71 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
 create mode 100644 llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll

diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 110c0b4dcf16b..dcae4454f8281 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -95,6 +95,10 @@ struct OutlinableRegion {
   /// required for the following basic blocks in this case.
   bool EndsInBranch = false;
 
+  /// The PHIBlocks with their corresponding return block based on the return
+  /// value as the key.
+  DenseMap PHIBlocks;
+
   /// Mapping of the argument number in the deduplicated function
   /// to a given constant, which is used when creating the arguments to the call
   /// to the newly created deduplicated function.  This is handled separately
@@ -182,7 +186,14 @@ class IROutliner {
   IROutliner(function_ref GTTI,
              function_ref GIRSI,
              function_ref GORE)
-      : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
+      : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
+    
+    // Check that the DenseMap implementation has not changed.
+    assert(DenseMapInfo::getEmptyKey() == (unsigned)-1 &&
+           "DenseMapInfo's empty key isn't -1!");
+    assert(DenseMapInfo::getTombstoneKey() == (unsigned)-2 &&
+           "DenseMapInfo's tombstone key isn't -2!");
+  }
   bool run(Module &M);
 
 private:
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 928b12013a545..c3104111864a4 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -106,6 +106,16 @@ struct OutlinableGroup {
   /// of the region.
   unsigned BranchesToOutside = 0;
 
+  /// Tracker counting backwards from the highest unsigned value possible to
+  /// avoid conflicting with the GVNs of assigned values.  We start at -3 since
+  /// -2 and -1 are assigned by the DenseMap.
+  unsigned PHINodeGVNTracker = -3;
+
+  DenseMap, SmallVector>>
+      PHINodeGVNToGVNs;
+  DenseMap GVNsToPHINodeGVN;
+
   /// The number of instructions that will be outlined by extracting \ref
   /// Regions.
   InstructionCost Benefit = 0;
@@ -356,6 +366,24 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
   return Benefit;
 }
 
+/// Check the \p OutputMappings structure for value \p Input, if it exists
+/// it has been used as an output for outlining, and has been renamed, and we
+/// return the new value, otherwise, we return the same value.
+///
+/// \param OutputMappings [in] - The mapping of values to their renamed value
+/// after being used as an output for an outlined region.
+/// \param Input [in] - The value to find the remapped value of, if it exists.
+/// \return The remapped value if it has been renamed, and the same value if
+/// it has not.
+static Value *findOutputMapping(const DenseMap OutputMappings,
+                                Value *Input) {
+  DenseMap::const_iterator OutputMapping =
+      OutputMappings.find(Input);
+  if (OutputMapping != OutputMappings.end())
+    return OutputMapping->second;
+  return Input;
+}
+
 /// Find whether \p Region matches the global value numbering to Constant
 /// mapping found so far.
 ///
@@ -832,6 +860,209 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
   Region.NumExtractedInputs = OriginalIndex;
 }
 
+/// Check if the \p V has any uses outside of the region other than \p PN.
+///
+/// \param V [in] - The value to check.
+/// \param PHILoc [in] - The location in the PHINode of \p V.
+/// \param PN [in] - The PHINode using \p V.
+/// \param Exits [in] - The potential blocks we exit to from the outlined
+/// region.
+/// \param BlocksInRegion [in] - The basic blocks contained in the region.
+/// \returns true if \p V has any uses outside its region other than \p PN.
+static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
+                            SmallPtrSet &Exits,
+                            DenseSet &BlocksInRegion) {
+  // We check to see if the value is used by the PHINode from some other
+  // predecessor not included in the region.  If it is, we make sure
+  // to keep it as an output.
+  SmallVector IncomingNumbers(PN.getNumIncomingValues());
+  std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0);
+  if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
+        return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
+                !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
+      }))
+    return true;
+
+  // Check if the value is used by any other instructions outside the region.
+  return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
+    Instruction *I = dyn_cast(U);
+    if (!I)
+      return false;
+
+    // If the use of the item is inside the region, we skip it.  Uses
+    // inside the region give us useful information about how the item could be
+    // used as an output.
+    BasicBlock *Parent = I->getParent();
+    if (BlocksInRegion.contains(Parent))
+      return false;
+
+    // If it's not a PHINode then we definitely know the use matters.  This
+    // output value will not be completely combined with another item in a
+    // PHINode as it is directly referenced by another non-phi instruction.
+    if (!isa(I))
+      return true;
+
+    // If we have a PHINode outside one of the exit locations, then it
+    // can be considered an outside use as well.  If there is a PHINode
+    // contained in the Exit where this value's use matters, it will be
+    // caught when we analyze that PHINode.
+    if (!Exits.contains(Parent))
+      return true;
+
+    return false;
+  });
+}
+
+/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
+/// considered outputs. A PHINode is an output when more than one incoming
+/// value has been marked by the CodeExtractor as an output.
+///
+/// \param CurrentExitFromRegion [in] - The block to analyze.
+/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
+/// region.
+/// \param RegionBlocks [in] - The basic blocks in the region.
+/// \param Outputs [in, out] - The existing outputs for the region, we may add
+/// PHINodes to this as we find that they replace output values.
+/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
+/// totally replaced by a PHINode.
+/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
+/// in PHINodes, but have other uses, and should still be considered outputs.
+static void analyzeExitPHIsForOutputUses(
+    BasicBlock *CurrentExitFromRegion,
+    SmallPtrSet &PotentialExitsFromRegion,
+    DenseSet &RegionBlocks, SetVector &Outputs,
+    DenseSet &OutputsReplacedByPHINode,
+    DenseSet &OutputsWithNonPhiUses) {
+  for (PHINode &PN : CurrentExitFromRegion->phis()) {
+    // Find all incoming values from the outlining region.
+    SmallVector IncomingVals;
+    for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
+      if (RegionBlocks.contains(PN.getIncomingBlock(I)))
+        IncomingVals.push_back(I);
+
+    // Do not process PHI if there are no predecessors from region.
+    unsigned NumIncomingVals = IncomingVals.size();
+    if (NumIncomingVals == 0)
+      continue;
+
+    // If there is one predecessor, we mark it as a value that needs to be kept
+    // as an output.
+    if (NumIncomingVals == 1) {
+      Value *V = PN.getIncomingValue(*IncomingVals.begin());
+      OutputsWithNonPhiUses.insert(V);
+      OutputsReplacedByPHINode.erase(V);
+      continue;
+    }
+
+    // This PHINode will be used as an output value, so we add it to our list.
+    Outputs.insert(&PN);
+
+    // Not all of the incoming values should be ignored as other inputs and
+    // outputs may have uses in outlined region.  If they have other uses
+    // outside of the single PHINode we should not skip over it.
+    for (unsigned Idx : IncomingVals) {
+      Value *V = PN.getIncomingValue(Idx);
+      if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
+        OutputsWithNonPhiUses.insert(V);
+        OutputsReplacedByPHINode.erase(V);
+        continue;
+      }
+      if (!OutputsWithNonPhiUses.contains(V))
+        OutputsReplacedByPHINode.insert(V);
+    }
+  }
+}
+
+// Represents the type for the unsigned number denoting the output number for
+// phi node, along with the canonical number for the exit block.
+using ArgLocWithBBCanon = std::pair;
+// The list of canonical numbers for the incoming values to a PHINode.
+using CanonList = SmallVector;
+// The pair type representing the set of canonical values being combined in the
+// PHINode, along with the location data for the PHINode.
+using PHINodeData = std::pair;
+
+/// Encode \p PND as an integer for easy lookup based on the argument location,
+/// the parent BasicBlock canonical numbering, and the canonical numbering of
+/// the values stored in the PHINode.
+///
+/// \param PND - The data to hash.
+/// \returns The hash code of \p PND.
+static hash_code encodePHINodeData(PHINodeData &PND) {
+  return llvm::hash_combine(
+      llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
+      llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
+}
+
+/// Create a special GVN for PHINodes that will be used outside of
+/// the region.  We create a hash code based on the Canonical number of the
+/// parent BasicBlock, the canonical numbering of the values stored in the
+/// PHINode and the aggregate argument location.  This is used to find whether
+/// this PHINode type has been given a canonical numbering already.  If not, we
+/// assign it a value and store it for later use.  The value is returned to
+/// identify different output schemes for the set of regions.
+///
+/// \param Region - The region that \p PN is an output for.
+/// \param PN - The PHINode we are analyzing.
+/// \param AggArgIdx - The argument \p PN will be stored into.
+/// \returns An optional holding the assigned canonical number, or None if
+/// there is some attribute of the PHINode blocking it from being used.
+static Optional getGVNForPHINode(OutlinableRegion &Region,
+                                           PHINode *PN, unsigned AggArgIdx) {
+  OutlinableGroup &Group = *Region.Parent;
+  IRSimilarityCandidate &Cand = *Region.Candidate;
+  BasicBlock *PHIBB = PN->getParent();
+  CanonList PHIGVNs;
+  for (Value *Incoming : PN->incoming_values()) {
+    // If we cannot find a GVN, this means that the input to the PHINode is
+    // not included in the region we are trying to analyze, meaning, that if
+    // it was outlined, we would be adding an extra input.  We ignore this
+    // case for now, and so ignore the region.
+    Optional OGVN = Cand.getGVN(Incoming);
+    if (!OGVN.hasValue()) {
+      Region.IgnoreRegion = true;
+      return None;
+    }
+
+    // Collect the canonical numbers of the values in the PHINode.
+    unsigned GVN = OGVN.getValue();
+    OGVN = Cand.getCanonicalNum(GVN);
+    assert(OGVN.hasValue() && "No GVN found for incoming value?");
+    PHIGVNs.push_back(*OGVN);
+  }
+
+  // Now that we have the GVNs for the incoming values, we are going to combine
+  // them with the GVN of the incoming block, and the output location of the
+  // PHINode to generate a hash value representing this instance of the PHINode.
+  DenseMap::iterator GVNToPHIIt;
+  DenseMap::iterator PHIToGVNIt;
+  Optional BBGVN = Cand.getGVN(PHIBB);
+  assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
+
+  BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
+  assert(BBGVN.hasValue() &&
+         "Could not find canonical number for the incoming block!");
+  // Create a pair of the exit block canonical value, and the aggregate
+  // argument location, connected to the canonical numbers stored in the
+  // PHINode.
+  PHINodeData TemporaryPair =
+      std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
+  hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
+
+  // Look for and create a new entry in our connection between canonical
+  // numbers for PHINodes, and the set of objects we just created.
+  GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
+  if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
+    bool Inserted = false;
+    std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
+        std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
+    std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
+        std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
+  }
+
+  return GVNToPHIIt->second;
+}
+
 /// Create a mapping of the output arguments for the \p Region to the output
 /// arguments of the overall outlined function.
 ///
@@ -844,35 +1075,25 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
   IRSimilarityCandidate &C = *Region.Candidate;
 
   SmallVector BE;
-  DenseSet BBSet;
-  C.getBasicBlocks(BBSet, BE);
+  DenseSet BlocksInRegion;
+  C.getBasicBlocks(BlocksInRegion, BE);
 
   // Find the exits to the region.
   SmallPtrSet Exits;
   for (BasicBlock *Block : BE)
     for (BasicBlock *Succ : successors(Block))
-      if (!BBSet.contains(Succ))
+      if (!BlocksInRegion.contains(Succ))
         Exits.insert(Succ);
 
   // After determining which blocks exit to PHINodes, we add these PHINodes to
   // the set of outputs to be processed.  We also check the incoming values of
   // the PHINodes for whether they should no longer be considered outputs.
-  for (BasicBlock *ExitBB : Exits) {
-    for (PHINode &PN : ExitBB->phis()) {
-      // Find all incoming values from the outlining region.
-      SmallVector IncomingVals;
-      for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
-        if (BBSet.contains(PN.getIncomingBlock(Idx)))
-          IncomingVals.push_back(Idx);
-
-      // Do not process PHI if there is one (or fewer) predecessor from region.
-      if (IncomingVals.size() <= 1)
-        continue;
-
-      Region.IgnoreRegion = true;
-      return;
-    }
-  }
+  DenseSet OutputsReplacedByPHINode;
+  DenseSet OutputsWithNonPhiUses;
+  for (BasicBlock *ExitBB : Exits)
+    analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
+                                 OutputsReplacedByPHINode,
+                                 OutputsWithNonPhiUses);
 
   // This counts the argument number in the extracted function.
   unsigned OriginalIndex = Region.NumExtractedInputs;
@@ -895,9 +1116,13 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
     // do not have to be in same order, but are functionally the same, we will
     // have to use a different scheme, as one-to-one correspondence is not
     // guaranteed.
-    unsigned GlobalValue = C.getGVN(Output).getValue();
     unsigned ArgumentSize = Group.ArgumentTypes.size();
 
+    // If the output is combined in a PHINode, we make sure to skip over it.
+    if (OutputsReplacedByPHINode.contains(Output))
+      continue;
+
+    unsigned AggArgIdx = 0;
     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
         continue;
@@ -909,7 +1134,7 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
       AggArgsUsed.insert(Jdx);
       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+      AggArgIdx = Jdx;
       break;
     }
 
@@ -918,18 +1143,54 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
     // function to handle this output and create a mapping to it.
     if (!TypeFound) {
       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
-      AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
+      // Mark the new pointer type as the last value in the aggregate argument
+      // list.
+      unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
+      AggArgsUsed.insert(ArgTypeIdx);
       Region.ExtractedArgToAgg.insert(
-          std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
+          std::make_pair(OriginalIndex, ArgTypeIdx));
       Region.AggArgToExtracted.insert(
-          std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+          std::make_pair(ArgTypeIdx, OriginalIndex));
+      AggArgIdx = ArgTypeIdx;
+    }
+
+    // TODO: Adapt to the extra input from the PHINode.
+    PHINode *PN = dyn_cast(Output);
+
+    Optional GVN;
+    if (PN && !BlocksInRegion.contains(PN->getParent())) {
+      // Values outside the region can be combined into PHINode when we
+      // have multiple exits. We collect both of these into a list to identify
+      // which values are being used in the PHINode. Each list identifies a
+      // different PHINode, and a different output. We store the PHINode as its
+      // own canonical value.  These canonical values are also dependent on the
+      // output argument it is saved to.
+
+      // If two PHINodes have the same canonical values, but different aggregate
+      // argument locations, then they will have distinct Canonical Values.
+      GVN = getGVNForPHINode(Region, PN, AggArgIdx);
+      if (!GVN.hasValue())
+        return; 
+    } else {
+      // If we do not have a PHINode we use the global value numbering for the
+      // output value, to find the canonical number to add to the set of stored
+      // values.
+      GVN = C.getGVN(Output);
+      GVN = C.getCanonicalNum(*GVN);
     }
 
-    stable_sort(Region.GVNStores);
+    // Each region has a potentially unique set of outputs.  We save which
+    // values are output in a list of canonical values so we can differentiate
+    // among the different store schemes.
+    Region.GVNStores.push_back(*GVN);
+
     OriginalIndex++;
     TypeIndex++;
   }
+
+  // We sort the stored values to make sure that we are not affected by analysis
+  // order when determining what combination of items were stored.
+  stable_sort(Region.GVNStores);
 }
 
 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
@@ -1065,6 +1326,214 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
   return Call;
 }
 
+/// Find or create a BasicBlock in the outlined function containing PhiBlocks
+/// for \p RetVal.
+///
+/// \param Group - The OutlinableGroup containing the information about the
+/// overall outlined function.
+/// \param RetVal - The return value or exit option that we are currently
+/// evaluating.
+/// \returns The found or newly created BasicBlock to contain the needed
+/// PHINodes to be used as outputs.
+static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
+  DenseMap::iterator PhiBlockForRetVal,
+      ReturnBlockForRetVal;
+  PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
+  ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
+  assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
+         "Could not find output value!");
+  BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
+
+  // Find if a PHIBlock exists for this return value already.  If it is
+  // the first time we are analyzing this, we will not, so we record it.
+  PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
+  if (PhiBlockForRetVal != Group.PHIBlocks.end())
+    return PhiBlockForRetVal->second;
+  
+  // If we did not find a block, we create one, and insert it into the
+  // overall function and record it.
+  bool Inserted = false;
+  BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
+                                            ReturnBB->getParent());
+  std::tie(PhiBlockForRetVal, Inserted) =
+      Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+
+  // We find the predecessors of the return block in the newly created outlined
+  // function in order to point them to the new PHIBlock rather than the already
+  // existing return block.
+  SmallVector BranchesToChange;
+  for (BasicBlock *Pred : predecessors(ReturnBB))
+    BranchesToChange.push_back(cast(Pred->getTerminator()));
+
+  // Now we mark the branch instructions found, and change the references of the
+  // return block to the newly created PHIBlock.
+  for (BranchInst *BI : BranchesToChange)
+    for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
+      if (BI->getSuccessor(Succ) != ReturnBB)
+        continue;
+      BI->setSuccessor(Succ, PHIBlock);
+    }
+
+  BranchInst::Create(ReturnBB, PHIBlock);
+
+  return PhiBlockForRetVal->second;
+}
+
+/// For the function call now representing the \p Region, find the passed value
+/// to that call that represents Argument \p A at the call location if the
+/// call has already been replaced with a call to the overall, aggregate
+/// function.
+///
+/// \param A - The Argument to get the passed value for.
+/// \param Region - The extracted Region corresponding to the outlined function.
+/// \returns The Value representing \p A at the call site.
+static Value *
+getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
+                                           const OutlinableRegion &Region) {
+  // If we don't need to adjust the argument number at all (since the call
+  // has already been replaced by a call to the overall outlined function)
+  // we can just get the specified argument.
+  return Region.Call->getArgOperand(A->getArgNo());
+}
+
+/// For the function call now representing the \p Region, find the passed value
+/// to that call that represents Argument \p A at the call location if the
+/// call has only been replaced by the call to the extracted function.
+///
+/// \param A - The Argument to get the passed value for.
+/// \param Region - The extracted Region corresponding to the outlined function.
+/// \returns The Value representing \p A at the call site.
+static Value *
+getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
+                                           const OutlinableRegion &Region) {
+  unsigned ArgNum = A->getArgNo();
+  
+  // If it is a constant, we can look at our mapping from when we created
+  // the outputs to figure out what the constant value is.
+  if (Region.AggArgToConstant.count(ArgNum))
+    return Region.AggArgToConstant.find(ArgNum)->second;
+  
+  // If it is not a constant, and we are not looking at the overall function, we
+  // need to adjust which argument we are looking at.
+  ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
+  return Region.Call->getArgOperand(ArgNum);
+}
+
+/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
+///
+/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
+/// \param Region [in] - The OutlinableRegion containing \p PN.
+/// \param OutputMappings [in] - The mapping of output values from outlined
+/// region to their original values.
+/// \param CanonNums [out] - The canonical numbering for the incoming values to
+/// \p PN.
+/// \param ReplacedWithOutlinedCall - A flag to use the overall function's call
+/// for \p Region rather than the extracted function's call.
+static void
+findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
+                    const DenseMap &OutputMappings,
+                    DenseSet &CanonNums,
+                    bool ReplacedWithOutlinedCall = true) {
+  // Iterate over the incoming values.
+  for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
+    Value *IVal = PN->getIncomingValue(Idx);
+    // If we have an argument as incoming value, we need to grab the passed
+    // value from the call itself.
+    if (Argument *A = dyn_cast(IVal)) {
+      if (ReplacedWithOutlinedCall)
+        IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
+      else
+        IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
+    }
+
+    // Get the original value if it has been replaced by an output value.
+    IVal = findOutputMapping(OutputMappings, IVal);
+
+    // Find and add the canonical number for the incoming value.
+    Optional GVN = Region.Candidate->getGVN(IVal);
+    assert(GVN.hasValue() && "No GVN for incoming value");
+    Optional CanonNum = Region.Candidate->getCanonicalNum(*GVN);
+    assert(CanonNum.hasValue() && "No Canonical Number for GVN");
+    CanonNums.insert(*CanonNum);
+  }
+}
+
+/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPhiBlock
+/// in order to condense the number of instructions added to the outlined
+/// function.
+///
+/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
+/// \param Region [in] - The OutlinableRegion containing \p PN. 
+/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
+/// \p PN in.
+/// \param OutputMappings [in] - The mapping of output values from outlined
+/// region to their original values.
+/// \return the newly found or created PHINode in \p OverallPhiBlock.
+static PHINode*
+findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
+                       BasicBlock *OverallPhiBlock,
+                       const DenseMap &OutputMappings) {
+  OutlinableGroup &Group = *Region.Parent;
+  
+  DenseSet PNCanonNums;
+  // We have to use the extracted function since we have not merged this region
+  // into the overall function yet.  We make sure to reassign the argument
+  // numbering since it is possible that the argument ordering is different
+  // between the functions.
+  findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
+                      /* ReplacedWithOutlinedCall = */ false);
+
+  OutlinableRegion *FirstRegion = Group.Regions[0];
+  DenseSet CurrentCanonNums;
+  // Find the Canonical Numbering for each PHINode, if it matches, we replace
+  // the uses of the PHINode we are searching for, with the found PHINode.
+  for (PHINode &CurrPN : OverallPhiBlock->phis()) {
+    CurrentCanonNums.clear();
+    findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
+                        /* ReplacedWithOutlinedCall = */ true);
+
+    if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) {
+          return CurrentCanonNums.contains(CanonNum);
+        }))
+      return &CurrPN;
+  }
+
+  // If we've made it here, it means we weren't able to replace the PHINode, so
+  // we must insert it ourselves.
+  PHINode *NewPN = cast(PN.clone());
+  NewPN->insertBefore(&*OverallPhiBlock->begin());
+  for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
+       Idx++) {
+    Value *IncomingVal = NewPN->getIncomingValue(Idx);
+    BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
+
+    // Find corresponding basic block in the overall function for the incoming
+    // block.
+    Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI();
+    assert(FirstNonPHI && "Incoming block is empty?");
+    Value *CorrespondingVal =
+        Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI);
+    assert(CorrespondingVal && "Value is nullptr?");
+    BasicBlock *BlockToUse = cast(CorrespondingVal)->getParent();
+    NewPN->setIncomingBlock(Idx, BlockToUse);
+
+    // If we have an argument we make sure we replace using the argument from
+    // the correct function.
+    if (Argument *A = dyn_cast(IncomingVal)) {
+      Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
+      NewPN->setIncomingValue(Idx, Val);
+      continue;
+    }
+    
+    // Find the corresponding value in the overall function.
+    IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
+    Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
+    assert(Val && "Value is nullptr?");
+    NewPN->setIncomingValue(Idx, Val);
+  }
+  return NewPN;
+}
+
 // Within an extracted function, replace the argument uses of the extracted
 // region with the arguments of the function for an OutlinableGroup.
 //
@@ -1077,6 +1546,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
 static void
 replaceArgumentUses(OutlinableRegion &Region,
                     DenseMap &OutputBBs,
+                    const DenseMap &OutputMappings,
                     bool FirstFunction = false) {
   OutlinableGroup &Group = *Region.Parent;
   assert(Region.ExtractedFunction && "Region has no extracted function?");
@@ -1146,12 +1616,46 @@ replaceArgumentUses(OutlinableRegion &Region,
       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
                         << *OutputBB << "\n");
 
-      if (FirstFunction)
+      // If this is storing a PHINode, we must make sure it is included in the
+      // overall function.
+      if (!isa(ValueOperand)) {
+        if (FirstFunction)
+          continue;
+        Value *CorrVal =
+            Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
+        assert(CorrVal && "Value is nullptr?");
+        NewI->setOperand(0, CorrVal);
+        continue;
+      }
+      PHINode *PN = cast(SI->getValueOperand());
+      // If it has a value, it was not split by the code extractor, which
+      // is what we are looking for.
+      if (Region.Candidate->getGVN(PN).hasValue())
         continue;
-      Value *CorrVal =
-          Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
-      assert(CorrVal && "Value is nullptr?");
-      NewI->setOperand(0, CorrVal);
+
+      // We record the parent block for the PHINode in the Region so that
+      // we can exclude it from checks later on.
+      Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
+
+      // If this is the first function, we do not need to worry about merging
+      // this with any other block in the overall outlined function, so we can
+      // just continue.
+      if (FirstFunction) {
+        BasicBlock *PHIBlock = PN->getParent();
+        Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+        continue;
+      }
+
+      // We look for the aggregate block that contains the PHINodes leading into
+      // this exit path. If we can't find one, we create one.
+      BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
+
+      // For our PHINode, we find the combined canonical numbering, and
+      // attempt to find a matching PHINode in the overall PHIBlock.  If we
+      // cannot, we copy the PHINode and move it into this new block.
+      PHINode *NewPN =
+          findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings);
+      NewI->setOperand(0, NewPN);
     }
 
     // If we added an edge for basic blocks without a predecessor, we remove it
@@ -1392,7 +1896,12 @@ void createSwitchStatement(
     Module &M, OutlinableGroup &OG, DenseMap &EndBBs,
     std::vector> &OutputStoreBBs) {
   // We only need the switch statement if there is more than one store
-  // combination.
+  // combination, or there is more than one set of output blocks.  The first
+  // will occur when we store different sets of values for two different
+  // regions.  The second will occur when we have two outputs that are combined
+  // in a PHINode outside of the region in one outlined instance, and are used
+  // separately in another. This will create the same set of OutputGVNs, but
+  // will generate two different output schemes.
   if (OG.OutputGVNCombinations.size() > 1) {
     Function *AggFunc = OG.OutlinedFunction;
     // Create a final block for each different return block.
@@ -1435,8 +1944,14 @@ void createSwitchStatement(
     return;
   }
 
+  assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
+
   // If there needs to be stores, move them from the output blocks to their
-  // corresponding ending block.
+  // corresponding ending block.  We do not check that the OutputGVNCombinations
+  // is equal to 1 here since that could just be the case where there are 0
+  // outputs. Instead, we check whether there is exactly one set of output
+  // blocks since this is the only case where we would have to move the
+  // stores, and erase the extraneous blocks.
   if (OutputStoreBBs.size() == 1) {
     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
                       << *OG.OutlinedFunction << "\n");
@@ -1468,10 +1983,13 @@ void createSwitchStatement(
 /// set of stores needed for the different functions.
 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
 /// once outlining is complete.
+/// \param [in] OutputMappings - The mapping of output values from outlined
+/// region to their original values.
 static void fillOverallFunction(
     Module &M, OutlinableGroup &CurrentGroup,
     std::vector> &OutputStoreBBs,
-    std::vector &FuncsToRemove) {
+    std::vector &FuncsToRemove,
+    const DenseMap &OutputMappings) {
   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
 
   // Move first extracted function's instructions into new function.
@@ -1491,7 +2009,7 @@ static void fillOverallFunction(
                              CurrentGroup.OutlinedFunction, "output_block_0");
   CurrentOS->OutputBlockNum = 0;
 
-  replaceArgumentUses(*CurrentOS, NewBBs, true);
+  replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
   replaceConstants(*CurrentOS);
 
   // We first identify if any output blocks are empty, if they are we remove
@@ -1525,7 +2043,8 @@ void IROutliner::deduplicateExtractedSections(
 
   OutlinableRegion *CurrentOS;
 
-  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
+                      OutputMappings);
 
   std::vector SortedKeys;
   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
@@ -1539,8 +2058,7 @@ void IROutliner::deduplicateExtractedSections(
     createAndInsertBasicBlocks(
         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
         "output_block_" + Twine(static_cast(Idx)));
-
-    replaceArgumentUses(*CurrentOS, NewBBs);
+    replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
                                 CurrentGroup.EndBBs, OutputMappings,
                                 OutputStoreBBs);
@@ -1708,6 +2226,34 @@ IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
   return RegionBenefit;
 }
 
+/// For the \p OutputCanon number passed in find the value represented by this
+/// canonical number. If it is from a PHINode, we pick the first incoming
+/// value and return that Value instead.
+///
+/// \param Region - The OutlinableRegion to get the Value from.
+/// \param OutputCanon - The canonical number to find the Value from.
+/// \returns The Value represented by a canonical number \p OutputCanon in \p
+/// Region.
+static Value *findOutputValueInRegion(OutlinableRegion &Region,
+                                      unsigned OutputCanon) {
+  OutlinableGroup &CurrentGroup = *Region.Parent;
+  // If the value is greater than the value in the tracker, we have a
+  // PHINode and will instead use one of the incoming values to find the
+  // type.
+  if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
+    auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
+    assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
+           "Could not find GVN set for PHINode number!");
+    assert(It->second.second.size() > 0 && "PHINode does not have any values!");
+    OutputCanon = *It->second.second.begin();
+  }
+  Optional OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
+  assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
+  Optional OV = Region.Candidate->fromGVN(*OGVN);
+  assert(OV.hasValue() && "Could not find value for GVN?");
+  return *OV;
+}
+
 InstructionCost
 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
   InstructionCost OverallCost = 0;
@@ -1715,10 +2261,8 @@ IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
 
     // Each output incurs a load after the call, so we add that to the cost.
-    for (unsigned OutputGVN : Region->GVNStores) {
-      Optional OV = Region->Candidate->fromGVN(OutputGVN);
-      assert(OV.hasValue() && "Could not find value for GVN?");
-      Value *V = OV.getValue();
+    for (unsigned OutputCanon : Region->GVNStores) {
+      Value *V = findOutputValueInRegion(*Region, OutputCanon);
       InstructionCost LoadCost =
           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
                               TargetTransformInfo::TCK_CodeSize);
@@ -1747,6 +2291,7 @@ static InstructionCost findCostForOutputBlocks(Module &M,
   InstructionCost OutputCost = 0;
   unsigned NumOutputBranches = 0;
 
+  OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
   DenseSet CandidateBlocks;
   Candidate.getBasicBlocks(CandidateBlocks);
@@ -1772,10 +2317,8 @@ static InstructionCost findCostForOutputBlocks(Module &M,
 
   for (const ArrayRef &OutputUse :
        CurrentGroup.OutputGVNCombinations) {
-    for (unsigned GVN : OutputUse) {
-      Optional OV = Candidate.fromGVN(GVN);
-      assert(OV.hasValue() && "Could not find value for GVN?");
-      Value *V = OV.getValue();
+    for (unsigned OutputCanon : OutputUse) {
+      Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
       InstructionCost StoreCost =
           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
                               TargetTransformInfo::TCK_CodeSize);
@@ -2035,8 +2578,8 @@ unsigned IROutliner::doOutline(Module &M) {
         continue;
 
       SmallVector BE;
-      DenseSet BBSet;
-      OS->Candidate->getBasicBlocks(BBSet, BE);
+      DenseSet BlocksInRegion;
+      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
       OS->CE = new (ExtractorAllocator.Allocate())
           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                         false, "outlined");
@@ -2146,8 +2689,8 @@ unsigned IROutliner::doOutline(Module &M) {
     OutlinedRegions.clear();
     for (OutlinableRegion *OS : CurrentGroup.Regions) {
       SmallVector BE;
-      DenseSet BBSet;
-      OS->Candidate->getBasicBlocks(BBSet, BE);
+      DenseSet BlocksInRegion;
+      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
       OS->CE = new (ExtractorAllocator.Allocate())
           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                         false, "outlined");
diff --git a/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
new file mode 100644
index 0000000000000..494bb89008507
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do differentiate between outputs of the region stored in PHINodes
+; versus those stored outside of PHINodes.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = add i32 %c, 1
+  %2 = add i32 %e, 1
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32* null, i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[C_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[C_LOC]], i32* [[E_LOC]], i32 1)
+; CHECK-NEXT:    [[C_RELOAD:%.*]] = load i32, i32* [[C_LOC]], align 4
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C_RELOAD]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[E_RELOAD]], 1
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP3:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP3]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[C]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_1_1:
+; CHECK-NEXT:    store i32 [[C]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
new file mode 100644
index 0000000000000..ef003bad6c0c6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract blocks that contain PHINodes, and selectively
+; store into its respective block, creating a new block if needed.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1)
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[PHI_BLOCK:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[PHI_BLOCK]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[TMP3:%.*]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       phi_block:
+; CHECK-NEXT:    [[TMP3]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
new file mode 100644
index 0000000000000..e51c281bab0c9
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract blocks that contain PHINodes, and selectively
+; store each output into its respective block, only using it if needed.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1)
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void  @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_0:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
new file mode 100644
index 0000000000000..4f81747d04bb6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract similar regions where only one of them has a
+; PHINode in an exit block, and that the outputs are ordered consistently.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  %2 = add i32 %d, 1
+  %3 = add i32 %e, 1
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = add i32 %d, 1
+  %2 = add i32 %e, 1
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[D_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[LT_CAST2:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* null, i32 1)
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP4:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP4]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_0:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 [[D]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[D]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_1_1:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
+;
+; CHECK-LABEL: @outlined_ir_func_1(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[NEXT_TO_OUTLINE:%.*]]
+; CHECK:       next_to_outline:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0:%.*]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], 1
+; CHECK-NEXT:    br label [[NEXT_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
new file mode 100644
index 0000000000000..4e777e862543e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Here we have multiple exits with different sources, but the same outputs are
+; needed. This checks that they are compressed and moved into the appropriate
+; output blocks.
+
+define void @outline_outputs1() #0 {
+entry:
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  %output2 = alloca i32, align 4
+  %result2 = alloca i32, align 4
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  br label %block_2
+block_1:
+  %a2 = alloca i32, align 4
+  %b2 = alloca i32, align 4
+  br label %block_2
+block_2:
+  %a2val = load i32, i32* %a
+  %b2val = load i32, i32* %b
+  %add2 = add i32 2, %a2val
+  %mul2 = mul i32 2, %b2val
+  br label %block_5
+block_3:
+  %aval = load i32, i32* %a
+  %bval = load i32, i32* %b
+  %add = add i32 2, %aval
+  %mul = mul i32 2, %bval
+  br label %block_4
+block_4:
+  store i32 %add, i32* %output, align 4
+  store i32 %mul, i32* %result, align 4
+  br label %block_6
+block_5:
+  store i32 %add2, i32* %output, align 4
+  store i32 %mul2, i32* %result, align 4
+  br label %block_6
+block_6:
+  %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
+  ret void
+}
+
+define void @outline_outputs2() #0 {
+entry:
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  %output2 = alloca i32, align 4
+  %result2 = alloca i32, align 4
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  br label %block_2
+block_1:
+  %a2 = alloca i32, align 4
+  %b2 = alloca i32, align 4
+  br label %block_2
+block_2:
+  %a2val = load i32, i32* %a
+  %b2val = load i32, i32* %b
+  %add2 = add i32 2, %a2val
+  %mul2 = mul i32 2, %b2val
+  br label %block_5
+block_3:
+  %aval = load i32, i32* %a
+  %bval = load i32, i32* %b
+  %add = add i32 2, %aval
+  %mul = mul i32 2, %bval
+  br label %block_4
+block_4:
+  store i32 %add, i32* %output, align 4
+  store i32 %mul, i32* %result, align 4
+  br label %block_6
+block_5:
+  store i32 %add2, i32* %output, align 4
+  store i32 %mul2, i32* %result, align 4
+  br label %block_6
+block_6:
+  %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
+  ret void
+}
+
+; CHECK-LABEL: @outline_outputs1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIFF_CE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2:%.*]]
+; CHECK:       block_1:
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2]]
+; CHECK:       block_2:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]])
+; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK:       block_6:
+; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @outline_outputs2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIFF_CE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2:%.*]]
+; CHECK:       block_1:
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2]]
+; CHECK:       block_2:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]])
+; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK:       block_6:
+; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK:  define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[BLOCK_2_TO_OUTLINE:%.*]]
+; CHECK:       block_2_to_outline:
+; CHECK-NEXT:    [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 2, [[A2VAL]]
+; CHECK-NEXT:    [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
+; CHECK-NEXT:    br label [[BLOCK_5:%.*]]
+; CHECK:       block_3:
+; CHECK-NEXT:    [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 2, [[AVAL]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 2, [[BVAL]]
+; CHECK-NEXT:    br label [[BLOCK_4:%.*]]
+; CHECK:       block_4:
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[BLOCK_6_SPLIT:%.*]]
+; CHECK:       block_5:
+; CHECK-NEXT:    store i32 [[ADD2]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    store i32 [[MUL2]], i32* [[TMP3]], align 4
+; CHECK-NEXT:    br label [[BLOCK_6_SPLIT]]
+; CHECK:       block_6.split:
+; CHECK-NEXT:    [[DIFF_CE:%.*]] = phi i32 [ [[AVAL]], [[BLOCK_4]] ], [ [[A2VAL]], [[BLOCK_5]] ]
+; CHECK-NEXT:    br label [[BLOCK_6_EXITSTUB:%.*]]
+; CHECK:       block_6.exitStub:
+; CHECK-NEXT:    store i32 [[DIFF_CE]], i32* [[TMP4:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 00c99488e231d..77cb4a5bc476f 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -37,42 +37,50 @@ first:
 }
 ; CHECK-LABEL: @function1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]])
-; CHECK-NEXT:    br label [[FIRST]]
 ; CHECK:       first:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK-LABEL: @function2(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]])
-; CHECK-NEXT:    br label [[FIRST]]
 ; CHECK:       first:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
 ;
 ;
-; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
 ; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
-; CHECK:       test_to_outline:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
 ; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
 ; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
new file mode 100644
index 0000000000000..ecc1c36d6b9e3
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract similar regions whose exit PHINodes are in
+; different exit blocks, sharing a single output argument between them.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 1)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[PHI_BLOCK:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[PHI_BLOCK]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[TMP3:%.*]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       phi_block:
+; CHECK-NEXT:    [[TMP3]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
diff --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
new file mode 100644
index 0000000000000..258bbfe131e3e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propagate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST_SPLIT:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 false
+;

From d7e183b225ecddeef2a28a59c1addb8e1825ffc6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Thu, 18 Nov 2021 14:25:23 -0800
Subject: [PATCH 579/946] [lldb] Use new dyld SPIs to query the shared cache
 local symbols

rdar://85492172
---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 311 ++++++------------
 1 file changed, 107 insertions(+), 204 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 20383d9646fdc..2e712cded5308 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -6,8 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 
+#include 
+#include 
+#include 
+
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
@@ -155,28 +160,6 @@ struct lldb_copy_dyld_cache_header_v1 {
                     // and later
 };
 
-struct lldb_copy_dyld_cache_mapping_info {
-  uint64_t address;
-  uint64_t size;
-  uint64_t fileOffset;
-  uint32_t maxProt;
-  uint32_t initProt;
-};
-
-struct lldb_copy_dyld_cache_local_symbols_info {
-  uint32_t nlistOffset;
-  uint32_t nlistCount;
-  uint32_t stringsOffset;
-  uint32_t stringsSize;
-  uint32_t entriesOffset;
-  uint32_t entriesCount;
-};
-struct lldb_copy_dyld_cache_local_symbols_entry {
-  uint32_t dylibOffset;
-  uint32_t nlistStartIndex;
-  uint32_t nlistCount;
-};
-
 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
                                const char *alt_name, size_t reg_byte_size,
                                Stream &data) {
@@ -2257,6 +2240,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
   llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
   llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
   llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
+  UUID image_uuid;
 
   for (i = 0; i < m_header.ncmds; ++i) {
     const lldb::offset_t cmd_offset = offset;
@@ -2324,6 +2308,14 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                sizeof(function_starts_load_command));
       break;
 
+    case LC_UUID: {
+      const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
+
+      if (uuid_bytes)
+        image_uuid = UUID::fromOptionalData(uuid_bytes, 16);
+      break;
+    }
+
     default:
       break;
     }
@@ -2615,8 +2607,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                                              ? eh_frame_section_sp->GetID()
                                              : static_cast(NO_SECT);
 
-  lldb::offset_t nlist_data_offset = 0;
-
   uint32_t N_SO_index = UINT32_MAX;
 
   MachSymtabSectionInfo section_info(section_list);
@@ -2682,26 +2672,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
     // Next we need to determine the correct path for the dyld shared cache.
 
     ArchSpec header_arch = GetArchitecture();
-    char dsc_path[PATH_MAX];
-    char dsc_path_development[PATH_MAX];
-
-    snprintf(
-        dsc_path, sizeof(dsc_path), "%s%s%s",
-        "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
-                                                   */
-        "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
-        header_arch.GetArchitectureName());
-
-    snprintf(
-        dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
-        "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
-                                                   */
-        "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
-        header_arch.GetArchitectureName(), ".development");
-
-    FileSpec dsc_nondevelopment_filespec(dsc_path);
-    FileSpec dsc_development_filespec(dsc_path_development);
-    FileSpec dsc_filespec;
 
     UUID dsc_uuid;
     UUID process_shared_cache_uuid;
@@ -2712,155 +2682,99 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                                 process_shared_cache_uuid);
     }
 
-    // First see if we can find an exact match for the inferior process
-    // shared cache UUID in the development or non-development shared caches
-    // on disk.
-    if (process_shared_cache_uuid.IsValid()) {
-      if (FileSystem::Instance().Exists(dsc_development_filespec)) {
-        UUID dsc_development_uuid = GetSharedCacheUUID(
-            dsc_development_filespec, byte_order, addr_byte_size);
-        if (dsc_development_uuid.IsValid() &&
-            dsc_development_uuid == process_shared_cache_uuid) {
-          dsc_filespec = dsc_development_filespec;
-          dsc_uuid = dsc_development_uuid;
-        }
-      }
-      if (!dsc_uuid.IsValid() &&
-          FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
-        UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
-            dsc_nondevelopment_filespec, byte_order, addr_byte_size);
-        if (dsc_nondevelopment_uuid.IsValid() &&
-            dsc_nondevelopment_uuid == process_shared_cache_uuid) {
-          dsc_filespec = dsc_nondevelopment_filespec;
-          dsc_uuid = dsc_nondevelopment_uuid;
-        }
-      }
-    }
+    __block bool found_image = false;
+    __block void *nlist_buffer = nullptr;
+    __block unsigned nlist_count = 0;
+    __block char *string_table = nullptr;
+    __block vm_offset_t vm_nlist_memory = 0;
+    __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
+    __block vm_offset_t vm_string_memory = 0;
+    __block mach_msg_type_number_t vm_string_bytes_read = 0;
+
+    auto _ = llvm::make_scope_exit(^{
+      if (vm_nlist_memory)
+        vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
+      if (vm_string_memory)
+        vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
+    });
 
-    // Failing a UUID match, prefer the development dyld_shared cache if both
-    // are present.
-    if (!FileSystem::Instance().Exists(dsc_filespec)) {
-      if (FileSystem::Instance().Exists(dsc_development_filespec)) {
-        dsc_filespec = dsc_development_filespec;
-      } else {
-        dsc_filespec = dsc_nondevelopment_filespec;
-      }
-    }
+    typedef llvm::DenseMap UndefinedNameToDescMap;
+    typedef llvm::DenseMap SymbolIndexToName;
+    UndefinedNameToDescMap undefined_name_to_desc;
+    SymbolIndexToName reexport_shlib_needs_fixup;
 
-    /* The dyld_cache_header has a pointer to the
-       dyld_cache_local_symbols_info structure (localSymbolsOffset).
-       The dyld_cache_local_symbols_info structure gives us three things:
-         1. The start and count of the nlist records in the dyld_shared_cache
-       file
-         2. The start and size of the strings for these nlist records
-         3. The start and count of dyld_cache_local_symbols_entry entries
-
-       There is one dyld_cache_local_symbols_entry per dylib/framework in the
-       dyld shared cache.
-       The "dylibOffset" field is the Mach-O header of this dylib/framework in
-       the dyld shared cache.
-       The dyld_cache_local_symbols_entry also lists the start of this
-       dylib/framework's nlist records
-       and the count of how many nlist records there are for this
-       dylib/framework.
-    */
-
-    // Process the dyld shared cache header to find the unmapped symbols
-
-    DataBufferSP dsc_data_sp = MapFileData(
-        dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
-    if (!dsc_uuid.IsValid()) {
-      dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
-    }
-    if (dsc_data_sp) {
-      DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
+    dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
+      uuid_t cache_uuid;
+      dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
+      if (found_image)
+        return;
 
-      bool uuid_match = true;
-      if (dsc_uuid.IsValid() && process) {
         if (process_shared_cache_uuid.IsValid() &&
-            dsc_uuid != process_shared_cache_uuid) {
-          // The on-disk dyld_shared_cache file is not the same as the one in
-          // this process' memory, don't use it.
-          uuid_match = false;
-          ModuleSP module_sp(GetModule());
-          if (module_sp)
-            module_sp->ReportWarning("process shared cache does not match "
-                                     "on-disk dyld_shared_cache file, some "
-                                     "symbol names will be missing.");
-        }
-      }
+          process_shared_cache_uuid != UUID::fromOptionalData(&cache_uuid, 16))
+        return;
 
-      offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
-
-      uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
-
-      // If the mappingOffset points to a location inside the header, we've
-      // opened an old dyld shared cache, and should not proceed further.
-      if (uuid_match &&
-          mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
-
-        DataBufferSP dsc_mapping_info_data_sp = MapFileData(
-            dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
-            mappingOffset);
-
-        DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
-                                            byte_order, addr_byte_size);
-        offset = 0;
-
-        // The File addresses (from the in-memory Mach-O load commands) for
-        // the shared libraries in the shared library cache need to be
-        // adjusted by an offset to match up with the dylibOffset identifying
-        // field in the dyld_cache_local_symbol_entry's.  This offset is
-        // recorded in mapping_offset_value.
-        const uint64_t mapping_offset_value =
-            dsc_mapping_info_data.GetU64(&offset);
-
-        offset =
-            offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
-        uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
-        uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
-
-        if (localSymbolsOffset && localSymbolsSize) {
-          // Map the local symbols
-          DataBufferSP dsc_local_symbols_data_sp =
-              MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
-
-          if (dsc_local_symbols_data_sp) {
-            DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
-                                                 byte_order, addr_byte_size);
-
-            offset = 0;
-
-            typedef llvm::DenseMap UndefinedNameToDescMap;
-            typedef llvm::DenseMap SymbolIndexToName;
-            UndefinedNameToDescMap undefined_name_to_desc;
-            SymbolIndexToName reexport_shlib_needs_fixup;
-
-            // Read the local_symbols_infos struct in one shot
-            struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
-            dsc_local_symbols_data.GetU32(&offset,
-                                          &local_symbols_info.nlistOffset, 6);
-
-            SectionSP text_section_sp(
-                section_list->FindSectionByName(GetSegmentNameTEXT()));
-
-            uint32_t header_file_offset =
-                (text_section_sp->GetFileAddress() - mapping_offset_value);
-
-            offset = local_symbols_info.entriesOffset;
-            for (uint32_t entry_index = 0;
-                 entry_index < local_symbols_info.entriesCount; entry_index++) {
-              struct lldb_copy_dyld_cache_local_symbols_entry
-                  local_symbols_entry;
-              local_symbols_entry.dylibOffset =
-                  dsc_local_symbols_data.GetU32(&offset);
-              local_symbols_entry.nlistStartIndex =
-                  dsc_local_symbols_data.GetU32(&offset);
-              local_symbols_entry.nlistCount =
-                  dsc_local_symbols_data.GetU32(&offset);
-
-              if (header_file_offset == local_symbols_entry.dylibOffset) {
-                unmapped_local_symbols_found = local_symbols_entry.nlistCount;
+      dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
+        uuid_t dsc_image_uuid;
+        if (found_image)
+          return;
+
+        dyld_image_copy_uuid(image, &dsc_image_uuid);
+        if (image_uuid != UUID::fromOptionalData(dsc_image_uuid, 16))
+          return;
+
+        found_image = true;
+
+        // Compute the size of the string table. We need to ask dyld for a
+        // new SPI to avoid this step.
+        dyld_image_local_nlist_content_4Symbolication(
+            image, ^(const void *nlistStart, uint64_t nlistCount,
+                     const char *stringTable) {
+              if (!nlistStart || !nlistCount)
+                return;
+
+              // The buffers passed here are valid only inside the block.
+              // Use vm_read to make a cheap copy of them available for our
+              // processing later.
+              kern_return_t ret =
+                  vm_read(mach_task_self(), (vm_address_t)nlistStart,
+                          nlist_byte_size * nlistCount, &vm_nlist_memory,
+                          &vm_nlist_bytes_read);
+              if (ret != KERN_SUCCESS)
+                return;
+              assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
+
+              // We don't know the size of the string table. It's cheaper
+              // to map the whole VM region than to determine the size by
+              // parsing all the nlist entries.
+              vm_address_t string_address = (vm_address_t)stringTable;
+              vm_size_t region_size;
+              mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
+              vm_region_basic_info_data_t info;
+              memory_object_name_t object;
+              ret = vm_region_64(mach_task_self(), &string_address,
+                                 &region_size, VM_REGION_BASIC_INFO_64,
+                                 (vm_region_info_t)&info, &info_count, &object);
+              if (ret != KERN_SUCCESS)
+                return;
+
+              ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
+                            region_size -
+                                ((vm_address_t)stringTable - string_address),
+                            &vm_string_memory, &vm_string_bytes_read);
+              if (ret != KERN_SUCCESS)
+                return;
+
+              nlist_buffer = (void *)vm_nlist_memory;
+              string_table = (char *)vm_string_memory;
+              nlist_count = nlistCount;
+            });
+      });
+    });
+    if (nlist_buffer) {
+      DataExtractor dsc_local_symbols_data(nlist_buffer,
+                                           nlist_count * nlist_byte_size,
+                                           byte_order, addr_byte_size);
+      unmapped_local_symbols_found = nlist_count;
 
                 // The normal nlist code cannot correctly size the Symbols
                 // array, we need to allocate it here.
@@ -2869,13 +2783,10 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                     unmapped_local_symbols_found - m_dysymtab.nlocalsym);
                 num_syms = symtab.GetNumSymbols();
 
-                nlist_data_offset =
-                    local_symbols_info.nlistOffset +
-                    (nlist_byte_size * local_symbols_entry.nlistStartIndex);
-                uint32_t string_table_offset = local_symbols_info.stringsOffset;
+      lldb::offset_t nlist_data_offset = 0;
 
                 for (uint32_t nlist_index = 0;
-                     nlist_index < local_symbols_entry.nlistCount;
+                     nlist_index < nlist_count;
                      nlist_index++) {
                   /////////////////////////////
                   {
@@ -2887,8 +2798,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                     struct nlist_64 nlist = *nlist_maybe;
 
                     SymbolType type = eSymbolTypeInvalid;
-                    const char *symbol_name = dsc_local_symbols_data.PeekCStr(
-                        string_table_offset + nlist.n_strx);
+          const char *symbol_name = string_table + nlist.n_strx;
 
                     if (symbol_name == NULL) {
                       // No symbol should be NULL, even the symbols with no
@@ -2898,7 +2808,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                           Host::eSystemLogError,
                           "error: DSC unmapped local symbol[%u] has invalid "
                           "string table offset 0x%x in %s, ignoring symbol\n",
-                          entry_index, nlist.n_strx,
+                          nlist_index, nlist.n_strx,
                           module_sp->GetFileSpec().GetPath().c_str());
                       continue;
                     }
@@ -3759,8 +3669,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                   }
                   /////////////////////////////
                 }
-                break; // No more entries to consider
-              }
             }
 
             for (const auto &pos : reexport_shlib_needs_fixup) {
@@ -3774,14 +3682,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
               }
             }
           }
-        }
-      }
-    }
-  }
 
-  // Must reset this in case it was mutated above!
-  nlist_data_offset = 0;
 #endif
+  lldb::offset_t nlist_data_offset = 0;
 
   if (nlist_data.GetByteSize() > 0) {
 

From 575c5d2a99eabf4a76ab6d81e25537804aa50c9d Mon Sep 17 00:00:00 2001
From: Steven Wan 
Date: Tue, 25 Jan 2022 12:38:42 -0500
Subject: [PATCH 580/946] Disable Go binding test on AIX

Disable the Go binding test on AIX because building the binding on AIX with Clang is currently unsupported.

Reviewed By: ZarkoCA

Differential Revision: https://reviews.llvm.org/D117505
---
 llvm/test/Bindings/Go/go.test | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Bindings/Go/go.test b/llvm/test/Bindings/Go/go.test
index 2d31c2790697a..52ed348df1252 100644
--- a/llvm/test/Bindings/Go/go.test
+++ b/llvm/test/Bindings/Go/go.test
@@ -1,4 +1,5 @@
 ; RUN: llvm-go test llvm.org/llvm/bindings/go/llvm
 
 ; REQUIRES: shell, default_triple
-; UNSUPPORTED: asan, ubsan, msan
+;; Building Go bindings with Clang is currently unsupported on AIX.
+; UNSUPPORTED: asan, ubsan, msan, -aix

From b089e4072a012dc0c8233cada37326f686ca2604 Mon Sep 17 00:00:00 2001
From: eopXD 
Date: Sat, 22 Jan 2022 00:46:36 -0800
Subject: [PATCH 581/946] [RISCV] Don't allow i64 vector div by constant to use
 mulh with Zve64x

EEW=64 of mulh and its variants requires V extension.

Authored by: Craig Topper  @craig.topper

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117947
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  13 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h      |   1 +
 llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll  | 283 ++++++++++--------
 llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 242 ++++++++-------
 llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll  | 314 +++++++++++---------
 llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 274 +++++++++--------
 6 files changed, 628 insertions(+), 499 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9b427703764ea..483d0abc8ad76 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -614,6 +614,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 
+      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
+      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
+        setOperationAction(ISD::MULHU, VT, Expand);
+        setOperationAction(ISD::MULHS, VT, Expand);
+      }
+
       setOperationAction(ISD::SMIN, VT, Legal);
       setOperationAction(ISD::SMAX, VT, Legal);
       setOperationAction(ISD::UMIN, VT, Legal);
@@ -910,8 +916,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::UMAX, VT, Custom);
         setOperationAction(ISD::ABS,  VT, Custom);
 
-        setOperationAction(ISD::MULHS, VT, Custom);
-        setOperationAction(ISD::MULHU, VT, Custom);
+        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
+        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
+          setOperationAction(ISD::MULHS, VT, Custom);
+          setOperationAction(ISD::MULHU, VT, Custom);
+        }
 
         setOperationAction(ISD::SADDSAT, VT, Custom);
         setOperationAction(ISD::UADDSAT, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 8f32e88d57c07..044dda0a1ccc2 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -156,6 +156,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtF() const { return HasStdExtF; }
   bool hasStdExtD() const { return HasStdExtD; }
   bool hasStdExtC() const { return HasStdExtC; }
+  bool hasStdExtV() const { return HasStdExtV; }
   bool hasStdExtZba() const { return HasStdExtZba; }
   bool hasStdExtZbb() const { return HasStdExtZbb; }
   bool hasStdExtZbc() const { return HasStdExtZbc; }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 16fe495b981cf..fc089d690863d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define  @vdiv_vv_nxv1i8( %va,  %vb) {
 ; CHECK-LABEL: vdiv_vv_nxv1i8:
@@ -895,38 +897,45 @@ define  @vdiv_vx_nxv1i64( %va, i64 %b) {
 }
 
 define  @vdiv_vi_nxv1i64_0( %va) {
-; RV32-LABEL: vdiv_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v9
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v9, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v9
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v9, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI58_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI58_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v9, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI58_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI58_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v9, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v9
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = sdiv  %va, %splat
@@ -969,38 +978,45 @@ define  @vdiv_vx_nxv2i64( %va, i64 %b) {
 }
 
 define  @vdiv_vi_nxv2i64_0( %va) {
-; RV32-LABEL: vdiv_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v10
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v10, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v10
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v10, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI61_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI61_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v10, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI61_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI61_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v10, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v10
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = sdiv  %va, %splat
@@ -1043,38 +1059,45 @@ define  @vdiv_vx_nxv4i64( %va, i64 %b) {
 }
 
 define  @vdiv_vi_nxv4i64_0( %va) {
-; RV32-LABEL: vdiv_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v12
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v12, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v12
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v12, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI64_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v12, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v12, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v12
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = sdiv  %va, %splat
@@ -1117,41 +1140,47 @@ define  @vdiv_vx_nxv8i64( %va, i64 %b) {
 }
 
 define  @vdiv_vi_nxv8i64_0( %va) {
-; RV32-LABEL: vdiv_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v16
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v16, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v16
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v16, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI67_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI67_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v16, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI67_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI67_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v16, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v16
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = sdiv  %va, %splat
   ret  %vc
 }
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 5bf80bbc34859..2635e14c7e06c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define  @vdivu_vv_nxv1i8( %va,  %vb) {
 ; CHECK-LABEL: vdivu_vv_nxv1i8:
@@ -820,33 +822,40 @@ define  @vdivu_vx_nxv1i64( %va, i64 %b) {
 }
 
 define  @vdivu_vi_nxv1i64_0( %va) {
-; RV32-LABEL: vdivu_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v9
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v9
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = udiv  %va, %splat
@@ -916,33 +925,40 @@ define  @vdivu_vx_nxv2i64( %va, i64 %b) {
 }
 
 define  @vdivu_vi_nxv2i64_0( %va) {
-; RV32-LABEL: vdivu_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v10
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v10
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = udiv  %va, %splat
@@ -1012,33 +1028,40 @@ define  @vdivu_vx_nxv4i64( %va, i64 %b) {
 }
 
 define  @vdivu_vi_nxv4i64_0( %va) {
-; RV32-LABEL: vdivu_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v12
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v12
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = udiv  %va, %splat
@@ -1108,33 +1131,40 @@ define  @vdivu_vx_nxv8i64( %va, i64 %b) {
 }
 
 define  @vdivu_vi_nxv8i64_0( %va) {
-; RV32-LABEL: vdivu_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v16
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v16
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = udiv  %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index f6e47b8272b41..9cd03815dc38e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define  @vrem_vv_nxv1i8( %va,  %vb) {
 ; CHECK-LABEL: vrem_vv_nxv1i8:
@@ -929,42 +931,49 @@ define  @vrem_vx_nxv1i64( %va, i64 %b) {
 }
 
 define  @vrem_vi_nxv1i64_0( %va) {
-; RV32-LABEL: vrem_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulh.vv v9, v8, v9
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v10, v9, a0
-; RV32-NEXT:    vsra.vi v9, v9, 1
-; RV32-NEXT:    vadd.vv v9, v9, v10
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v9, v8, v9
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v10, v9, a0
+; RV32-V-NEXT:    vsra.vi v9, v9, 1
+; RV32-V-NEXT:    vadd.vv v9, v9, v10
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI56_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI56_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulh.vx v9, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v10, v9, a0
-; RV64-NEXT:    vsra.vi v9, v9, 1
-; RV64-NEXT:    vadd.vv v9, v9, v10
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI56_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI56_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulh.vx v9, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v10, v9, a0
+; RV64-V-NEXT:    vsra.vi v9, v9, 1
+; RV64-V-NEXT:    vadd.vv v9, v9, v10
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = srem  %va, %splat
@@ -1007,42 +1016,49 @@ define  @vrem_vx_nxv2i64( %va, i64 %b) {
 }
 
 define  @vrem_vi_nxv2i64_0( %va) {
-; RV32-LABEL: vrem_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulh.vv v10, v8, v10
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v12, v10, a0
-; RV32-NEXT:    vsra.vi v10, v10, 1
-; RV32-NEXT:    vadd.vv v10, v10, v12
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v10, v8, v10
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v12, v10, a0
+; RV32-V-NEXT:    vsra.vi v10, v10, 1
+; RV32-V-NEXT:    vadd.vv v10, v10, v12
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI59_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI59_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulh.vx v10, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v12, v10, a0
-; RV64-NEXT:    vsra.vi v10, v10, 1
-; RV64-NEXT:    vadd.vv v10, v10, v12
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI59_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI59_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulh.vx v10, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v12, v10, a0
+; RV64-V-NEXT:    vsra.vi v10, v10, 1
+; RV64-V-NEXT:    vadd.vv v10, v10, v12
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = srem  %va, %splat
@@ -1085,42 +1101,49 @@ define  @vrem_vx_nxv4i64( %va, i64 %b) {
 }
 
 define  @vrem_vi_nxv4i64_0( %va) {
-; RV32-LABEL: vrem_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulh.vv v12, v8, v12
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v16, v12, a0
-; RV32-NEXT:    vsra.vi v12, v12, 1
-; RV32-NEXT:    vadd.vv v12, v12, v16
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v12, v8, v12
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v16, v12, a0
+; RV32-V-NEXT:    vsra.vi v12, v12, 1
+; RV32-V-NEXT:    vadd.vv v12, v12, v16
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI62_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI62_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulh.vx v12, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v16, v12, a0
-; RV64-NEXT:    vsra.vi v12, v12, 1
-; RV64-NEXT:    vadd.vv v12, v12, v16
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI62_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI62_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulh.vx v12, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v16, v12, a0
+; RV64-V-NEXT:    vsra.vi v12, v12, 1
+; RV64-V-NEXT:    vadd.vv v12, v12, v16
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = srem  %va, %splat
@@ -1163,42 +1186,49 @@ define  @vrem_vx_nxv8i64( %va, i64 %b) {
 }
 
 define  @vrem_vi_nxv8i64_0( %va) {
-; RV32-LABEL: vrem_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulh.vv v16, v8, v16
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v24, v16, a0
-; RV32-NEXT:    vsra.vi v16, v16, 1
-; RV32-NEXT:    vadd.vv v16, v16, v24
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v16, v8, v16
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v24, v16, a0
+; RV32-V-NEXT:    vsra.vi v16, v16, 1
+; RV32-V-NEXT:    vadd.vv v16, v16, v24
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI65_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulh.vx v16, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v24, v16, a0
-; RV64-NEXT:    vsra.vi v16, v16, 1
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI65_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulh.vx v16, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v24, v16, a0
+; RV64-V-NEXT:    vsra.vi v16, v16, 1
+; RV64-V-NEXT:    vadd.vv v16, v16, v24
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = srem  %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index e67f25b6b5c7c..c049bc6b76999 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define  @vremu_vv_nxv1i8( %va,  %vb) {
 ; CHECK-LABEL: vremu_vv_nxv1i8:
@@ -854,37 +856,44 @@ define  @vremu_vx_nxv1i64( %va, i64 %b) {
 }
 
 define  @vremu_vi_nxv1i64_0( %va) {
-; RV32-LABEL: vremu_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulhu.vv v9, v8, v9
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v9, v9, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v9, v8, v9
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v9, v9, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v9, v9, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v9, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v9, v9, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = urem  %va, %splat
@@ -958,37 +967,44 @@ define  @vremu_vx_nxv2i64( %va, i64 %b) {
 }
 
 define  @vremu_vi_nxv2i64_0( %va) {
-; RV32-LABEL: vremu_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulhu.vv v10, v8, v10
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v10, v10, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v10, v8, v10
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v10, v10, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulhu.vx v10, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v10, v10, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v10, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v10, v10, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = urem  %va, %splat
@@ -1062,37 +1078,44 @@ define  @vremu_vx_nxv4i64( %va, i64 %b) {
 }
 
 define  @vremu_vi_nxv4i64_0( %va) {
-; RV32-LABEL: vremu_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulhu.vv v12, v8, v12
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v12, v12, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v12, v8, v12
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v12, v12, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulhu.vx v12, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v12, v12, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v12, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v12, v12, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = urem  %va, %splat
@@ -1166,37 +1189,44 @@ define  @vremu_vx_nxv8i64( %va, i64 %b) {
 }
 
 define  @vremu_vi_nxv8i64_0( %va) {
-; RV32-LABEL: vremu_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulhu.vv v16, v8, v16
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v16, v16, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v16, v8, v16
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v16, v16, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulhu.vx v16, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v16, v16, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v16, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v16, v16, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV64-V-NEXT:    ret
   %head = insertelement  undef, i64 -7, i32 0
   %splat = shufflevector  %head,  undef,  zeroinitializer
   %vc = urem  %va, %splat

From 87e68cae50d7ca28975ca6fb456cf0ab2ac915a1 Mon Sep 17 00:00:00 2001
From: David Blaikie 
Date: Tue, 25 Jan 2022 09:55:48 -0800
Subject: [PATCH 582/946] Improve relnotes for the DWARFv5 default change

---
 clang/docs/ReleaseNotes.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd2896de54d5..2272d7197ac57 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -256,9 +256,9 @@ DWARF Support in Clang
 ----------------------
 
 - The default DWARF version has increased from DWARFv4 to DWARFv5.  You can opt
-  back in to the old behavior with -gdwarf-4. Some platforms (Darwin, Android,
-  and SCE for instance) already opt out of this version bump as is suitable for
-  the platform
+  back in to the old behavior with ``-gdwarf-4`` or ``-fdebug-default-version=4``.
+  Some platforms (Darwin, Android, and SCE for instance) already opt out of this
+  version bump as is suitable for the platform.
 
 Arm and AArch64 Support in Clang
 --------------------------------

From c2cd7cc63c5084cea943a7cfa7e1d7e5c62a34b2 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere 
Date: Tue, 25 Jan 2022 09:58:36 -0800
Subject: [PATCH 583/946] [lldb] Only include mach headers on Darwin

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 2e712cded5308..36ef7b520897b 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -9,10 +9,6 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 
-#include 
-#include 
-#include 
-
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
@@ -60,6 +56,9 @@
 #include 
 // GetLLDBSharedCacheUUID() needs to call dlsym()
 #include 
+#include 
+#include 
+#include 
 #endif
 
 #ifndef __APPLE__

From 19d7a0b47b6849923576845f87a3278e012e049b Mon Sep 17 00:00:00 2001
From: Zinovy Nis 
Date: Sat, 15 Jan 2022 16:07:51 +0300
Subject: [PATCH 584/946] [clang-tidy] [bugprone-assert-side-effect] Ignore
 list for functions/methods

A semicolon-separated list of the names of functions or methods to be considered as not having side-effects was added for bugprone-assert-side-effect. It can be used to exclude methods like iterator::begin/end from being considered as having side-effects.

Differential Revision: https://reviews.llvm.org/D116478
---
 .../bugprone/AssertSideEffectCheck.cpp        | 22 ++++++++++++++-----
 .../bugprone/AssertSideEffectCheck.h          |  1 +
 clang-tools-extra/docs/ReleaseNotes.rst       | 12 +++++++---
 .../checks/bugprone-assert-side-effect.rst    | 10 +++++++++
 .../checkers/bugprone-assert-side-effect.cpp  | 18 ++++++++++++++-
 5 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
index 4e2359ff4f67b..eba6b29f56af9 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AssertSideEffectCheck.h"
+#include "../utils/Matchers.h"
+#include "../utils/OptionsUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -25,7 +27,9 @@ namespace bugprone {
 
 namespace {
 
-AST_MATCHER_P(Expr, hasSideEffect, bool, CheckFunctionCalls) {
+AST_MATCHER_P2(Expr, hasSideEffect, bool, CheckFunctionCalls,
+               clang::ast_matchers::internal::Matcher,
+               IgnoredFunctionsMatcher) {
   const Expr *E = &Node;
 
   if (const auto *Op = dyn_cast(E)) {
@@ -55,7 +59,8 @@ AST_MATCHER_P(Expr, hasSideEffect, bool, CheckFunctionCalls) {
     bool Result = CheckFunctionCalls;
     if (const auto *FuncDecl = CExpr->getDirectCallee()) {
       if (FuncDecl->getDeclName().isIdentifier() &&
-          FuncDecl->getName() == "__builtin_expect") // exceptions come here
+          IgnoredFunctionsMatcher.matches(*FuncDecl, Finder,
+                                          Builder)) // exceptions come here
         Result = false;
       else if (const auto *MethodDecl = dyn_cast(FuncDecl))
         Result &= !MethodDecl->isConst();
@@ -72,8 +77,9 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name,
                                              ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context),
       CheckFunctionCalls(Options.get("CheckFunctionCalls", false)),
-      RawAssertList(Options.get("AssertMacros",
-                                "assert,NSAssert,NSCAssert")) {
+      RawAssertList(Options.get("AssertMacros", "assert,NSAssert,NSCAssert")),
+      IgnoredFunctions(utils::options::parseStringList(
+          "__builtin_expect;" + Options.get("IgnoredFunctions", ""))) {
   StringRef(RawAssertList).split(AssertMacros, ",", -1, false);
 }
 
@@ -81,11 +87,17 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name,
 void AssertSideEffectCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "CheckFunctionCalls", CheckFunctionCalls);
   Options.store(Opts, "AssertMacros", RawAssertList);
+  Options.store(Opts, "IgnoredFunctions",
+                utils::options::serializeStringList(IgnoredFunctions));
 }
 
 void AssertSideEffectCheck::registerMatchers(MatchFinder *Finder) {
+  auto IgnoredFunctionsMatcher =
+      matchers::matchesAnyListedName(IgnoredFunctions);
+
   auto DescendantWithSideEffect =
-      traverse(TK_AsIs, hasDescendant(expr(hasSideEffect(CheckFunctionCalls))));
+      traverse(TK_AsIs, hasDescendant(expr(hasSideEffect(
+                            CheckFunctionCalls, IgnoredFunctionsMatcher))));
   auto ConditionWithSideEffect = hasCondition(DescendantWithSideEffect);
   Finder->addMatcher(
       stmt(
diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
index 15d1a69cb8cd0..c240f362e71e6 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
@@ -42,6 +42,7 @@ class AssertSideEffectCheck : public ClangTidyCheck {
   const bool CheckFunctionCalls;
   const std::string RawAssertList;
   SmallVector AssertMacros;
+  const std::vector IgnoredFunctions;
 };
 
 } // namespace bugprone
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index cb622f9b09606..0e9491eab9f6a 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -133,7 +133,7 @@ New checks
 - New :doc:`readability-duplicate-include
   ` check.
 
-  Looks for duplicate includes and removes them.  
+  Looks for duplicate includes and removes them.
 
 - New :doc:`readability-identifier-length
   ` check.
@@ -167,7 +167,13 @@ New check aliases
 Changes in existing checks
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+- :doc:`bugprone-assert-side-effect <clang-tidy/checks/bugprone-assert-side-effect>`
+  check now supports an ``IgnoredFunctions`` option to explicitly consider
+  the specified semicolon-separated functions list as not having any
+  side-effects. Regular expressions for the list items are also accepted.
+
 - Removed default setting ``cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors = "true"``,
+  from :doc:`cppcoreguidelines-explicit-virtual-functions <clang-tidy/checks/cppcoreguidelines-explicit-virtual-functions>`
   to match the current state of the C++ Core Guidelines.
 
 - Removed suggestion ``use gsl::at`` from warning message in the
@@ -185,10 +191,10 @@ Changes in existing checks
 
 - Fixed a false positive in :doc:`bugprone-throw-keyword-missing
   ` when creating an exception object
-  using placement new
+  using placement new.
 
 - :doc:`cppcoreguidelines-narrowing-conversions `
-  check now supports a `WarnOnIntegerToFloatingPointNarrowingConversion`
+  check now supports a ``WarnOnIntegerToFloatingPointNarrowingConversion``
   option to control whether to warn on narrowing integer to floating-point
   conversions.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
index dc7a3c9a4bd6c..8ba84ff61c6a9 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
@@ -21,3 +21,13 @@ Options
    Whether to treat non-const member and non-member functions as they produce
    side effects. Disabled by default because it can increase the number of false
    positive warnings.
+
+.. option:: IgnoredFunctions
+
+   A semicolon-separated list of the names of functions or methods to be
+   considered as not having side-effects. Regular expressions are accepted,
+   e.g. `[Rr]ef(erence)?$` matches every function with suffix `Ref`, `ref`,
+   `Reference` and `reference`. The default is empty. If a name in the list
+   contains the sequence `::` it is matched against the qualified name
+   (i.e. `namespace::function`), otherwise it is matched against only
+   the function name (i.e. `function`).
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
index 85f471f6e9eb1..c327007651d4c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}]}" -- -fexceptions
+// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}, {key: bugprone-assert-side-effect.IgnoredFunctions, value: 'MyClass::badButIgnoredFunc'}]}" -- -fexceptions
 
 //===--- assert definition block ------------------------------------------===//
 int abort() { return 0; }
@@ -43,9 +43,12 @@ void print(...);
 
 //===----------------------------------------------------------------------===//
 
+bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
+
 class MyClass {
 public:
   bool badFunc(int a, int b) { return a * b > 0; }
+  bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
   bool goodFunc(int a, int b) const { return a * b > 0; }
 
   MyClass &operator=(const MyClass &rhs) { return *this; }
@@ -57,6 +60,11 @@ class MyClass {
   void operator delete(void *p) {}
 };
 
+class SomeoneElseClass {
+public:
+  bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
+};
+
 bool freeFunction() {
   return true;
 }
@@ -85,8 +93,16 @@ int main() {
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
 
   MyClass mc;
+  SomeoneElseClass sec;
   assert(mc.badFunc(0, 1));
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
+  assert(mc.badButIgnoredFunc(0, 1));
+  // The free function badButIgnoredFunc is not ignored as only MyClass members are ignored by the config
+  assert(badButIgnoredFunc(0, 1));
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
+  // sec.badButIgnoredFunc is not ignored as only MyClass members are ignored by the config
+  assert(sec.badButIgnoredFunc(0, 1));
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
   assert(mc.goodFunc(0, 1));
 
   MyClass mc2;

From c415ff186dbb9891886c6ac1f03060ad537e7074 Mon Sep 17 00:00:00 2001
From: David Green 
Date: Tue, 25 Jan 2022 18:10:09 +0000
Subject: [PATCH 585/946] [AArch64] Add extra vecreduce.add tests, including
 extending reductions. NFC

This is all the reductions from i8 -> i64 with either sign or zero
extensions.
---
 llvm/test/CodeGen/AArch64/vecreduce-add.ll | 2095 ++++++++++++++++++++
 1 file changed, 2095 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-add.ll

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
new file mode 100644
index 0000000000000..06077071468e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -0,0 +1,2095 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT
+
+define i32 @add_v4i32_v4i32(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  ret i32 %z
+}
+
+define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i32_v2i64_zext(<2 x i32> %x) {
+; CHECK-LABEL: add_v2i32_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i32_v2i64_sext(<2 x i32> %x) {
+; CHECK-LABEL: add_v2i32_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i32 @add_v8i16_v8i32_zext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i16_v4i32_zext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i16_v4i32_sext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  ret i16 %z
+}
+
+define i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i16_v4i64_zext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
+; CHECK-LABEL: add_v2i16_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i16_v2i64_sext(<2 x i16> %x) {
+; CHECK-LABEL: add_v2i16_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i32 @add_v16i8_v16i32_zext(<16 x i8> %x) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v16i8_v16i32_sext(<16 x i8> %x) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i8_v8i32_zext(<8 x i8> %x) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i8_v4i32_sext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  ret i16 %z
+}
+
+define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    smov w0, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  ret i16 %z
+}
+
+define zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  ret i16 %z
+}
+
+define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    smov w0, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  ret i16 %z
+}
+
+define zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  ret i8 %z
+}
+
+define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i8_v4i64_zext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i8_v4i64_sext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-NEXT:    ssra v1.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v1.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
+; CHECK-LABEL: add_v2i8_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i8_v2i64_sext(<2 x i8> %x) {
+; CHECK-LABEL: add_v2i8_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i64_v2i64(<2 x i64> %x) {
+; CHECK-LABEL: add_v2i64_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  ret i64 %z
+}
+
+define i32 @add_v4i32_v4i32_acc(<4 x i32> %x, i32 %a) {
+; CHECK-LABEL: add_v4i32_v4i32_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) {
+; CHECK-LABEL: add_v8i16_v8i16_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
+; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, i32 %a) {
+; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) {
+; CHECK-LABEL: add_v16i8_v16i8_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  %r = add i8 %z, %a
+  ret i8 %r
+}
+
+define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-NEXT:    ssra v1.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v1.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i64_v2i64_acc(<2 x i64> %x, i64 %a) {
+; CHECK-LABEL: add_v2i64_v2i64_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_pair_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i64 @add_pair_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i32> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.2d, v0.2s, #0
+; CHECK-NEXT:    sshll v3.2d, v1.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i32> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i32> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i32> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i32 @add_pair_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-NEXT:    uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = zext <8 x i16> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-NEXT:    saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = sext <8 x i16> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = zext <4 x i16> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = sext <4 x i16> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %y)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define i64 @add_pair_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = zext <8 x i16> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = sext <8 x i16> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i16> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll v2.2d, v0.2s, #0
+; CHECK-NEXT:    sshll v3.2d, v1.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i16> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i16> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    shl v1.2d, v1.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    ssra v0.2d, v1.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i16> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i32 @add_pair_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v2.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll2 v3.8h, v1.16b, #0
+; CHECK-BASE-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v4.4s, v0.8h, v2.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v2.4h
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v1.8h, v3.8h
+; CHECK-BASE-NEXT:    uaddl v1.4s, v1.4h, v3.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-BASE-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.16b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v3.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT:    udot v3.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    addv s0, v3.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %yy = zext <16 x i8> %y to <16 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v2.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll2 v3.8h, v1.16b, #0
+; CHECK-BASE-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v4.4s, v0.8h, v2.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v2.4h
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v1.8h, v3.8h
+; CHECK-BASE-NEXT:    saddl v1.4s, v1.4h, v3.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-BASE-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.16b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v3.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT:    sdot v3.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    addv s0, v3.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %yy = sext <16 x i8> %y to <16 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-BASE-NEXT:    uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.8b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v3.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT:    udot v3.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = zext <8 x i8> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-BASE-NEXT:    saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.8b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v3.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT:    sdot v3.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = sext <8 x i8> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    bic v1.4h, #255, lsl #8
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = zext <4 x i8> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    shl v1.4s, v1.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    ssra v0.4s, v1.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = sext <4 x i8> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v2.8h, v0.16b
+; CHECK-NEXT:    uaddw2 v1.8h, v3.8h, v1.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %yy = zext <16 x i8> %y to <16 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v2.8h, v0.16b
+; CHECK-NEXT:    saddw2 v1.8h, v3.8h, v1.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %yy = sext <16 x i8> %y to <16 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %yy = zext <8 x i8> %y to <8 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define signext i16 @add_pair_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %yy = sext <8 x i8> %y to <8 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define zeroext i8 @add_pair_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    addv b1, v1.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  %z2 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %y)
+  %z = add i8 %z1, %z2
+  ret i8 %z
+}
+
+define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v2.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v3.8h, v1.16b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ushll2 v4.4s, v2.8h, #0
+; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-NEXT:    ushll2 v5.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v6.4s, v3.4h, #0
+; CHECK-NEXT:    ushll v7.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v3.8h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    uaddl2 v16.2d, v5.4s, v4.4s
+; CHECK-NEXT:    uaddl v4.2d, v5.2s, v4.2s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    uaddl2 v3.2d, v7.4s, v6.4s
+; CHECK-NEXT:    uaddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT:    add v5.2d, v5.2d, v16.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v2.2d, v3.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v6.2d, v1.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v5.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %yy = zext <16 x i8> %y to <16 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v2.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v3.8h, v1.16b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v4.4s, v2.8h, #0
+; CHECK-NEXT:    sshll v2.4s, v2.4h, #0
+; CHECK-NEXT:    sshll2 v5.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v6.4s, v3.4h, #0
+; CHECK-NEXT:    sshll v7.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v3.8h, #0
+; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    saddl2 v16.2d, v5.4s, v4.4s
+; CHECK-NEXT:    saddl v4.2d, v5.2s, v4.2s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    saddl2 v3.2d, v7.4s, v6.4s
+; CHECK-NEXT:    saddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT:    add v5.2d, v5.2d, v16.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v2.2d, v3.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v6.2d, v1.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v5.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %yy = sext <16 x i8> %y to <16 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = zext <8 x i8> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = sext <8 x i8> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    bic v1.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i8> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT:    shl v2.2d, v2.2d, #56
+; CHECK-NEXT:    shl v3.2d, v3.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    sshr v2.2d, v2.2d, #56
+; CHECK-NEXT:    sshr v3.2d, v3.2d, #56
+; CHECK-NEXT:    ssra v2.2d, v0.2d, #56
+; CHECK-NEXT:    ssra v3.2d, v1.2d, #56
+; CHECK-NEXT:    add v0.2d, v2.2d, v3.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i8> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i8> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    ssra v0.2d, v1.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i8> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: add_pair_v2i64_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %y)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

From 970a191203e6d3d34c873beb64c333f2890b2025 Mon Sep 17 00:00:00 2001
From: eopXD 
Date: Fri, 21 Jan 2022 11:29:10 -0800
Subject: [PATCH 586/946] [Clang][RISCV] Guard vmulh, vsmul correctly

According to v-spec 1.0, `vmulh`, `vmulhu`, `vmulhsu` and `vsmul` are
NOT supported for EEW=64 in Zve64*.

This patch guards these intrinsics accordingly.

Authored by: Craig Topper  @craig.topper
Co-Authored by: Eop Chen  @eopXD

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117913
---
 clang/include/clang/Basic/riscv_vector.td     |   4 +
 .../rvv-intrinsics-overloaded/vmul-eew64.c    | 440 ++++++++++++++++++
 .../RISCV/rvv-intrinsics-overloaded/vmul.c    | 434 +----------------
 .../rvv-intrinsics-overloaded/vsmul-eew64.c   | 159 +++++++
 .../RISCV/rvv-intrinsics-overloaded/vsmul.c   | 154 +-----
 .../CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c | 440 ++++++++++++++++++
 .../test/CodeGen/RISCV/rvv-intrinsics/vmul.c  | 434 +----------------
 .../RISCV/rvv-intrinsics/vsmul-eew64.c        | 159 +++++++
 .../test/CodeGen/RISCV/rvv-intrinsics/vsmul.c | 154 +-----
 clang/utils/TableGen/RISCVVEmitter.cpp        |  18 +-
 10 files changed, 1219 insertions(+), 1177 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index bf268d89d19e1..7e9f610a0b186 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -1668,11 +1668,13 @@ defm vmax : RVVSignedBinBuiltinSet;
 
 // 12.10. Vector Single-Width Integer Multiply Instructions
 defm vmul : RVVIntBinBuiltinSet;
+let RequiredFeatures = ["FullMultiply"] in {
 defm vmulh : RVVSignedBinBuiltinSet;
 defm vmulhu : RVVUnsignedBinBuiltinSet;
 defm vmulhsu : RVVOutOp1BuiltinSet<"vmulhsu", "csil",
                                    [["vv", "v", "vvUv"],
                                     ["vx", "v", "vvUe"]]>;
+}
 
 // 12.11. Vector Integer Divide Instructions
 defm vdivu : RVVUnsignedBinBuiltinSet;
@@ -1759,7 +1761,9 @@ defm vasubu : RVVUnsignedBinBuiltinSet;
 defm vasub : RVVSignedBinBuiltinSet;
 
 // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+let RequiredFeatures = ["FullMultiply"] in {
 defm vsmul : RVVSignedBinBuiltinSet;
+}
 
 // 13.4. Vector Single-Width Scaling Shift Instructions
 defm vssrl : RVVUnsignedShiftBuiltinSet;
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
new file mode 100644
index 0000000000000..a69e943a6a2bc
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
@@ -0,0 +1,440 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: This test file contains the eew=64 variants of vmulh, vmulhu and vmulhsu.
+// NOTE: These 3 instructions are separated from vmul.c because their eew=64
+// versions are only enabled when the V extension is specified (not for Zve*).
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
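+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]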
+//
+vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
index 7d633ecde4265..b08b304c7ef5e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
 
@@ -1120,78 +1120,6 @@ vint32m8_t test_vmulh_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vmulh(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1516,78 +1444,6 @@ vuint32m8_t test_vmulhu_vx_u32m8(vuint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhu(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1912,78 +1768,6 @@ vint32m8_t test_vmulhsu_vx_i32m8(vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3100,78 +2884,6 @@ vint32m8_t test_vmulh_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t
   return vmulh(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3496,78 +3208,6 @@ vuint32m8_t test_vmulhu_vx_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32
   return vmulhu(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vv_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8
 vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
new file mode 100644
index 0000000000000..dc05b51b2a6aa
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
@@ -0,0 +1,159 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: The purpose of separating these 3 instructions from vsmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
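+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]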
+//
+vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
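+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]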
+//
+vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
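+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]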
+//
+vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
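+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]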
+//
+vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
index 6c1479515c36f..3905826cb0d80 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -328,78 +328,6 @@ vint32m8_t test_vsmul_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -762,83 +690,3 @@ vint32m8_t test_vsmul_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff,
                                  vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
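-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]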
-//
-vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
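-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]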
-//
-vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
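-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]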
-//
-vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
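-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]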
-//
-vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
new file mode 100644
index 0000000000000..97686762db666
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
@@ -0,0 +1,440 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: This test file contains eew=64 of vmulh, vmulhu, vmulhsu.
+// NOTE: The purpose of separating these 3 instructions from vmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu_vv_u64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu_vv_u64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu_vv_u64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu_vv_u64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
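+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]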
+//
+vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
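+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]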
+//
+vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
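+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]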
+//
+vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
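+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]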
+//
+vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
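+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]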
+//
+vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
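+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]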
+//
+vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
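+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]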
+//
+vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
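+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]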
+//
+vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
index 5d7f7e504290e..c2c4522ca174b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include 
 
@@ -1120,78 +1120,6 @@ vint32m8_t test_vmulh_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vmulh_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1516,78 +1444,6 @@ vuint32m8_t test_vmulhu_vx_u32m8(vuint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhu_vx_u32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu_vv_u64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu_vv_u64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu_vv_u64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu_vv_u64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1912,78 +1768,6 @@ vint32m8_t test_vmulhsu_vx_i32m8(vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3100,78 +2884,6 @@ vint32m8_t test_vmulh_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t
   return vmulh_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulh.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3496,78 +3208,6 @@ vuint32m8_t test_vmulhu_vx_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32
   return vmulhu_vx_u32m8_m(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vv_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8
 vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c
new file mode 100644
index 0000000000000..43983b15f18d5
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c
@@ -0,0 +1,159 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: The purpose of separating these 3 instructions from vsmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include 
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret  [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
index dd0ba266080a1..e1db9d589ec65 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -328,78 +328,6 @@ vint32m8_t test_vsmul_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv1i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv2i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv4i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64( [[OP1:%.*]],  [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.nxv8i64.i64.i64( [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -762,83 +690,3 @@ vint32m8_t test_vsmul_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff,
                                  vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv1i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv2i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv4i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]],  [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call  @llvm.riscv.vsmul.mask.nxv8i64.i64.i64( [[MASKEDOFF:%.*]],  [[OP1:%.*]], i64 [[OP2:%.*]],  [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret  [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 67d946d73e419..4b80d6da72fa8 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -141,11 +141,12 @@ using RISCVPredefinedMacroT = uint8_t;
 
 enum RISCVPredefinedMacro : RISCVPredefinedMacroT {
   Basic = 0,
-  Zfh = 1 << 1,
-  RV64 = 1 << 2,
-  VectorMaxELen64 = 1 << 3,
-  VectorMaxELenFp32 = 1 << 4,
-  VectorMaxELenFp64 = 1 << 5,
+  V = 1 << 1,
+  Zfh = 1 << 2,
+  RV64 = 1 << 3,
+  VectorMaxELen64 = 1 << 4,
+  VectorMaxELenFp32 = 1 << 5,
+  VectorMaxELenFp64 = 1 << 6,
 };
 
 // TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -808,6 +809,11 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
   for (auto Feature : RequiredFeatures) {
     if (Feature == "RV64")
       RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64;
+    // Note: Full multiply instructions (mulh, mulhu, mulhsu, smul) for EEW=64
+    // require V.
+    if (Feature == "FullMultiply" &&
+        (RISCVPredefinedMacros & RISCVPredefinedMacro::VectorMaxELen64))
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::V;
   }
 
   // Init OutputType and InputTypes
@@ -1314,6 +1320,8 @@ bool RVVEmitter::emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros,
     return false;
   OS << "#if ";
   ListSeparator LS(" && ");
+  if (PredefinedMacros & RISCVPredefinedMacro::V)
+    OS << LS << "defined(__riscv_v)";
   if (PredefinedMacros & RISCVPredefinedMacro::Zfh)
     OS << LS << "defined(__riscv_zfh)";
   if (PredefinedMacros & RISCVPredefinedMacro::RV64)

From ee522345ae80c78f93ef5703b380f8496642c8db Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Tue, 25 Jan 2022 19:23:48 +0100
Subject: [PATCH 587/946] [libc++][doc] Update format implementation status.

---
 libcxx/docs/Status/FormatIssues.csv |  2 +-
 libcxx/docs/Status/FormatPaper.csv  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv
index b979b0de3a18d..473aba8ea2050 100644
--- a/libcxx/docs/Status/FormatIssues.csv
+++ b/libcxx/docs/Status/FormatIssues.csv
@@ -1,6 +1,6 @@
 Number,Name,Assignee,Patch,Status,First released version
 `P0645 `_,"Text Formatting",Mark de Wever,,|Complete|,Clang 14
-`P1652 `_,"Printf corner cases in std::format",Mark de Wever,"`D103433 `__, `D114001 `__",|Review|,
+`P1652 `_,"Printf corner cases in std::format",Mark de Wever,"`D103433 `__, `D114001 `__",|Complete|,Clang 14
 `P1892 `_,"Extended locale-specific presentation specifiers for std::format",Mark de Wever,`D103368 `__,|Complete|,Clang 14
 `P1868 `_,"width: clarifying units of width and precision in std::format (Implements the unicode support.)",Mark de Wever,"`D103413 `__ `D103425 `__ `D103670 `__",|Complete|,Clang 14
 `P2216 `_,"std::format improvements",Mark de Wever,,|In Progress|,
diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv
index f74ec44e6f607..e45777ef1cd77 100644
--- a/libcxx/docs/Status/FormatPaper.csv
+++ b/libcxx/docs/Status/FormatPaper.csv
@@ -6,21 +6,21 @@ Section,Description,Dependencies,Assignee,Patch,Status,First released version
 `[format.context] `_,"Class template basic_format_context",,Mark de Wever,`D103357 `__,|Complete|,Clang 14
 `[format.args] `_,"Class template basic_format_args",,Mark de Wever,`D103357 `__,|Complete|,Clang 14
 `[format.arg] `_,"Class template basic_format_arg",,Mark de Wever,`D103357 `__,|Complete|,Clang 14
-`[format.arg] `_,"Class template basic_format_arg - handle",,Unassigned,,|Not Started|,
-`[format.arg] `_,"Class template basic_format_arg - pointers",,Mark de Wever,,|In Progress|,
+`[format.arg] `_,"Class template basic_format_arg - handle",,Mark de Wever,,|Complete|,Clang 14
+`[format.arg] `_,"Class template basic_format_arg - pointers",,Mark de Wever,,|Complete|,Clang 14
 `[format.arg.store] `_,"Class template format-arg-store",,Mark de Wever,`D103357 `__,|Complete|,Clang 14
 `[format.formatter.spec] `_,"Formatter specializations - character types",,Mark de Wever,"`D96664 `__ `D103466 `__",|Complete|,Clang 14
 `[format.formatter.spec] `_,"Formatter specializations - string types",,Mark de Wever,"`D96664 `__ `D103425 `__",|Complete|,Clang 14
 `[format.formatter.spec] `_,"Formatter specializations - boolean type",,Mark de Wever,"`D96664 `__ `D103670 `__",|Complete|,Clang 14
 `[format.formatter.spec] `_,"Formatter specializations - integral types",,Mark de Wever,"`D96664 `__ `D103433 `__",|Complete|,Clang 14
-`[format.formatter.spec] `_,"Formatter specializations - floating-point types",`D70631 `__,Mark de Wever,`D114001 `__,|Review|,
-`[format.formatter.spec] `_,"Formatter specializations - pointer types",,Mark de Wever,,|In Progress|,
+`[format.formatter.spec] `_,"Formatter specializations - floating-point types",`D70631 `__,Mark de Wever,`D114001 `__,|Complete|,Clang 14
+`[format.formatter.spec] `_,"Formatter specializations - pointer types",,Mark de Wever,,|Complete|,Clang 14
 `[format.string.std] `_,"Standard format specifiers - character types",,Mark de Wever,`D103368 `__,|Complete|,Clang 14
 `[format.string.std] `_,"Standard format specifiers - string types",`D103379 `__,Mark de Wever,"`D103368 `__ `D103413 `__",|Complete|,Clang 14
 `[format.string.std] `_,"Standard format specifiers - boolean type",`D103379 `__,Mark de Wever,"`D103368 `__ `D103413 `__",|Complete|,Clang 14
 `[format.string.std] `_,"Standard format specifiers - integral types",,Mark de Wever,`D103368 `__,|Complete|,Clang 14
-`[format.string.std] `_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 `__,|Review|,
-`[format.string.std] `_,"Standard format specifiers - pointer types",,Mark de Wever,,|In Progress|,
+`[format.string.std] `_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 `__,|Complete|,Clang 14
+`[format.string.std] `_,"Standard format specifiers - pointer types",,Mark de Wever,,|Complete|,Clang 14
 `[format.functions] `_,"Format functions - format(string_view fmt, const Args&... args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14
 `[format.functions] `_,"Format functions - format(wstring_view fmt, const Args&... args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14
 `[format.functions] `_,"Format functions - format(const locale& loc, string_view fmt, const Args&... args);",,Mark de Wever,`D96664 `__,|Complete|,Clang 14

From 4eb909c884721a0be5078c87d35d550ad31c9fb7 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Tue, 25 Jan 2022 09:53:40 -0500
Subject: [PATCH 588/946] Cleanup header dependencies of
 llvm/Support/Compiler.h

 and  were introduced in aa60b3fd875c3 but the dependency
is now dead.

As a consequence you may need to include <new> where you use it while it
was auto-included as an implicit dependency before.

The impact on the codebase is small, as <new> is a very small header
(<100 SLOC) but it gets included everywhere, so that somehow counts (?)
---
 llvm/include/llvm/Support/Compiler.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index b31ba6bc7fc20..f4c277fae7cc2 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -17,9 +17,6 @@
 
 #include "llvm/Config/llvm-config.h"
 
-#ifdef __cplusplus
-#include <new>
-#endif
 #include 
 
 #if defined(_MSC_VER)

From 4cdc4416903bd4a818d70042d479442725eeebcc Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Tue, 25 Jan 2022 10:29:04 -0800
Subject: [PATCH 589/946] [ELF] Parallelize --compress-debug-sections=zlib

When linking a Debug build clang (265MiB SHF_ALLOC sections, 920MiB uncompressed
debug info), in a --threads=1 link "Compress debug sections" takes 2/3 time and
in a --threads=8 link "Compress debug sections" takes ~70% time.

This patch splits a section into 1MiB shards and calls zlib `deflate` in parallel.

DEFLATE blocks are a bit sequence. We need to ensure every shard starts
at a byte boundary for concatenation. We use Z_SYNC_FLUSH for all shards
but the last to flush the output to a byte boundary. (Z_FULL_FLUSH can
be used as well, but Z_FULL_FLUSH clears the hash table which just
wastes time.)

The last block requires the BFINAL flag. We call deflate with Z_FINISH
to set the flag as well as flush the output to a byte boundary. Under
the hood, all of Z_SYNC_FLUSH, Z_FULL_FLUSH, and Z_FINISH emit a
non-compressed block (called stored block in zlib). RFC1951 says "Any
bits of input up to the next byte boundary are ignored."

In a --threads=8 link, "Compress debug sections" is 5.7x as fast and the total
speed is 2.54x. Because the hash table for one shard is not shared with the next
shard, the output is slightly larger. Better compression ratio can be achieved
by preloading the window size from the previous shard as dictionary
(`deflateSetDictionary`), but that is overkill.

```
# 1MiB shards
% bloaty clang.new -- clang.old
    FILE SIZE        VM SIZE
 --------------  --------------
  +0.3%  +129Ki  [ = ]       0    .debug_str
  +0.1%  +105Ki  [ = ]       0    .debug_info
  +0.3%  +101Ki  [ = ]       0    .debug_line
  +0.2% +2.66Ki  [ = ]       0    .debug_abbrev
  +0.0% +1.19Ki  [ = ]       0    .debug_ranges
  +0.1%  +341Ki  [ = ]       0    TOTAL

# 2MiB shards
% bloaty clang.new -- clang.old
    FILE SIZE        VM SIZE
 --------------  --------------
  +0.2% +74.2Ki  [ = ]       0    .debug_line
  +0.1% +72.3Ki  [ = ]       0    .debug_str
  +0.0% +69.9Ki  [ = ]       0    .debug_info
  +0.1%    +976  [ = ]       0    .debug_abbrev
  +0.0%    +882  [ = ]       0    .debug_ranges
  +0.0%  +218Ki  [ = ]       0    TOTAL
```

Bonus in not using zlib::compress

* we can compress a debug section larger than 4GiB
* peak memory usage is lower because for most shards the output size is less
  than 50% input size (all less than 55% for a large binary I tested, but
  decreasing the initial output size does not decrease memory usage)

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D117853
---
 lld/ELF/CMakeLists.txt     |  5 ++
 lld/ELF/OutputSections.cpp | 99 ++++++++++++++++++++++++++++++++++----
 lld/ELF/OutputSections.h   |  8 ++-
 3 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index f85d0fb9f55e3..b37035d3e7429 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
 tablegen(LLVM Options.inc -gen-opt-parser-defs)
 add_public_tablegen_target(ELFOptionsTableGen)
 
+if(LLVM_ENABLE_ZLIB)
+  set(imported_libs ZLIB::ZLIB)
+endif()
+
 add_lld_library(lldELF
   AArch64ErrataFix.cpp
   Arch/AArch64.cpp
@@ -58,6 +62,7 @@ add_lld_library(lldELF
 
   LINK_LIBS
   lldCommon
+  ${imported_libs}
   ${LLVM_PTHREAD_LIB}
 
   DEPENDS
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 07ee7d84a2cd3..cffde5d61ac91 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -15,7 +15,7 @@
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/Compression.h"
+#include "llvm/Config/config.h" // LLVM_ENABLE_ZLIB
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Parallel.h"
@@ -23,6 +23,9 @@
 #include "llvm/Support/TimeProfiler.h"
 #include 
 #include 
+#if LLVM_ENABLE_ZLIB
+#include <zlib.h>
+#endif
 
 using namespace llvm;
 using namespace llvm::dwarf;
@@ -284,13 +287,45 @@ static void fill(uint8_t *buf, size_t size,
   memcpy(buf + i, filler.data(), size - i);
 }
 
+#if LLVM_ENABLE_ZLIB
+static SmallVector<uint8_t, 0> deflateShard(ArrayRef<uint8_t> in, int level,
+                                            int flush) {
+  // 15 and 8 are default. windowBits=-15 is negative to generate raw deflate
+  // data with no zlib header or trailer.
+  z_stream s = {};
+  deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
+  s.next_in = const_cast<uint8_t *>(in.data());
+  s.avail_in = in.size();
+
+  // Allocate a buffer of half of the input size, and grow it by 1.5x if
+  // insufficient.
+  SmallVector<uint8_t, 0> out;
+  size_t pos = 0;
+  out.resize_for_overwrite(std::max<size_t>(in.size() / 2, 64));
+  do {
+    if (pos == out.size())
+      out.resize_for_overwrite(out.size() * 3 / 2);
+    s.next_out = out.data() + pos;
+    s.avail_out = out.size() - pos;
+    (void)deflate(&s, flush);
+    pos = s.next_out - out.data();
+  } while (s.avail_out == 0);
+  assert(s.avail_in == 0);
+
+  out.truncate(pos);
+  deflateEnd(&s);
+  return out;
+}
+#endif
+
 // Compress section contents if this section contains debug info.
 template <class ELFT> void OutputSection::maybeCompress() {
+#if LLVM_ENABLE_ZLIB
   using Elf_Chdr = typename ELFT::Chdr;
 
   // Compress only DWARF debug sections.
   if (!config->compressDebugSections || (flags & SHF_ALLOC) ||
-      !name.startswith(".debug_"))
+      !name.startswith(".debug_") || size == 0)
     return;
 
   llvm::TimeTraceScope timeScope("Compress debug sections");
@@ -309,13 +344,42 @@ template  void OutputSection::maybeCompress() {
   // -O2 is given, we use level 6 to compress debug info more by ~15%. We found
   // that level 7 to 9 doesn't make much difference (~1% more compression) while
   // they take significant amount of time (~2x), so level 6 seems enough.
-  if (Error e = zlib::compress(toStringRef(buf), compressedData,
-                               config->optimize >= 2 ? 6 : 1))
-    fatal("compress failed: " + llvm::toString(std::move(e)));
+  const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+
+  // Split input into 1-MiB shards.
+  constexpr size_t shardSize = 1 << 20;
+  const size_t numShards = (size + shardSize - 1) / shardSize;
+  auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards);
+  for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) {
+    end = std::min(start + shardSize, buf.size());
+    shardsIn[i] = makeArrayRef(buf.data() + start, end - start);
+  }
+
+  // Compress shards and compute Adler-32 checksums. Use Z_SYNC_FLUSH for all
+  // shards but the last to flush the output to a byte boundary to be
+  // concatenated with the next shard.
+  auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
+  auto shardsAdler = std::make_unique<uint32_t[]>(numShards);
+  parallelForEachN(0, numShards, [&](size_t i) {
+    shardsOut[i] = deflateShard(shardsIn[i], level,
+                                i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH);
+    shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size());
+  });
+
+  // Update section size and combine Adler-32 checksums.
+  uint32_t checksum = 1;       // Initial Adler-32 value
+  size = sizeof(Elf_Chdr) + 2; // Elf_Chdr and zlib header
+  for (size_t i = 0; i != numShards; ++i) {
+    size += shardsOut[i].size();
+    checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size());
+  }
+  size += 4; // checksum
 
-  // Update section headers.
-  size = sizeof(Elf_Chdr) + compressedData.size();
+  compressed.shards = std::move(shardsOut);
+  compressed.numShards = numShards;
+  compressed.checksum = checksum;
   flags |= SHF_COMPRESSED;
+#endif
 }
 
 static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) {
@@ -339,10 +403,25 @@ template  void OutputSection::writeTo(uint8_t *buf) {
   // If --compress-debug-section is specified and if this is a debug section,
   // we've already compressed section contents. If that's the case,
   // just write it down.
-  if (!compressedData.empty()) {
+  if (compressed.shards) {
     memcpy(buf, zDebugHeader.data(), zDebugHeader.size());
-    memcpy(buf + zDebugHeader.size(), compressedData.data(),
-           compressedData.size());
+    buf += zDebugHeader.size();
+    size -= zDebugHeader.size();
+
+    // Compute shard offsets.
+    auto offsets = std::make_unique<size_t[]>(compressed.numShards);
+    offsets[0] = 2; // zlib header
+    for (size_t i = 1; i != compressed.numShards; ++i)
+      offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size();
+
+    buf[0] = 0x78; // CMF
+    buf[1] = 0x01; // FLG: best speed
+    parallelForEachN(0, compressed.numShards, [&](size_t i) {
+      memcpy(buf + offsets[i], compressed.shards[i].data(),
+             compressed.shards[i].size());
+    });
+
+    write32be(buf + size - 4, compressed.checksum);
     return;
   }
 
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 4f589d8432e43..957e6768ff6ea 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -25,6 +25,12 @@ struct PhdrEntry;
 class InputSection;
 class InputSectionBase;
 
+struct CompressedData {
+  std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
+  uint32_t numShards = 0;
+  uint32_t checksum = 0;
+};
+
 // This represents a section in an output file.
 // It is composed of multiple InputSections.
 // The writer creates multiple OutputSections and assign them unique,
@@ -113,7 +119,7 @@ class OutputSection final : public SectionCommand, public SectionBase {
 private:
   // Used for implementation of --compress-debug-sections option.
   SmallVector zDebugHeader;
-  SmallVector compressedData;
+  CompressedData compressed;
 
   std::array getFiller();
 };

From 93230ac1d2cf32419ce88fdd850f92a02bec5553 Mon Sep 17 00:00:00 2001
From: Casey Carter 
Date: Sat, 1 Jan 2022 21:53:37 -0800
Subject: [PATCH 590/946] [libcxx][test] Use bool allocators for
 vector::get_allocator test

... to be consistent with other `get_allocator` tests, and to avoid requiring `vector>` to be valid.
---
 .../containers/sequences/vector.bool/get_allocator.pass.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
index 566d1f81bafc7..f518b4601eef3 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
@@ -20,13 +20,13 @@
 
 int main(int, char**) {
     {
-        std::allocator alloc;
+        std::allocator alloc;
         const std::vector vb(alloc);
         assert(vb.get_allocator() == alloc);
     }
     {
-        other_allocator alloc(1);
-        const std::vector > vb(alloc);
+        other_allocator alloc(1);
+        const std::vector > vb(alloc);
         assert(vb.get_allocator() == alloc);
     }
 

From e5a315f57acf5580aa8819123300d90b4f7a160a Mon Sep 17 00:00:00 2001
From: MaheshRavishankar 
Date: Tue, 25 Jan 2022 10:36:34 -0800
Subject: [PATCH 591/946] [mlir][Linalg] Disallow ops with index semantics in
 `PushExpandingReshape`.

This pattern is not written to handle operations with `linalg.index`
operations in its body, i.e. operations that have index semantics.

Differential Revision: https://reviews.llvm.org/D117856
---
 .../Linalg/Transforms/ElementwiseOpFusion.cpp |  2 +-
 .../Dialect/Linalg/fusion-push-reshape.mlir   | 27 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index be34ef8bbd625..aaa5d4c386208 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -994,7 +994,7 @@ struct PushExpandingReshape : public OpRewritePattern {
   LogicalResult matchAndRewrite(GenericOp genericOp,
                                 PatternRewriter &rewriter) const override {
     // Only apply to elementwise linalg on tensor.
-    if (!genericOp.hasTensorSemantics() ||
+    if (!genericOp.hasTensorSemantics() || genericOp.hasIndexSemantics() ||
         genericOp.getNumParallelLoops() != genericOp.getNumLoops())
       return failure();
     // Only support identity output maps. It could be extended to permuations if
diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
index 9e96c98e7850b..0c02ff8c54d1f 100644
--- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
@@ -124,3 +124,30 @@ func @type_correctness(%arg0 : tensor<6x5xi32>, %arg1 : tensor<5xf32>,
 //  CHECK-SAME:   outs(%{{.+}} : tensor<6x5xf32>)
 //       CHECK:   tensor.expand_shape %[[OP]]
 //  CHECK-SAME:   tensor<6x5xf32> into tensor<2x3x5xf32>
+
+// -----
+
+func @generic_op_index_semantics(%A: tensor, %B: tensor<16xi64>, %init: tensor) -> tensor {
+  %0 = tensor.expand_shape %A [[0, 1], [2]]
+      : tensor into tensor
+  %2 = linalg.generic {indexing_maps = [
+    affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>,
+    affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+    iterator_types = ["parallel", "parallel", "parallel"]}
+  ins(%0, %B : tensor, tensor<16xi64>)
+  outs(%init : tensor) {
+  ^bb0(%arg1: i64, %arg2: i64, %arg3: i64):  // no predecessors
+    %index = linalg.index 0 : index
+    %1 = arith.index_cast %index : index to i64
+    %add = arith.addi %arg1, %1 : i64
+    %s = arith.subi %add, %arg2 : i64
+    linalg.yield %s : i64
+  } -> tensor
+  return %2 : tensor
+}
+//      CHECK: func @generic_op_index_semantics
+// CHECK-SAME:     %[[ARG0:.+]]: tensor
+//      CHECK:   %[[RESHAPE:.+]] = tensor.expand_shape %[[ARG0]]
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       ins(%[[RESHAPE]]
+//      CHECK:   return %[[RESULT]]

From ce5b04cc048aa9b82355bbea21a062462553eb47 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Tue, 25 Jan 2022 19:36:52 +0100
Subject: [PATCH 592/946] [Support] #include <new> for std::align_val_t

This is only used when aligned new is enabled.
---
 llvm/lib/Support/MemAlloc.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/MemAlloc.cpp b/llvm/lib/Support/MemAlloc.cpp
index 7aaa0dc6e205a..07a26cf26480b 100644
--- a/llvm/lib/Support/MemAlloc.cpp
+++ b/llvm/lib/Support/MemAlloc.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/MemAlloc.h"
+#include 
 
 // These are out of line to have __cpp_aligned_new not affect ABI.
 

From a09be08594a8b980f3cd0ad7e8aea1f9545517ae Mon Sep 17 00:00:00 2001
From: David Tenty 
Date: Mon, 24 Jan 2022 23:20:18 -0500
Subject: [PATCH 593/946] [compiler-rt][profile][AIX] pass extra link opts for
 test

The AIX linker doesn't export any symbols by default, so an export list is usually used. Since clang doesn't have the tools to auto-generate an export list yet, just pass the linker an extra opt to tell it to export everything. This is generally not recommended for real shared libs, but is fine for the purpose of this test.

Differential Revision: https://reviews.llvm.org/D118101
---
 .../test/profile/Posix/instrprof-get-filename-merge-mode.c | 2 +-
 compiler-rt/test/profile/Posix/lit.local.cfg.py            | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
index 7e26e3e6b5dd3..afd113c5fa7c5 100644
--- a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
+++ b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
@@ -1,6 +1,6 @@
 // Test __llvm_profile_get_filename when the on-line merging mode is enabled.
 //
-// RUN: %clang_pgogen -fPIC -shared -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c
+// RUN: %clang_pgogen -fPIC -shared %shared_linker_xopts -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c
 // RUN: %clang_pgogen -o %t %s %t.dso
 // RUN: env LLVM_PROFILE_FILE="%t-%m.profraw" %run %t
 
diff --git a/compiler-rt/test/profile/Posix/lit.local.cfg.py b/compiler-rt/test/profile/Posix/lit.local.cfg.py
index 60a9460820a62..eb79b385cc486 100644
--- a/compiler-rt/test/profile/Posix/lit.local.cfg.py
+++ b/compiler-rt/test/profile/Posix/lit.local.cfg.py
@@ -7,3 +7,10 @@ def getRoot(config):
 
 if root.host_os in ['Windows']:
   config.unsupported = True
+
+# AIX usually makes use of an explicit export list when linking a shared
+# object, but for the purposes of these tests just export all symbols.
+if root.host_os in ['AIX']:
+  config.substitutions.append(('%shared_linker_xopts', '-Wl,-bexpfull'))
+else:
+  config.substitutions.append(('%shared_linker_xopts', ''))

From ff8a4766ac256e3361ef66c0765ef5b041f4dd5f Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Tue, 25 Jan 2022 19:59:24 +0100
Subject: [PATCH 594/946] [libc++][nfc] Update formatting of some tests.

These tests were formatted with older clang-format settings; this
updates them to the current settings.

In order to implement P2216 a lot of changes to these tests are
required. This makes it easier to review those patches.
---
 .../format.functions/format.locale.pass.cpp   |  22 +-
 .../format/format.functions/format.pass.cpp   |  28 +-
 .../format/format.functions/format_tests.h    | 506 ++++++------------
 .../format_to.locale.pass.cpp                 |  13 +-
 .../format.functions/format_to.pass.cpp       |  13 +-
 .../format_to_n.locale.pass.cpp               |  25 +-
 .../format.functions/format_to_n.pass.cpp     |  22 +-
 .../formatted_size.locale.pass.cpp            |   7 +-
 .../format.functions/formatted_size.pass.cpp  |   7 +-
 .../locale-specific_form.pass.cpp             |  39 +-
 .../format.functions/vformat.locale.pass.cpp  |  13 +-
 .../format/format.functions/vformat.pass.cpp  |  10 +-
 .../vformat_to.locale.pass.cpp                |  29 +-
 .../format.functions/vformat_to.pass.cpp      |  28 +-
 14 files changed, 273 insertions(+), 489 deletions(-)

diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
index 4da2f2ec6ee7f..197fddaa265f6 100644
--- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
@@ -27,33 +27,31 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   std::basic_string out = std::format(std::locale(), fmt, args...);
   if constexpr (std::same_as)
     if (out != expected)
-      std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                << expected << "\nActual output   " << out << '\n';
+      std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                << '\n';
   assert(out == expected);
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::format(std::locale(), fmt, args...);
     if constexpr (std::same_as)
-      std::cerr << "\nFormat string   " << fmt
-                << "\nDidn't throw an exception.\n";
+      std::cerr << "\nFormat string   " << fmt << "\nDidn't throw an exception.\n";
     assert(false);
   } catch (std::format_error& e) {
-#ifdef _LIBCPP_VERSION
+#  ifdef _LIBCPP_VERSION
     if constexpr (std::same_as)
       if (e.what() != what)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected exception "
-                  << what << "\nActual exception   " << e.what() << '\n';
-#endif
+        std::cerr << "\nFormat string   " << fmt << "\nExpected exception " << what << "\nActual exception   "
+                  << e.what() << '\n';
+#  endif
     LIBCPP_ASSERT(e.what() == what);
     return;
   }
diff --git a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
index 7963b1500f1c5..cf566670501bf 100644
--- a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
@@ -25,44 +25,42 @@
 #include 
 #include 
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
-#include 
+#  include 
 #endif
 #include 
 
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   std::basic_string out = std::format(fmt, args...);
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
   if constexpr (std::same_as)
     if (out != expected)
-      std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                << expected << "\nActual output   " << out << '\n';
+      std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                << '\n';
 #endif
   assert(out == expected);
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::format(fmt, args...);
-#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  ifndef _LIBCPP_HAS_NO_LOCALIZATION
     if constexpr (std::same_as)
-      std::cerr << "\nFormat string   " << fmt
-                << "\nDidn't throw an exception.\n";
-#endif
+      std::cerr << "\nFormat string   " << fmt << "\nDidn't throw an exception.\n";
+#  endif
     assert(false);
   } catch (std::format_error& e) {
-#if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+#  if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
     if constexpr (std::same_as)
       if (e.what() != what)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected exception "
-                  << what << "\nActual exception   " << e.what() << '\n';
-#endif
+        std::cerr << "\nFormat string   " << fmt << "\nExpected exception " << what << "\nActual exception   "
+                  << e.what() << '\n';
+#  endif
     LIBCPP_ASSERT(e.what() == what);
     return;
   }
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 8deed4da43635..3b2d0c71f11cb 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -134,10 +134,10 @@ template 
 std::vector> invalid_types(std::string valid) {
   std::vector> result;
 
-#define CASE(T)                                                                \
-  case #T[0]:                                                                  \
-    result.push_back(STR("Invalid formatter type {:" #T "}"));                 \
-    break;
+#define CASE(T)                                                                                                        \
+case #T[0]:                                                                                                            \
+  result.push_back(STR("Invalid formatter type {:" #T "}"));                                                           \
+  break;
 
   for (auto type : "aAbBcdeEfFgGopsxX") {
     if (valid.find(type) != std::string::npos)
@@ -173,18 +173,15 @@ std::vector> invalid_types(std::string valid) {
 }
 
 template 
-void format_test_string(T world, T universe, TestFunction check,
-                        ExceptionTest check_exception) {
+void format_test_string(T world, T universe, TestFunction check, ExceptionTest check_exception) {
 
   // *** Valid input tests ***
   // Unsed argument is ignored. TODO FMT what does the Standard mandate?
   check(STR("hello world"), STR("hello {}"), world, universe);
-  check(STR("hello world and universe"), STR("hello {} and {}"), world,
-        universe);
+  check(STR("hello world and universe"), STR("hello {} and {}"), world, universe);
   check(STR("hello world"), STR("hello {0}"), world, universe);
   check(STR("hello universe"), STR("hello {1}"), world, universe);
-  check(STR("hello universe and world"), STR("hello {1} and {0}"), world,
-        universe);
+  check(STR("hello universe and world"), STR("hello {1} and {0}"), world, universe);
 
   check(STR("hello world"), STR("hello {:_>}"), world);
   check(STR("hello    world"), STR("hello {:>8}"), world);
@@ -225,97 +222,69 @@ void format_test_string(T world, T universe, TestFunction check,
   check(STR("hello uni#####"), STR("hello {:#<8.3s}"), universe);
 
   // *** sign ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:-}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:-}"), world);
 
   // *** alternate form ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:#}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:#}"), world);
 
   // *** zero-padding ***
-  check_exception("A format-spec width field shouldn't have a leading zero",
-                  STR("hello {:0}"), world);
+  check_exception("A format-spec width field shouldn't have a leading zero", STR("hello {:0}"), world);
 
   // *** width ***
 #ifdef _LIBCPP_VERSION
   // This limit isn't specified in the Standard.
-  static_assert(std::__format::__number_max == 2'147'483'647,
-                "Update the assert and the test.");
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:2147483648}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:5000000000}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:10000000000}"), world);
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  check_exception("The numeric value of the format-spec is too large", STR("{:2147483648}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:5000000000}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:10000000000}"), world);
 #endif
 
-  check_exception(
-      "A format-spec width field replacement should have a positive value",
-      STR("hello {:{}}"), world, 0);
-  check_exception(
-      "A format-spec arg-id replacement shouldn't have a negative value",
-      STR("hello {:{}}"), world, -1);
-  check_exception(
-      "A format-spec arg-id replacement exceeds the maximum supported value",
-      STR("hello {:{}}"), world, unsigned(-1));
+  check_exception("A format-spec width field replacement should have a positive value", STR("hello {:{}}"), world, 0);
+  check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:{}}"), world, -1);
+  check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:{}}"), world,
+                  unsigned(-1));
   check_exception("Argument index out of bounds", STR("hello {:{}}"), world);
-  check_exception(
-      "A format-spec arg-id replacement argument isn't an integral type",
-      STR("hello {:{}}"), world, universe);
-  check_exception(
-      "Using manual argument numbering in automatic argument numbering mode",
-      STR("hello {:{0}}"), world, 1);
-  check_exception(
-      "Using automatic argument numbering in manual argument numbering mode",
-      STR("hello {0:{}}"), world, 1);
+  check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:{}}"), world,
+                  universe);
+  check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:{0}}"), world,
+                  1);
+  check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:{}}"), world,
+                  1);
   // Arg-id may not have leading zeros.
   check_exception("Invalid arg-id", STR("hello {0:{01}}"), world, 1);
 
   // *** precision ***
 #ifdef _LIBCPP_VERSION
   // This limit isn't specified in the Standard.
-  static_assert(std::__format::__number_max == 2'147'483'647,
-                "Update the assert and the test.");
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.2147483648}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.5000000000}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.10000000000}"), world);
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  check_exception("The numeric value of the format-spec is too large", STR("{:.2147483648}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:.5000000000}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:.10000000000}"), world);
 #endif
 
   // Precision 0 allowed, but not useful for string arguments.
   check(STR("hello "), STR("hello {:.{}}"), world, 0);
   // Precision may have leading zeros. Secondly tests the value is still base 10.
   check(STR("hello 0123456789"), STR("hello {:.000010}"), STR("0123456789abcdef"));
-  check_exception(
-      "A format-spec arg-id replacement shouldn't have a negative value",
-      STR("hello {:.{}}"), world, -1);
-  check_exception(
-      "A format-spec arg-id replacement exceeds the maximum supported value",
-      STR("hello {:.{}}"), world, ~0u);
+  check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:.{}}"), world, -1);
+  check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:.{}}"), world,
+                  ~0u);
   check_exception("Argument index out of bounds", STR("hello {:.{}}"), world);
-  check_exception(
-      "A format-spec arg-id replacement argument isn't an integral type",
-      STR("hello {:.{}}"), world, universe);
-  check_exception(
-      "Using manual argument numbering in automatic argument numbering mode",
-      STR("hello {:.{0}}"), world, 1);
-  check_exception(
-      "Using automatic argument numbering in manual argument numbering mode",
-      STR("hello {0:.{}}"), world, 1);
+  check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:.{}}"), world,
+                  universe);
+  check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:.{0}}"), world,
+                  1);
+  check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:.{}}"), world,
+                  1);
   // Arg-id may not have leading zeros.
   check_exception("Invalid arg-id", STR("hello {0:.{01}}"), world, 1);
 
   // *** locale-specific form ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:L}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:L}"), world);
 
   // *** type ***
   for (const auto& fmt : invalid_types("s"))
-    check_exception(
-        "The format-spec type has a type not supported for a string argument",
-        fmt, world);
+    check_exception("The format-spec type has a type not supported for a string argument", fmt, world);
 }
 
 template 
@@ -364,13 +333,10 @@ void format_string_tests(TestFunction check, ExceptionTest check_exception) {
   // Testing the char const[] is a bit tricky due to array to pointer decay.
   // Since there are separate tests in format.formatter.spec the array is not
   // tested here.
-  format_test_string(world.c_str(), universe.c_str(), check,
+  format_test_string(world.c_str(), universe.c_str(), check, check_exception);
+  format_test_string(const_cast(world.c_str()), const_cast(universe.c_str()), check,
                             check_exception);
-  format_test_string(const_cast(world.c_str()),
-                            const_cast(universe.c_str()), check,
-                            check_exception);
-  format_test_string(std::basic_string_view(world),
-                            std::basic_string_view(universe), check,
+  format_test_string(std::basic_string_view(world), std::basic_string_view(universe), check,
                             check_exception);
   format_test_string(world, universe, check, check_exception);
   format_test_string_unicode(check);
@@ -399,60 +365,41 @@ void format_test_bool(TestFunction check, ExceptionTest check_exception) {
   check(STR("answer is '-false--'"), STR("answer is '{:-^8s}'"), false);
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"),
-                  true);
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"),
-                  true);
-  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"),
-                  true);
-
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-s}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+s}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), true);
+
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: s}"), true);
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#}"), true);
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#s}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#s}"), true);
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0}"), true);
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0s}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0s}"), true);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), true);
-
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.s}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0s}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true);
+
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42s}"), true);
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template 
-void format_test_bool_as_char(TestFunction check,
-                              ExceptionTest check_exception) {
+void format_test_bool_as_char(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '\1     '"), STR("answer is '{:6c}'"), true);
   check(STR("answer is '     \1'"), STR("answer is '{:>6c}'"), true);
@@ -463,47 +410,31 @@ void format_test_bool_as_char(TestFunction check,
   check(STR("answer is '\1-----'"), STR("answer is '{:-<6c}'"), true);
   check(STR("answer is '--\1---'"), STR("answer is '{:-^6c}'"), true);
 
-  check(std::basic_string(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:6c}'"), false);
-  check(std::basic_string(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:6c}'"), false);
-  check(std::basic_string(CSTR("answer is '     \0'"), 18),
-        STR("answer is '{:>6c}'"), false);
-  check(std::basic_string(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:<6c}'"), false);
-  check(std::basic_string(CSTR("answer is '  \0   '"), 18),
-        STR("answer is '{:^6c}'"), false);
-
-  check(std::basic_string(CSTR("answer is '-----\0'"), 18),
-        STR("answer is '{:->6c}'"), false);
-  check(std::basic_string(CSTR("answer is '\0-----'"), 18),
-        STR("answer is '{:-<6c}'"), false);
-  check(std::basic_string(CSTR("answer is '--\0---'"), 18),
-        STR("answer is '{:-^6c}'"), false);
+  check(std::basic_string(CSTR("answer is '\0     '"), 18), STR("answer is '{:6c}'"), false);
+  check(std::basic_string(CSTR("answer is '\0     '"), 18), STR("answer is '{:6c}'"), false);
+  check(std::basic_string(CSTR("answer is '     \0'"), 18), STR("answer is '{:>6c}'"), false);
+  check(std::basic_string(CSTR("answer is '\0     '"), 18), STR("answer is '{:<6c}'"), false);
+  check(std::basic_string(CSTR("answer is '  \0   '"), 18), STR("answer is '{:^6c}'"), false);
+
+  check(std::basic_string(CSTR("answer is '-----\0'"), 18), STR("answer is '{:->6c}'"), false);
+  check(std::basic_string(CSTR("answer is '\0-----'"), 18), STR("answer is '{:-<6c}'"), false);
+  check(std::basic_string(CSTR("answer is '--\0---'"), 18), STR("answer is '{:-^6c}'"), false);
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-c}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+c}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), true);
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#c}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), true);
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0c}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0c}"), true);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), true);
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -511,14 +442,11 @@ void format_test_bool_as_char(TestFunction check,
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template 
-void format_test_bool_as_integer(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_bool_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '1'"), STR("answer is '{:<1d}'"), true);
   check(STR("answer is '1 '"), STR("answer is '{:<2d}'"), true);
@@ -591,26 +519,20 @@ void format_test_bool_as_integer(TestFunction check,
   check(STR("answer is 0X0000000000"), STR("answer is {:#012X}"), false);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true);
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template 
-void format_test_integer_as_integer(TestFunction check,
-                                    ExceptionTest check_exception) {
+void format_test_integer_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '42'"), STR("answer is '{:<1}'"), I(42));
   check(STR("answer is '42'"), STR("answer is '{:<2}'"), I(42));
@@ -729,26 +651,20 @@ void format_test_integer_as_integer(TestFunction check,
   check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), I(42));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), I(0));
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for an integer argument",
-        fmt, 42);
+    check_exception("The format-spec type has a type not supported for an integer argument", fmt, 42);
 }
 
 template 
-void format_test_integer_as_char(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_integer_as_char(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '*     '"), STR("answer is '{:6c}'"), I(42));
   check(STR("answer is '     *'"), STR("answer is '{:>6c}'"), I(42));
@@ -761,28 +677,20 @@ void format_test_integer_as_char(TestFunction check,
 
   // *** Sign ***
   check(STR("answer is *"), STR("answer is {:c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {:-c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {:+c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {: c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:-c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:+c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {: c}"), I(42));
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("answer is {:#c}"), I(42));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("answer is {:#c}"), I(42));
 
   // *** zero-padding & width ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("answer is {:01c}"), I(42));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("answer is {:01c}"), I(42));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), I(0));
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -790,9 +698,7 @@ void format_test_integer_as_char(TestFunction check,
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for an integer argument",
-        fmt, I(42));
+    check_exception("The format-spec type has a type not supported for an integer argument", fmt, I(42));
 
   // *** Validate range ***
   // TODO FMT Update test after adding 128-bit support.
@@ -800,18 +706,16 @@ void format_test_integer_as_char(TestFunction check,
     // The code has some duplications to keep the if statement readable.
     if constexpr (std::signed_integral) {
       if constexpr (std::signed_integral && sizeof(I) > sizeof(CharT)) {
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits::min());
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits::max());
-      } else if constexpr (std::unsigned_integral &&
-                           sizeof(I) >= sizeof(CharT)) {
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits::max());
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits::min());
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits::max());
+      } else if constexpr (std::unsigned_integral && sizeof(I) >= sizeof(CharT)) {
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits::max());
       }
     } else if constexpr (sizeof(I) > sizeof(CharT)) {
-      check_exception("Integral value outside the range of the char type",
-                      STR("{:c}"), std::numeric_limits::max());
+      check_exception("Integral value outside the range of the char type", STR("{:c}"), std::numeric_limits::max());
     }
   }
 }
@@ -823,8 +727,7 @@ void format_test_integer(TestFunction check, ExceptionTest check_exception) {
 }
 
 template 
-void format_test_signed_integer(TestFunction check,
-                                ExceptionTest check_exception) {
+void format_test_signed_integer(TestFunction check, ExceptionTest check_exception) {
   format_test_integer(check, check_exception);
   format_test_integer(check, check_exception);
   format_test_integer(check, check_exception);
@@ -839,62 +742,49 @@ void format_test_signed_integer(TestFunction check,
   check(STR("-128"), STR("{:#}"), std::numeric_limits::min());
   check(STR("-0x80"), STR("{:#x}"), std::numeric_limits::min());
 
-  check(STR("-0b1000000000000000"), STR("{:#b}"),
-        std::numeric_limits::min());
+  check(STR("-0b1000000000000000"), STR("{:#b}"), std::numeric_limits::min());
   check(STR("-0100000"), STR("{:#o}"), std::numeric_limits::min());
   check(STR("-32768"), STR("{:#}"), std::numeric_limits::min());
   check(STR("-0x8000"), STR("{:#x}"), std::numeric_limits::min());
 
-  check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"),
-        std::numeric_limits::min());
-  check(STR("-020000000000"), STR("{:#o}"),
-        std::numeric_limits::min());
+  check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"), std::numeric_limits::min());
+  check(STR("-020000000000"), STR("{:#o}"), std::numeric_limits::min());
   check(STR("-2147483648"), STR("{:#}"), std::numeric_limits::min());
   check(STR("-0x80000000"), STR("{:#x}"), std::numeric_limits::min());
 
   check(STR("-0b100000000000000000000000000000000000000000000000000000000000000"
             "0"),
         STR("{:#b}"), std::numeric_limits::min());
-  check(STR("-01000000000000000000000"), STR("{:#o}"),
-        std::numeric_limits::min());
-  check(STR("-9223372036854775808"), STR("{:#}"),
-        std::numeric_limits::min());
-  check(STR("-0x8000000000000000"), STR("{:#x}"),
-        std::numeric_limits::min());
+  check(STR("-01000000000000000000000"), STR("{:#o}"), std::numeric_limits::min());
+  check(STR("-9223372036854775808"), STR("{:#}"), std::numeric_limits::min());
+  check(STR("-0x8000000000000000"), STR("{:#x}"), std::numeric_limits::min());
 
   check(STR("0b1111111"), STR("{:#b}"), std::numeric_limits::max());
   check(STR("0177"), STR("{:#o}"), std::numeric_limits::max());
   check(STR("127"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0x7f"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(STR("0b111111111111111"), STR("{:#b}"),
-        std::numeric_limits::max());
+  check(STR("0b111111111111111"), STR("{:#b}"), std::numeric_limits::max());
   check(STR("077777"), STR("{:#o}"), std::numeric_limits::max());
   check(STR("32767"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0x7fff"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(STR("0b1111111111111111111111111111111"), STR("{:#b}"),
-        std::numeric_limits::max());
+  check(STR("0b1111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max());
   check(STR("017777777777"), STR("{:#o}"), std::numeric_limits::max());
   check(STR("2147483647"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0x7fffffff"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(
-      STR("0b111111111111111111111111111111111111111111111111111111111111111"),
-      STR("{:#b}"), std::numeric_limits::max());
-  check(STR("0777777777777777777777"), STR("{:#o}"),
-        std::numeric_limits::max());
-  check(STR("9223372036854775807"), STR("{:#}"),
-        std::numeric_limits::max());
-  check(STR("0x7fffffffffffffff"), STR("{:#x}"),
+  check(STR("0b111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"),
         std::numeric_limits::max());
+  check(STR("0777777777777777777777"), STR("{:#o}"), std::numeric_limits::max());
+  check(STR("9223372036854775807"), STR("{:#}"), std::numeric_limits::max());
+  check(STR("0x7fffffffffffffff"), STR("{:#x}"), std::numeric_limits::max());
 
   // TODO FMT Add __int128_t test after implementing full range.
 }
 
 template 
-void format_test_unsigned_integer(TestFunction check,
-                                  ExceptionTest check_exception) {
+void format_test_unsigned_integer(TestFunction check, ExceptionTest check_exception) {
   format_test_integer(check, check_exception);
   format_test_integer(check, check_exception);
   format_test_integer(check, check_exception);
@@ -909,28 +799,21 @@ void format_test_unsigned_integer(TestFunction check,
   check(STR("255"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0xff"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(STR("0b1111111111111111"), STR("{:#b}"),
-        std::numeric_limits::max());
+  check(STR("0b1111111111111111"), STR("{:#b}"), std::numeric_limits::max());
   check(STR("0177777"), STR("{:#o}"), std::numeric_limits::max());
   check(STR("65535"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0xffff"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(STR("0b11111111111111111111111111111111"), STR("{:#b}"),
-        std::numeric_limits::max());
-  check(STR("037777777777"), STR("{:#o}"),
-        std::numeric_limits::max());
+  check(STR("0b11111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits::max());
+  check(STR("037777777777"), STR("{:#o}"), std::numeric_limits::max());
   check(STR("4294967295"), STR("{:#}"), std::numeric_limits::max());
   check(STR("0xffffffff"), STR("{:#x}"), std::numeric_limits::max());
 
-  check(
-      STR("0b1111111111111111111111111111111111111111111111111111111111111111"),
-      STR("{:#b}"), std::numeric_limits::max());
-  check(STR("01777777777777777777777"), STR("{:#o}"),
-        std::numeric_limits::max());
-  check(STR("18446744073709551615"), STR("{:#}"),
-        std::numeric_limits::max());
-  check(STR("0xffffffffffffffff"), STR("{:#x}"),
+  check(STR("0b1111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"),
         std::numeric_limits::max());
+  check(STR("01777777777777777777777"), STR("{:#o}"), std::numeric_limits::max());
+  check(STR("18446744073709551615"), STR("{:#}"), std::numeric_limits::max());
+  check(STR("0xffffffffffffffff"), STR("{:#x}"), std::numeric_limits::max());
 
   // TODO FMT Add __uint128_t test after implementing full range.
 }
@@ -959,46 +842,30 @@ void format_test_char(TestFunction check, ExceptionTest check_exception) {
   check(STR("answer is '--*---'"), STR("answer is '{:-^6c}'"), CharT('*'));
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"),
-                  CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"),
-                  CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"),
-                  CharT('*'));
-
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-c}"), CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+c}"), CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), CharT('*'));
+
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), CharT('*'));
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#}"), CharT('*'));
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#c}"), CharT('*'));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), CharT('*'));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), CharT('*'));
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0}"), CharT('*'));
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0c}"), CharT('*'));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), CharT('*'));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0c}"), CharT('*'));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), CharT('*'));
-
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), CharT('*'));
+
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), CharT('*'));
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -1007,14 +874,11 @@ void format_test_char(TestFunction check, ExceptionTest check_exception) {
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a char argument",
-        fmt, CharT('*'));
+    check_exception("The format-spec type has a type not supported for a char argument", fmt, CharT('*'));
 }
 
 template 
-void format_test_char_as_integer(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_char_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '42'"), STR("answer is '{:<1d}'"), CharT('*'));
 
@@ -1067,21 +931,16 @@ void format_test_char_as_integer(TestFunction check,
   check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), CharT('*'));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.d}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0d}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42d}"), CharT('*'));
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a char argument",
-        fmt, '*');
+    check_exception("The format-spec type has a type not supported for a char argument", fmt, '*');
 }
 
 template 
@@ -2650,42 +2509,34 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
 
   // ** Test invalid format strings ***
   check_exception("The format string terminates at a '{'", STR("{"));
-  check_exception("The replacement field misses a terminating '}'", STR("{:"),
-                  42);
+  check_exception("The replacement field misses a terminating '}'", STR("{:"), 42);
 
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("}"));
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("{:}-}"), 42);
+  check_exception("The format string contains an invalid escape sequence", STR("}"));
+  check_exception("The format string contains an invalid escape sequence", STR("{:}-}"), 42);
 
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("} "));
+  check_exception("The format string contains an invalid escape sequence", STR("} "));
 
-  check_exception(
-      "The arg-id of the format-spec starts with an invalid character",
-      STR("{-"), 42);
+  check_exception("The arg-id of the format-spec starts with an invalid character", STR("{-"), 42);
   check_exception("Argument index out of bounds", STR("hello {}"));
   check_exception("Argument index out of bounds", STR("hello {0}"));
   check_exception("Argument index out of bounds", STR("hello {1}"), 42);
 
   // *** Test char format argument ***
   // The `char` to `wchar_t` formatting is tested separately.
-  check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'),
-        CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'));
+  check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'),
+        CharT('Z'), CharT('!'));
 
   format_test_char(check, check_exception);
   format_test_char_as_integer(check, check_exception);
 
   // *** Test string format argument ***
   {
-    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'),
-                      CharT('A'), CharT('Z'), CharT('!'), 0};
+    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0};
     CharT* data = buffer;
     check(STR("hello 09azAZ!"), STR("hello {}"), data);
   }
   {
-    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'),
-                      CharT('A'), CharT('Z'), CharT('!'), 0};
+    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0};
     const CharT* data = buffer;
     check(STR("hello 09azAZ!"), STR("hello {}"), data);
   }
@@ -2718,20 +2569,14 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   {
     // Note 128-bit support is only partly implemented test the range
     // conditions here.
-    std::basic_string min =
-        std::format(STR("{}"), std::numeric_limits::min());
-    check(min, STR("{}"),
-          static_cast<__int128_t>(std::numeric_limits::min()));
-    std::basic_string max =
-        std::format(STR("{}"), std::numeric_limits::max());
-    check(max, STR("{}"),
-          static_cast<__int128_t>(std::numeric_limits::max()));
-    check_exception(
-        "128-bit value is outside of implemented range", STR("{}"),
-        static_cast<__int128_t>(std::numeric_limits::min()) - 1);
-    check_exception(
-        "128-bit value is outside of implemented range", STR("{}"),
-        static_cast<__int128_t>(std::numeric_limits::max()) + 1);
+    std::basic_string min = std::format(STR("{}"), std::numeric_limits::min());
+    check(min, STR("{}"), static_cast<__int128_t>(std::numeric_limits::min()));
+    std::basic_string max = std::format(STR("{}"), std::numeric_limits::max());
+    check(max, STR("{}"), static_cast<__int128_t>(std::numeric_limits::max()));
+    check_exception("128-bit value is outside of implemented range", STR("{}"),
+                    static_cast<__int128_t>(std::numeric_limits::min()) - 1);
+    check_exception("128-bit value is outside of implemented range", STR("{}"),
+                    static_cast<__int128_t>(std::numeric_limits::max()) + 1);
   }
 #endif
   format_test_signed_integer(check, check_exception);
@@ -2747,15 +2592,10 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   {
     // Note 128-bit support is only partly implemented test the range
     // conditions here.
-    std::basic_string max =
-        std::format(STR("{}"), std::numeric_limits::max());
-    check(max, STR("{}"),
-          static_cast<__uint128_t>(
-              std::numeric_limits::max()));
+    std::basic_string max = std::format(STR("{}"), std::numeric_limits::max());
+    check(max, STR("{}"), static_cast<__uint128_t>(std::numeric_limits::max()));
     check_exception("128-bit value is outside of implemented range", STR("{}"),
-                    static_cast<__uint128_t>(
-                        std::numeric_limits::max()) +
-                        1);
+                    static_cast<__uint128_t>(std::numeric_limits::max()) + 1);
   }
 #endif
   format_test_unsigned_integer(check, check_exception);
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
index 98356fdd8bd66..b403105cb121d 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
@@ -30,8 +30,7 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::basic_string out(expected.size(), CharT(' '));
@@ -42,14 +41,12 @@ auto test = [](std::basic_string expected,
   {
     std::list out;
     std::format_to(std::back_inserter(out), std::locale(), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector out;
     std::format_to(std::back_inserter(out), std::locale(), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
@@ -61,8 +58,8 @@ auto test = [](std::basic_string expected,
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
index d8d24f92d60cc..cf46a0092eb16 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
@@ -31,8 +31,7 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::basic_string out(expected.size(), CharT(' '));
@@ -43,14 +42,12 @@ auto test = [](std::basic_string expected,
   {
     std::list out;
     std::format_to(std::back_inserter(out), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector out;
     std::format_to(std::back_inserter(out), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
@@ -62,8 +59,8 @@ auto test = [](std::basic_string expected,
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
index 206450b53c466..1dc1745a1fd3f 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
@@ -32,13 +32,11 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::list out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 0, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, std::locale(), fmt, args...);
     // To avoid signedness warnings make sure formatted_size uses the same type
     // as result.size.
     using diff_type = decltype(result.size);
@@ -49,20 +47,17 @@ auto test = [](std::basic_string expected,
   }
   {
     std::vector out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 5, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min(5, formatted_size);
 
     assert(result.size == formatted_size);
-    assert(std::equal(out.begin(), out.end(), expected.begin(),
-                      expected.begin() + size));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size));
   }
   {
     std::basic_string out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 1000, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min(1000, formatted_size);
@@ -73,8 +68,7 @@ auto test = [](std::basic_string expected,
   {
     // Test the returned iterator.
     std::basic_string out(10, CharT(' '));
-    std::format_to_n_result result =
-        std::format_to_n(out.begin(), 10, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(out.begin(), 10, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min(10, formatted_size);
@@ -88,8 +82,7 @@ auto test = [](std::basic_string expected,
                   "If the difference type isn't negative the test will fail "
                   "due to using a large positive value.");
     CharT buffer[1] = {CharT(0)};
-    std::format_to_n_result result =
-        std::format_to_n(buffer, -1, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(buffer, -1, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
 
@@ -99,8 +92,8 @@ auto test = [](std::basic_string expected,
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
index 6f69af4d99df9..a964fc2f6edf7 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
@@ -29,13 +29,11 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::list out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 0, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, fmt, args...);
     // To avoid signedness warnings make sure formatted_size uses the same type
     // as result.size.
     using diff_type = decltype(result.size);
@@ -46,20 +44,17 @@ auto test = [](std::basic_string expected,
   }
   {
     std::vector out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 5, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min(5, formatted_size);
 
     assert(result.size == formatted_size);
-    assert(std::equal(out.begin(), out.end(), expected.begin(),
-                      expected.begin() + size));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size));
   }
   {
     std::basic_string out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min(1000, formatted_size);
@@ -70,8 +65,7 @@ auto test = [](std::basic_string expected,
   {
     // Test the returned iterator.
     std::basic_string out(10, CharT(' '));
-    std::format_to_n_result result =
-        std::format_to_n(out.begin(), 10, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(out.begin(), 10, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min(10, formatted_size);
@@ -95,8 +89,8 @@ auto test = [](std::basic_string expected,
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
index b31170b62c596..34f13191f17f4 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
@@ -28,15 +28,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   size_t size = std::formatted_size(std::locale(), fmt, args...);
   assert(size == expected.size());
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::formatted_size(std::locale(), fmt, args...);
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
index befb4ce626109..4171573d1348f 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
@@ -25,15 +25,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   size_t size = std::formatted_size(fmt, args...);
   assert(size == expected.size());
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::formatted_size(fmt, args...);
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index 5d86e46be7e6b..6f21454975117 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -126,21 +126,19 @@ struct numpunct : std::numpunct {
 #endif
 
 template 
-void test(std::basic_string expected, std::basic_string fmt,
-          const Args&... args) {
+void test(std::basic_string expected, std::basic_string fmt, const Args&... args) {
   // *** format ***
   {
     std::basic_string out = std::format(fmt, args...);
     if constexpr (std::same_as)
       if (out != expected)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                  << expected << "\nActual output   " << out << '\n';
+        std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                  << '\n';
     assert(out == expected);
   }
   // *** vformat ***
   {
-    std::basic_string out =
-        std::vformat(fmt, std::make_format_args>(args...));
+    std::basic_string out = std::vformat(fmt, std::make_format_args>(args...));
     assert(out == expected);
   }
   // *** format_to ***
@@ -153,16 +151,14 @@ void test(std::basic_string expected, std::basic_string fmt,
   // *** vformat_to ***
   {
     std::basic_string out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), fmt,
-                              std::make_format_args>(args...));
+    auto it = std::vformat_to(out.begin(), fmt, std::make_format_args>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   // *** format_to_n ***
   {
     std::basic_string out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min(1000, formatted_size);
@@ -178,21 +174,19 @@ void test(std::basic_string expected, std::basic_string fmt,
 }
 
 template 
-void test(std::basic_string expected, std::locale loc,
-          std::basic_string fmt, const Args&... args) {
+void test(std::basic_string expected, std::locale loc, std::basic_string fmt, const Args&... args) {
   // *** format ***
   {
     std::basic_string out = std::format(loc, fmt, args...);
     if constexpr (std::same_as)
       if (out != expected)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                  << expected << "\nActual output   " << out << '\n';
+        std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                  << '\n';
     assert(out == expected);
   }
   // *** vformat ***
   {
-    std::basic_string out = std::vformat(
-        loc, fmt, std::make_format_args>(args...));
+    std::basic_string out = std::vformat(loc, fmt, std::make_format_args>(args...));
     assert(out == expected);
   }
   // *** format_to ***
@@ -205,16 +199,14 @@ void test(std::basic_string expected, std::locale loc,
   // *** vformat_to ***
   {
     std::basic_string out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), loc, fmt,
-                              std::make_format_args>(args...));
+    auto it = std::vformat_to(out.begin(), loc, fmt, std::make_format_args>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   // *** format_to_n ***
   {
     std::basic_string out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(loc, fmt, args...);
     diff_type size = std::min(1000, formatted_size);
@@ -239,13 +231,13 @@ struct numpunct_unicode : std::numpunct {
   string_type do_falsename() const override { return "ungültig"; }
 };
 
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
 template <>
 struct numpunct_unicode : std::numpunct {
   string_type do_truename() const override { return L"gültig"; }
   string_type do_falsename() const override { return L"ungültig"; }
 };
-#endif
+#  endif
 #endif // TEST_HAS_NO_UNICODE
 
 template 
@@ -268,8 +260,7 @@ void test_bool() {
   test(STR("false"), std::locale(LOCALE_en_US_UTF_8), STR("{:L}"), false);
 
 #ifndef TEST_HAS_NO_UNICODE
-  std::locale loc_unicode =
-      std::locale(std::locale(), new numpunct_unicode());
+  std::locale loc_unicode = std::locale(std::locale(), new numpunct_unicode());
 
   test(STR("gültig"), loc_unicode, STR("{:L}"), true);
   test(STR("ungültig"), loc_unicode, STR("{:L}"), false);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
index c9dbab1002c4e..3bad47da7af95 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
@@ -24,20 +24,17 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
-  std::basic_string out = std::vformat(
-      std::locale(), fmt, std::make_format_args>(args...));
+  std::basic_string out = std::vformat(std::locale(), fmt, std::make_format_args>(args...));
   assert(out == expected);
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
-    (void) std::vformat(std::locale(), fmt,
-                        std::make_format_args>(args...));
+    (void)std::vformat(std::locale(), fmt, std::make_format_args>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
index 8688d560c41c6..effcd722ba9d4 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
@@ -23,16 +23,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
-  std::basic_string out =
-      std::vformat(fmt, std::make_format_args>(args...));
+  std::basic_string out = std::vformat(fmt, std::make_format_args>(args...));
   assert(out == expected);
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     TEST_IGNORE_NODISCARD std::vformat(fmt, std::make_format_args>(args...));
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
index 737f618085591..b441656cfc599 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
@@ -30,49 +30,40 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::basic_string out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), std::locale(), fmt,
-                              std::make_format_args>(args...));
+    auto it = std::vformat_to(out.begin(), std::locale(), fmt, std::make_format_args>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   {
     std::list out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
     CharT out[4096];
-    CharT* it =
-        std::vformat_to(out, std::locale(), fmt,
-                        std::make_format_args>(args...));
+    CharT* it = std::vformat_to(out, std::locale(), fmt, std::make_format_args>(args...));
     assert(std::distance(out, it) == int(expected.size()));
     // Convert to std::string since output contains '\0' for boolean tests.
     assert(std::basic_string(out, it) == expected);
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args>(args...));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
index 0bec98a13b7a2..e6880d780f513 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
@@ -31,48 +31,40 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = [](std::basic_string expected,
-                                           std::basic_string fmt,
+auto test = [](std::basic_string expected, std::basic_string fmt,
                                            const Args&... args) {
   {
     std::basic_string out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), fmt,
-                              std::make_format_args>(args...));
+    auto it = std::vformat_to(out.begin(), fmt, std::make_format_args>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   {
     std::list out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
     CharT out[4096];
-    CharT* it = std::vformat_to(
-        out, fmt, std::make_format_args>(args...));
+    CharT* it = std::vformat_to(out, fmt, std::make_format_args>(args...));
     assert(std::distance(out, it) == int(expected.size()));
     // Convert to std::string since output contains '\0' for boolean tests.
     assert(std::basic_string(out, it) == expected);
   }
 };
 
-auto test_exception = [](
-    std::string_view what, std::basic_string fmt, const Args&... args) {
+auto test_exception =
+    [](std::string_view what, std::basic_string fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args>(args...));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);

From 9c2891a8eddb7380a2809af297789eb07713df19 Mon Sep 17 00:00:00 2001
From: Ellis Hoag 
Date: Mon, 24 Jan 2022 20:08:35 -0800
Subject: [PATCH 595/946] [InstrProf][correlation] Read DWARFv5 `OP_addrx`
 location

Correctly read `OP_addrx` type encodings for DWARFv5 locations.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D118098
---
 .../profile/Linux/instrprof-debug-info-correlate.c | 14 ++++++++++----
 llvm/lib/ProfileData/InstrProfCorrelator.cpp       | 12 +++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index 9aa24d72376ca..4f46586a16999 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -1,12 +1,18 @@
 // REQUIRES: zlib
 
 // Value profiling is currently not supported in lightweight mode.
-// RUN: %clang_pgogen -o %t -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
-// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
-// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
-
 // RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal
 // RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw
 
+// RUN: %clang_pgogen -o %t.d4 -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4
+// RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite
+
+// RUN: diff %t.normal.profdata %t.d4.profdata
+
+// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
+// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
+
 // RUN: diff %t.normal.profdata %t.profdata
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 8be2cbff3a20c..5f06541916ddd 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -167,13 +167,19 @@ DwarfInstrProfCorrelator::getLocation(const DWARFDie &Die) const {
     return {};
   }
   auto &DU = *Die.getDwarfUnit();
+  auto AddressSize = DU.getAddressByteSize();
   for (auto &Location : *Locations) {
-    auto AddressSize = DU.getAddressByteSize();
     DataExtractor Data(Location.Expr, DICtx->isLittleEndian(), AddressSize);
     DWARFExpression Expr(Data, AddressSize);
-    for (auto &Op : Expr)
-      if (Op.getCode() == dwarf::DW_OP_addr)
+    for (auto &Op : Expr) {
+      if (Op.getCode() == dwarf::DW_OP_addr) {
         return Op.getRawOperand(0);
+      } else if (Op.getCode() == dwarf::DW_OP_addrx) {
+        uint64_t Index = Op.getRawOperand(0);
+        if (auto SA = DU.getAddrOffsetSectionItem(Index))
+          return SA->Address;
+      }
+    }
   }
   return {};
 }

From 4be86d18c0fc97a5eab26628e2a830f914c591eb Mon Sep 17 00:00:00 2001
From: Andrew Litteken 
Date: Wed, 22 Dec 2021 17:42:10 -0600
Subject: [PATCH 596/946] [IROutliner] Disallow outlining calls that return
 twice.

Functions that return twice can cause the IR Outliner to miscompile the given program. These functions rely on the state of the stack being the same when they return the second time, and this may not be the case if they are called from an outlined function. So, we simply treat calls to these functions as illegal for the outliner to remove.

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D110007
---
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  5 ++
 .../IROutliner/illegal-returns-twice.ll       | 66 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 llvm/test/Transforms/IROutliner/illegal-returns-twice.ll

diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index dcae4454f8281..16f597ab898e5 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -367,6 +367,11 @@ class IROutliner {
       Function *F = CI.getCalledFunction();
       if (!F || CI.isIndirectCall() || !F->hasName())
         return false;
+      // Returning twice can cause issues with the state of the function call
+      // that were not expected when the function was used, so we do not include
+      // the call in outlined functions.
+      if (CI.canReturnTwice())
+        return false;
       return true;
     }
     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
diff --git a/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll
new file mode 100644
index 0000000000000..31113ab5cd644
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we do not outline functions that are marked as returns
+; twice, since these can alter the frame of the function and affect how the
+; outliner behaves, causing miscompiles.
+
+; Function Attrs: optsize returns_twice
+declare i32 @setjmp(i32*) local_unnamed_addr #1
+@tmp_jmpb = global [37 x i32] zeroinitializer, align 16
+
+define void @function1() {
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function2() {
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+attributes #1 = { optsize returns_twice }

From c39d22d1968cf07e54b5816ba76dccef8acaace1 Mon Sep 17 00:00:00 2001
From: Petr Hosek 
Date: Tue, 25 Jan 2022 09:37:14 -0800
Subject: [PATCH 597/946] [CMake] Set sanitizer test C++ library on Linux

We always want to use the in-tree libc++ for tests.

Differential Revision: https://reviews.llvm.org/D118161
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index aed39d0ca9fff..d998db64d87ab 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -134,6 +134,8 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn
     set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "")
     set(RUNTIMES_${target}_SANITIZER_CXX_ABI "libc++" CACHE STRING "")
     set(RUNTIMES_${target}_SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "")
+    set(RUNTIMES_${target}_SANITIZER_TEST_CXX "libc++" CACHE STRING "")
+    set(RUNTIMES_${target}_SANITIZER_TEST_CXX_INTREE ON CACHE BOOL "")
     set(RUNTIMES_${target}_COMPILER_RT_TEST_COMPILER_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")
     set(RUNTIMES_${target}_SANITIZER_COMMON_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")
     set(RUNTIMES_${target}_TSAN_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")

From 0ad19a833177861be55fefaff725ab89c8695d01 Mon Sep 17 00:00:00 2001
From: JackAKirk 
Date: Mon, 24 Jan 2022 12:32:36 -0800
Subject: [PATCH 598/946] [CUDA,NVPTX] Corrected fragment size for tf32 LD B
 matrix.

Signed-off-by: JackAKirk 

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D118023
---
 clang/lib/CodeGen/CGBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cd35e7cbe76f7..a80a55e054a3b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17190,7 +17190,7 @@ static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
   case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
     return MMA_LDST(4, m16n16k8_load_a_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
-    return MMA_LDST(2, m16n16k8_load_b_tf32);
+    return MMA_LDST(4, m16n16k8_load_b_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
     return MMA_LDST(8, m16n16k8_load_c_f32);
 

From ea1ac183f47ebc03051e63f76c4cb3ad3c5b9f3a Mon Sep 17 00:00:00 2001
From: MaheshRavishankar 
Date: Tue, 25 Jan 2022 10:58:53 -0800
Subject: [PATCH 599/946] [mlir][Linalg] Fix incorrect fusion with reshape ops
 by linearization.

Fusion of reshape ops by linearization incorrectly inverted the
indexing map before linearizing dimensions. This leads to incorrect
indexing maps used in the fused operation.

Differential Revision: https://reviews.llvm.org/D117908
---
 .../Linalg/Transforms/ElementwiseOpFusion.cpp | 12 ++--
 .../Linalg/reshape_linearization_fusion.mlir  | 58 +++++++++++++++++++
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index aaa5d4c386208..6532343830255 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -915,11 +915,9 @@ struct FoldProducerReshapeOpByLinearization
       // the operands of the consumers that arent fused are the same.
       SmallVector fusedIndexMaps = genericOp.getIndexingMaps();
 
-      // Accepted consumer maps are either identity or permutation.
-      auto invMap = inversePermutation(fusedIndexMaps[en.index()]);
-
       // Compute the indexing map to use for the result of the producer.
-      AffineMap modifiedMap = linearizeCollapsedDims(invMap, reshapeOp);
+      AffineMap modifiedMap =
+          linearizeCollapsedDims(fusedIndexMaps[en.index()], reshapeOp);
       // The modified map cannot have symbols.
       if (modifiedMap.getNumSymbols())
         return failure();
@@ -1166,11 +1164,9 @@ struct FoldConsumerReshapeOpByLinearization
     // those for the operands of the producer.
     SmallVector fusedIndexMaps = producer.getIndexingMaps();
 
-    auto invMap = inversePermutation(
-        producer.getTiedIndexingMap(producer.getOutputOperand(0)));
-
     // Compute the indexing map to use for the operand of the producer.
-    AffineMap modifiedMap = linearizeCollapsedDims(invMap, reshapeOp);
+    AffineMap modifiedMap = linearizeCollapsedDims(
+        producer.getTiedIndexingMap(producer.getOutputOperand(0)), reshapeOp);
     for (AffineExpr expr : modifiedMap.getResults()) {
       if (!expr.isPureAffine()) {
         return rewriter.notifyMatchFailure(
diff --git a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
index e2ec8ffa5c3c7..37edb04a7f587 100644
--- a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
@@ -1,5 +1,11 @@
 // RUN: mlir-opt -split-input-file -linalg-fold-reshape-ops-by-linearization %s | FileCheck %s
 
+// Note: These tests fuse the reshape ops by linearization. This can create
+// indexing maps which are hard to analyse later on. These patterns are useful
+// only if the folded dimensions in the reshape op are unit extent. Tests here
+// are more general for testing purposes, but use of these patterns for non-unit
+// dimensions should be deprecated.
+
 #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 func @generic_op_reshape_producer_fusion(%arg0 : tensor)
   -> tensor {
@@ -227,3 +233,55 @@ func @generic_op_permultation_reshape_consumer_fusion_unused_dim(%arg0 : tensor<
 //  CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]]]
 //  CHECK-SAME:     ins(%[[ARG0]] : tensor<6x1xf32>)
 //  CHECK-SAME:     outs(%[[T1]] : tensor<6xi32>)
+
+// -----
+
+#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d4, d0, d6, d3, d5, d1)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+func @permuted_dims_fusion_expand_shape(%arg0 : tensor<3x8x7x240xf32>) -> tensor<4x6x3x8x2x5x7xf32> {
+  %0 = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6]]
+      : tensor<3x8x7x240xf32> into tensor<3x2x4x7x8x5x6xf32>
+  %1 = linalg.init_tensor [4, 6, 3, 8, 2, 5, 7] : tensor<4x6x3x8x2x5x7xf32>
+  %2 = linalg.generic {
+      indexing_maps = [#map0, #map1],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
+      ins(%0 : tensor<3x2x4x7x8x5x6xf32>) outs(%1 : tensor<4x6x3x8x2x5x7xf32>) {
+      ^bb0(%arg1 : f32, %arg2 : f32):
+        linalg.yield %arg1 : f32
+      } -> tensor<4x6x3x8x2x5x7xf32>
+  return %2 : tensor<4x6x3x8x2x5x7xf32>
+}
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d0 + d4 * 4, d6, d1 + d3 * 30 + d5 * 6)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+//      CHECK: func @permuted_dims_fusion_expand_shape(
+// CHECK-SAME:     %[[ARG0:.+]]: tensor<3x8x7x240xf32>)
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
+// CHECK-SAME:       ins(%[[ARG0]] :
+//      CHECK:   return %[[RESULT]]
+
+// -----
+
+#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d4, d0, d6, d3, d5, d1)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+func @permuted_dims_fusion_collapse_shape(%arg0 : tensor<4x6x3x8x2x5x7xf32>) -> tensor<3x8x7x240xf32> {
+  %0 = linalg.init_tensor [3, 2, 4, 7, 8, 5, 6] : tensor<3x2x4x7x8x5x6xf32>
+  %1 = linalg.generic {
+      indexing_maps = [#map1, #map0],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
+      ins(%arg0 : tensor<4x6x3x8x2x5x7xf32>) outs(%0 : tensor<3x2x4x7x8x5x6xf32>) {
+      ^bb0(%arg1 : f32, %arg2 : f32):
+        linalg.yield %arg1 : f32
+      } -> tensor<3x2x4x7x8x5x6xf32>
+  %2 = tensor.collapse_shape %1 [[0], [1, 2], [3], [4, 5, 6]]
+      : tensor<3x2x4x7x8x5x6xf32> into tensor<3x8x7x240xf32>
+  return %2 : tensor<3x8x7x240xf32>
+}
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d0 + d4 * 4, d6, d1 + d3 * 30 + d5 * 6)>
+//      CHECK: func @permuted_dims_fusion_collapse_shape(
+// CHECK-SAME:     %[[ARG0:.+]]: tensor<4x6x3x8x2x5x7xf32>)
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
+// CHECK-SAME:       ins(%[[ARG0]] :
+//      CHECK:   return %[[RESULT]]

From 0944c196c58f62299540983e2d10cd7bef60691a Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum 
Date: Wed, 29 Dec 2021 15:46:43 +0000
Subject: [PATCH 600/946] [libTooling] Adds more support for constructing
 object access expressions.

This patch adds a `buildAccess` function, which constructs a string with the
proper operator to use based on the expression's form and type. It also adds two
predicates related to smart pointers, which are needed by `buildAccess` but are
also of general value.

We deprecate `buildDot` and `buildArrow` in favor of the more general
`buildAccess`. These will be removed in a future patch.

Differential Revision: https://reviews.llvm.org/D116377
---
 .../Tooling/Transformer/SourceCodeBuilders.h  |  35 ++++
 .../Transformer/SourceCodeBuilders.cpp        |  83 +++++++-
 clang/lib/Tooling/Transformer/Stencil.cpp     |  72 +------
 .../Tooling/SourceCodeBuildersTest.cpp        | 191 +++++++++++++++++-
 clang/unittests/Tooling/StencilTest.cpp       |  42 ++--
 5 files changed, 323 insertions(+), 100 deletions(-)

diff --git a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
index b6d9bd0e2d5d3..ab0eb71ef44e2 100644
--- a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
+++ b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
@@ -43,6 +43,15 @@ inline bool needParensBeforeDotOrArrow(const Expr &E) {
 /// Determines whether printing this expression to the right of a unary operator
 /// requires a parentheses to preserve its meaning.
 bool needParensAfterUnaryOperator(const Expr &E);
+
+/// Recognizes known types (and sugared versions thereof) that overload the `*`
+/// and `->` operators. Below is the list of currently included types, but it
+/// is subject to change:
+///
+/// * std::unique_ptr, std::shared_ptr, std::weak_ptr,
+/// * std::optional, absl::optional, llvm::Optional,
+/// * absl::StatusOr, llvm::Expected.
+bool isKnownPointerLikeType(QualType Ty, ASTContext &Context);
 /// @}
 
 /// \name Basic code-string generation utilities.
@@ -69,6 +78,8 @@ llvm::Optional buildAddressOf(const Expr &E,
 ///  `x` becomes `x.`
 ///  `*a` becomes `a->`
 ///  `a+b` becomes `(a+b).`
+///
+/// DEPRECATED. Use `buildAccess`.
 llvm::Optional buildDot(const Expr &E, const ASTContext &Context);
 
 /// Adds an arrow to the end of the given expression, but adds parentheses
@@ -77,8 +88,32 @@ llvm::Optional buildDot(const Expr &E, const ASTContext &Context);
 ///  `x` becomes `x->`
 ///  `&a` becomes `a.`
 ///  `a+b` becomes `(a+b)->`
+///
+/// DEPRECATED. Use `buildAccess`.
 llvm::Optional buildArrow(const Expr &E,
                                        const ASTContext &Context);
+
+/// Specifies how to classify pointer-like types -- like values or like pointers
+/// -- with regard to generating member-access syntax.
+enum class PLTClass : bool {
+  Value,
+  Pointer,
+};
+
+/// Adds an appropriate access operator (`.`, `->` or nothing, in the case of
+/// implicit `this`) to the end of the given expression. Adds parentheses when
+/// needed by the syntax and simplifies when possible. If `Classification` is
+/// `Pointer`, for known pointer-like types (see `isKnownPointerLikeType`),
+/// treats `operator->` and `operator*` like the built-in `->` and `*`
+/// operators. Returns `llvm::None` if the access text cannot be constructed.
+///
+///  `x` becomes `x->` or `x.`, depending on `E`'s type
+///  `a+b` becomes `(a+b)->` or `(a+b).`, depending on `E`'s type
+///  `&a` becomes `a.`
+///  `*a` becomes `a->`
+llvm::Optional<std::string>
+buildAccess(const Expr &E, ASTContext &Context,
+            PLTClass Classification = PLTClass::Pointer);
 /// @}
 
 } // namespace tooling
diff --git a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
index a1c99b60216b7..65ade4387a5eb 100644
--- a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
@@ -10,6 +10,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
 #include "llvm/ADT/Twine.h"
 #include 
@@ -60,6 +62,16 @@ bool tooling::needParensAfterUnaryOperator(const Expr &E) {
   return false;
 }
 
+bool tooling::isKnownPointerLikeType(QualType Ty, ASTContext &Context) {
+  using namespace ast_matchers;
+  const auto PointerLikeTy = type(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(cxxRecordDecl(hasAnyName(
+          "::std::unique_ptr", "::std::shared_ptr", "::std::weak_ptr",
+          "::std::optional", "::absl::optional", "::llvm::Optional",
+          "::absl::StatusOr", "::llvm::Expected")))))); // All `::`-anchored.
+  return match(PointerLikeTy, Ty, Context).size() > 0;
+}
+
 llvm::Optional tooling::buildParens(const Expr &E,
                                                  const ASTContext &Context) {
   StringRef Text = getText(E, Context);
@@ -114,8 +126,10 @@ llvm::Optional tooling::buildAddressOf(const Expr &E,
   return ("&" + Text).str();
 }
 
-llvm::Optional tooling::buildDot(const Expr &E,
-                                              const ASTContext &Context) {
+// Append the appropriate access operation (syntactically) to `E`, assuming `E`
+// is a non-pointer value.
+static llvm::Optional
+buildAccessForValue(const Expr &E, const ASTContext &Context) {
   if (const auto *Op = llvm::dyn_cast(&E))
     if (Op->getOpcode() == UO_Deref) {
       // Strip leading '*', add following '->'.
@@ -138,8 +152,10 @@ llvm::Optional tooling::buildDot(const Expr &E,
   return (Text + ".").str();
 }
 
-llvm::Optional tooling::buildArrow(const Expr &E,
-                                                const ASTContext &Context) {
+// Append the appropriate access operation (syntactically) to `E`, assuming `E`
+// is a pointer value.
+static llvm::Optional
+buildAccessForPointer(const Expr &E, const ASTContext &Context) {
   if (const auto *Op = llvm::dyn_cast(&E))
     if (Op->getOpcode() == UO_AddrOf) {
       // Strip leading '&', add following '.'.
@@ -160,3 +176,62 @@ llvm::Optional tooling::buildArrow(const Expr &E,
     return ("(" + Text + ")->").str();
   return (Text + "->").str();
 }
+
+llvm::Optional<std::string> tooling::buildDot(const Expr &E,
+                                              const ASTContext &Context) {
+  return buildAccessForValue(E, Context); // Deprecated; see `buildAccess`.
+}
+
+llvm::Optional<std::string> tooling::buildArrow(const Expr &E,
+                                                const ASTContext &Context) {
+  return buildAccessForPointer(E, Context); // Deprecated; see `buildAccess`.
+}
+
+// If `E` is a single-argument overloaded-operator call of kind `K` on an
+// object `O`, returns `O`. Otherwise, returns `nullptr`.
+static const Expr *maybeGetOperatorObjectArg(const Expr &E,
+                                             OverloadedOperatorKind K) {
+  if (const auto *OpCall = dyn_cast<CXXOperatorCallExpr>(&E)) {
+    if (OpCall->getOperator() == K && OpCall->getNumArgs() == 1)
+      return OpCall->getArg(0);
+  }
+  return nullptr;
+}
+
+static bool treatLikePointer(QualType Ty, PLTClass C, ASTContext &Context) {
+  // Only the `Pointer` classification consults the known pointer-like types;
+  // under `Value` every type is treated as a plain value. A single return
+  // expression (instead of a fully covered `switch` with nothing after it)
+  // keeps compilers from diagnosing a control path that falls off the end of
+  // this non-void function.
+  return C == PLTClass::Pointer && isKnownPointerLikeType(Ty, Context);
+}
+
+// FIXME: move over the other `maybe` functionality from Stencil. Should all be
+// in one place.
+llvm::Optional<std::string> tooling::buildAccess(const Expr &RawExpression,
+                                                 ASTContext &Context,
+                                                 PLTClass Classification) {
+  if (RawExpression.isImplicitCXXThis())
+    // Return the empty string, because `None` signifies some sort of failure.
+    return std::string();
+
+  const Expr *E = RawExpression.IgnoreImplicitAsWritten();
+
+  if (E->getType()->isAnyPointerType() ||
+      treatLikePointer(E->getType(), Classification, Context)) {
+    // Strip off operator-> calls. They can only occur inside an actual arrow
+    // member access, so we treat them as equivalent to an actual object
+    // expression.
+    if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Arrow))
+      E = Obj;
+    return buildAccessForPointer(*E, Context);
+  }
+
+  // `*Obj` for a pointer-like `Obj` is accessed through `Obj` as a pointer.
+  if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Star))
+    if (treatLikePointer(Obj->getType(), Classification, Context))
+      return buildAccessForPointer(*Obj, Context);
+
+  return buildAccessForValue(*E, Context);
+}
diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp
index 8b20ef34c3ff2..348d04dbaf4a3 100644
--- a/clang/lib/Tooling/Transformer/Stencil.cpp
+++ b/clang/lib/Tooling/Transformer/Stencil.cpp
@@ -11,7 +11,6 @@
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Expr.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
@@ -56,39 +55,6 @@ static Error printNode(StringRef Id, const MatchFinder::MatchResult &Match,
   return Error::success();
 }
 
-// FIXME: Consider memoizing this function using the `ASTContext`.
-static bool isSmartPointerType(QualType Ty, ASTContext &Context) {
-  using namespace ::clang::ast_matchers;
-
-  // Optimization: hard-code common smart-pointer types. This can/should be
-  // removed if we start caching the results of this function.
-  auto KnownSmartPointer =
-      cxxRecordDecl(hasAnyName("::std::unique_ptr", "::std::shared_ptr"));
-  const auto QuacksLikeASmartPointer = cxxRecordDecl(
-      hasMethod(cxxMethodDecl(hasOverloadedOperatorName("->"),
-                              returns(qualType(pointsTo(type()))))),
-      hasMethod(cxxMethodDecl(hasOverloadedOperatorName("*"),
-                              returns(qualType(references(type()))))));
-  const auto SmartPointer = qualType(hasDeclaration(
-      cxxRecordDecl(anyOf(KnownSmartPointer, QuacksLikeASmartPointer))));
-  return match(SmartPointer, Ty, Context).size() > 0;
-}
-
-// Identifies use of `operator*` on smart pointers, and returns the underlying
-// smart-pointer expression; otherwise, returns null.
-static const Expr *isSmartDereference(const Expr &E, ASTContext &Context) {
-  using namespace ::clang::ast_matchers;
-
-  const auto HasOverloadedArrow = cxxRecordDecl(hasMethod(cxxMethodDecl(
-      hasOverloadedOperatorName("->"), returns(qualType(pointsTo(type()))))));
-  // Verify it is a smart pointer by finding `operator->` in the class
-  // declaration.
-  auto Deref = cxxOperatorCallExpr(
-      hasOverloadedOperatorName("*"), hasUnaryOperand(expr().bind("arg")),
-      callee(cxxMethodDecl(ofClass(HasOverloadedArrow))));
-  return selectFirst("arg", match(Deref, E, Context));
-}
-
 namespace {
 // An arbitrary fragment of code within a stencil.
 class RawTextStencil : public StencilInterface {
@@ -196,7 +162,7 @@ class UnaryOperationStencil : public StencilInterface {
       break;
     case UnaryNodeOperator::MaybeDeref:
       if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
+          tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) {
         // Strip off any operator->. This can only occur inside an actual arrow
         // member access, so we treat it as equivalent to an actual object
         // expression.
@@ -216,7 +182,7 @@ class UnaryOperationStencil : public StencilInterface {
       break;
     case UnaryNodeOperator::MaybeAddressOf:
       if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
+          tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) {
         // Strip off any operator->. This can only occur inside an actual arrow
         // member access, so we treat it as equivalent to an actual object
         // expression.
@@ -311,34 +277,12 @@ class AccessStencil : public StencilInterface {
     if (E == nullptr)
       return llvm::make_error(errc::invalid_argument,
                                            "Id not bound: " + BaseId);
-    if (!E->isImplicitCXXThis()) {
-      llvm::Optional S;
-      if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
-        // Strip off any operator->. This can only occur inside an actual arrow
-        // member access, so we treat it as equivalent to an actual object
-        // expression.
-        if (const auto *OpCall = dyn_cast(E)) {
-          if (OpCall->getOperator() == clang::OO_Arrow &&
-              OpCall->getNumArgs() == 1) {
-            E = OpCall->getArg(0);
-          }
-        }
-        S = tooling::buildArrow(*E, *Match.Context);
-      } else if (const auto *Operand = isSmartDereference(*E, *Match.Context)) {
-        // `buildDot` already handles the built-in dereference operator, so we
-        // only need to catch overloaded `operator*`.
-        S = tooling::buildArrow(*Operand, *Match.Context);
-      } else {
-        S = tooling::buildDot(*E, *Match.Context);
-      }
-      if (S.hasValue())
-        *Result += *S;
-      else
-        return llvm::make_error(
-            errc::invalid_argument,
-            "Could not construct object text from ID: " + BaseId);
-    }
+    llvm::Optional S = tooling::buildAccess(*E, *Match.Context);
+    if (!S.hasValue())
+      return llvm::make_error(
+          errc::invalid_argument,
+          "Could not construct object text from ID: " + BaseId);
+    *Result += *S;
     return Member->eval(Match, Result);
   }
 };
diff --git a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
index ce99d1e7f5217..50167881e5cd6 100644
--- a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
+++ b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Transformer/SourceCodeBuilders.h"
+#include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Tooling.h"
@@ -24,8 +25,23 @@ using llvm::ValueIs;
 
 // Create a valid translation unit from a statement.
 static std::string wrapSnippet(StringRef StatementCode) {
-  return ("struct S { S(); S(int); int field; };\n"
+  return ("namespace std {\n"
+          "template  struct unique_ptr {\n"
+          "  T* operator->() const;\n"
+          "  T& operator*() const;\n"
+          "};\n"
+          "template  struct shared_ptr {\n"
+          "  T* operator->() const;\n"
+          "  T& operator*() const;\n"
+          "};\n"
+          "}\n"
+          "struct A { void super(); };\n"
+          "struct S : public A { S(); S(int); int Field; };\n"
           "S operator+(const S &a, const S &b);\n"
+          "struct Smart {\n"
+          "  S* operator->() const;\n"
+          "  S& operator*() const;\n"
+          "};\n"
           "auto test_snippet = []{" +
           StatementCode + "};")
       .str();
@@ -51,7 +67,8 @@ struct TestMatch {
 // `StatementCode` may contain other statements not described by `Matcher`.
 static llvm::Optional matchStmt(StringRef StatementCode,
                                            StatementMatcher Matcher) {
-  auto AstUnit = buildASTFromCode(wrapSnippet(StatementCode));
+  auto AstUnit = buildASTFromCodeWithArgs(wrapSnippet(StatementCode),
+                                          {"-Wno-unused-value"});
   if (AstUnit == nullptr) {
     ADD_FAILURE() << "AST construction failed";
     return llvm::None;
@@ -95,7 +112,7 @@ TEST(SourceCodeBuildersTest, needParensAfterUnaryOperator) {
   testPredicate(needParensAfterUnaryOperator, "int(3.0);", false);
   testPredicate(needParensAfterUnaryOperator, "void f(); f();", false);
   testPredicate(needParensAfterUnaryOperator, "int a[3]; a[0];", false);
-  testPredicate(needParensAfterUnaryOperator, "S x; x.field;", false);
+  testPredicate(needParensAfterUnaryOperator, "S x; x.Field;", false);
   testPredicate(needParensAfterUnaryOperator, "int x = 1; --x;", false);
   testPredicate(needParensAfterUnaryOperator, "int x = 1; -x;", false);
 }
@@ -117,7 +134,7 @@ TEST(SourceCodeBuildersTest, mayEverNeedParens) {
   testPredicate(mayEverNeedParens, "int(3.0);", false);
   testPredicate(mayEverNeedParens, "void f(); f();", false);
   testPredicate(mayEverNeedParens, "int a[3]; a[0];", false);
-  testPredicate(mayEverNeedParens, "S x; x.field;", false);
+  testPredicate(mayEverNeedParens, "S x; x.Field;", false);
 }
 
 TEST(SourceCodeBuildersTest, mayEverNeedParensInImplictConversion) {
@@ -126,6 +143,50 @@ TEST(SourceCodeBuildersTest, mayEverNeedParensInImplictConversion) {
   testPredicateOnArg(mayEverNeedParens, "void f(S); f(3 + 5);", true);
 }
 
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUniquePtr) {
+  std::string Snippet = "std::unique_ptr<int> P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_TRUE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeSharedPtr) {
+  std::string Snippet = "std::shared_ptr<int> P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_TRUE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUnknownTypeFalse) {
+  std::string Snippet = "Smart P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_FALSE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeNormalTypeFalse) {
+  std::string Snippet = "int *P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_FALSE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
 static void testBuilder(
     llvm::Optional (*Builder)(const Expr &, const ASTContext &),
     StringRef Snippet, StringRef Expected) {
@@ -136,6 +197,15 @@ static void testBuilder(
               ValueIs(std::string(Expected)));
 }
 
+static void testBuildAccess(StringRef Snippet, StringRef Expected,
+                            PLTClass C = PLTClass::Pointer) {
+  auto StmtMatch = matchStmt(Snippet, expr().bind("expr"));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context, C),
+              ValueIs(std::string(Expected)));
+}
+
 TEST(SourceCodeBuildersTest, BuildParensUnaryOp) {
   testBuilder(buildParens, "-4;", "(-4)");
 }
@@ -245,4 +315,117 @@ TEST(SourceCodeBuildersTest, BuildArrowBinaryOperation) {
 TEST(SourceCodeBuildersTest, BuildArrowValueAddressWithParens) {
   testBuilder(buildArrow, "S x; &(true ? x : x);", "(true ? x : x).");
 }
+
+TEST(SourceCodeBuildersTest, BuildAccessValue) {
+  testBuildAccess("S x; x;", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereference) {
+  testBuildAccess("S *x; *x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceIgnoresParens) {
+  testBuildAccess("S *x; *(x);", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueBinaryOperation) {
+  testBuildAccess("S x; x + x;", "(x + x).");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceExprWithParens) {
+  testBuildAccess("S *x; *(x + 1);", "(x + 1)->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointer) {
+  testBuildAccess("S *x; x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddress) {
+  testBuildAccess("S x; &x;", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddressIgnoresParens) {
+  testBuildAccess("S x; &(x);", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerBinaryOperation) {
+  testBuildAccess("S *x; x + 1;", "(x + 1)->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddressWithParens) {
+  testBuildAccess("S x; &(true ? x : x);", "(true ? x : x).");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointer) {
+  testBuildAccess("std::unique_ptr<int> x; x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerAsValue) {
+  testBuildAccess("std::unique_ptr<int> x; x;", "x.", PLTClass::Value);
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDeref) {
+  testBuildAccess("std::unique_ptr<int> x; *x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDerefAsValue) {
+  testBuildAccess("std::unique_ptr<int> x; *x;", "(*x).", PLTClass::Value);
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerMemberCall) {
+  StringRef Snippet = R"cc(
+    Smart x;
+    x->Field;
+  )cc";
+  auto StmtMatch =
+      matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string("x->")));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessIgnoreImplicit) {
+  StringRef Snippet = R"cc(
+    S x;
+    A *a;
+    a = &x;
+  )cc";
+  auto StmtMatch =
+      matchStmt(Snippet, binaryOperator(isAssignmentOperator(),
+                                        hasRHS(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string("x.")));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessImplicitThis) {
+  StringRef Snippet = R"cc(
+    struct Struct {
+      void foo() {}
+      void bar() {
+        foo();
+      }
+    };
+  )cc";
+  auto StmtMatch = matchStmt(
+      Snippet,
+      cxxMemberCallExpr(onImplicitObjectArgument(cxxThisExpr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string()));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessImplicitThisIgnoreImplicitCasts) {
+  StringRef Snippet = "struct B : public A { void f() { super(); } };";
+  auto StmtMatch = matchStmt(
+      Snippet,
+      cxxMemberCallExpr(onImplicitObjectArgument(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string()));
+}
 } // namespace
diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp
index 6036f8f4fa381..28cc8281485a8 100644
--- a/clang/unittests/Tooling/StencilTest.cpp
+++ b/clang/unittests/Tooling/StencilTest.cpp
@@ -36,10 +36,13 @@ static std::string wrapSnippet(StringRef ExtraPreface,
     namespace N { class C {}; }
     namespace { class AnonC {}; }
     struct S { int Field; };
-    struct Smart {
-      S* operator->() const;
-      S& operator*() const;
+    namespace std {
+    template 
+    struct unique_ptr {
+      T* operator->() const;
+      T& operator*() const;
     };
+    }
   )cc";
   return (Preface + ExtraPreface + "auto stencil_test_snippet = []{" +
           StatementCode + "};")
@@ -326,32 +329,15 @@ TEST_F(StencilTest, MaybeDerefAddressExpr) {
 TEST_F(StencilTest, MaybeDerefSmartPointer) {
   StringRef Id = "id";
   std::string Snippet = R"cc(
-    Smart x;
+    std::unique_ptr x;
     x;
   )cc";
   testExpr(Id, Snippet, maybeDeref(Id), "*x");
 }
 
-// Tests that unique_ptr specifically is handled.
-TEST_F(StencilTest, MaybeDerefSmartPointerUniquePtr) {
-  StringRef Id = "id";
-  // We deliberately specify `unique_ptr` as empty to verify that it matches
-  // because of its name, rather than its contents.
-  StringRef ExtraPreface =
-      "namespace std { template  class unique_ptr {}; }\n";
-  StringRef Snippet = R"cc(
-    std::unique_ptr x;
-    x;
-  )cc";
-  auto StmtMatch = matchStmt(Snippet, expr().bind(Id), ExtraPreface);
-  ASSERT_TRUE(StmtMatch);
-  EXPECT_THAT_EXPECTED(maybeDeref(Id)->eval(StmtMatch->Result),
-                       HasValue(std::string("*x")));
-}
-
 TEST_F(StencilTest, MaybeDerefSmartPointerFromMemberExpr) {
   StringRef Id = "id";
-  std::string Snippet = "Smart x; x->Field;";
+  std::string Snippet = "std::unique_ptr x; x->Field;";
   auto StmtMatch =
       matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id))));
   ASSERT_TRUE(StmtMatch);
@@ -381,12 +367,12 @@ TEST_F(StencilTest, MaybeAddressOfDerefExpr) {
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointer) {
   StringRef Id = "id";
-  testExpr(Id, "Smart x; x;", maybeAddressOf(Id), "x");
+  testExpr(Id, "std::unique_ptr x; x;", maybeAddressOf(Id), "x");
 }
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointerFromMemberCall) {
   StringRef Id = "id";
-  std::string Snippet = "Smart x; x->Field;";
+  std::string Snippet = "std::unique_ptr x; x->Field;";
   auto StmtMatch =
       matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id))));
   ASSERT_TRUE(StmtMatch);
@@ -396,7 +382,7 @@ TEST_F(StencilTest, MaybeAddressOfSmartPointerFromMemberCall) {
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointerDerefNoCancel) {
   StringRef Id = "id";
-  testExpr(Id, "Smart x; *x;", maybeAddressOf(Id), "&*x");
+  testExpr(Id, "std::unique_ptr x; *x;", maybeAddressOf(Id), "&*x");
 }
 
 TEST_F(StencilTest, AccessOpValue) {
@@ -446,7 +432,7 @@ TEST_F(StencilTest, AccessOpPointerDereference) {
 
 TEST_F(StencilTest, AccessOpSmartPointer) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr x;
     x;
   )cc";
   StringRef Id = "id";
@@ -455,7 +441,7 @@ TEST_F(StencilTest, AccessOpSmartPointer) {
 
 TEST_F(StencilTest, AccessOpSmartPointerDereference) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr x;
     *x;
   )cc";
   StringRef Id = "id";
@@ -464,7 +450,7 @@ TEST_F(StencilTest, AccessOpSmartPointerDereference) {
 
 TEST_F(StencilTest, AccessOpSmartPointerMemberCall) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr x;
     x->Field;
   )cc";
   StringRef Id = "id";

From f3ab0ccd00db3bb26851d3e1cf4a789fe7fa6e2c Mon Sep 17 00:00:00 2001
From: MaheshRavishankar 
Date: Tue, 25 Jan 2022 11:44:54 -0800
Subject: [PATCH 601/946] [mlir][Linalg] Add couple of convenience methods to
 `LinalgInterface`.

Add methods to
- Get block argument that is tied with an opOperand
- Get the yield value that is tied with an output opOperand.

Differential Revision: https://reviews.llvm.org/D118085
---
 .../Dialect/Linalg/IR/LinalgInterfaces.td     | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
index 413c2cc18acea..f40f82ed7cd94 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
@@ -641,6 +641,19 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
         return !opOperand->get().getType().template isa();
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return the block argument for an `opOperand`.
+      }],
+      /*retTy=*/"BlockArgument",
+      /*methodName=*/"getTiedBlockArgument",
+      /*args=*/(ins "OpOperand *":$opOperand),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        assert(opOperand->getOwner() == this->getOperation());
+        return getBlock()->getArgument(opOperand->getOperandNumber());
+      }]
+    >,
     InterfaceMethod<
       /*desc=*/[{
         Return the input or output indexing map for `opOperand`.
@@ -672,6 +685,24 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
         return this->getOperation()->getResult(resultIndex);
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return the value yielded by the region corresponding to an output
+        `opOperand`.
+      }],
+      /*retTy=*/"OpOperand *",
+      /*methodName=*/"getTiedYieldValue",
+      /*args=*/(ins "OpOperand*":$opOperand),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        assert(opOperand->getOwner() == this->getOperation());
+        int64_t resultIndex = opOperand->getOperandNumber() - getNumInputs();
+        assert(resultIndex >= 0 &&
+               resultIndex < this->getOperation()->getNumResults());
+        Operation *yieldOp = getBlock()->getTerminator();
+        return &yieldOp->getOpOperand(resultIndex);
+      }]
+    >,
     //===------------------------------------------------------------------===//
     // Other interface methods.
     //===------------------------------------------------------------------===//

From 2868e2677b609ed0a1b0e441d5084a8956137816 Mon Sep 17 00:00:00 2001
From: Argyrios Kyrtzidis 
Date: Mon, 24 Jan 2022 17:49:37 -0800
Subject: [PATCH 602/946] [cmake] Some NFC changes in preparation for
 accommodating `Ninja Multi-Config`

* Use `MATCHES` so that `Ninja Multi-Config` generator also satisfies the Ninja check
* Pull out a couple of values into variables, inside `function(tablegen project ofn)`, NFC

Differential Revision: https://reviews.llvm.org/D118100
---
 llvm/cmake/config-ix.cmake                 |  4 ++--
 llvm/cmake/modules/HandleLLVMOptions.cmake |  8 ++++----
 llvm/cmake/modules/TableGen.cmake          | 11 +++++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 7e9d76d6c5648..a138d372d3b29 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -602,7 +602,7 @@ find_program(GOLD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.gold ld.gold
 set(LLVM_BINUTILS_INCDIR "" CACHE PATH
     "PATH to binutils/include containing plugin-api.h for gold plugin.")
 
-if(CMAKE_GENERATOR STREQUAL "Ninja")
+if(CMAKE_GENERATOR MATCHES "Ninja")
   execute_process(COMMAND ${CMAKE_MAKE_PROGRAM} --version
     OUTPUT_VARIABLE NINJA_VERSION
     OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -610,7 +610,7 @@ if(CMAKE_GENERATOR STREQUAL "Ninja")
   message(STATUS "Ninja version: ${NINJA_VERSION}")
 endif()
 
-if(CMAKE_GENERATOR STREQUAL "Ninja" AND
+if(CMAKE_GENERATOR MATCHES "Ninja" AND
     NOT "${NINJA_VERSION}" VERSION_LESS "1.9.0" AND
     CMAKE_HOST_APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_GREATER "15.6.0")
   set(LLVM_TOUCH_STATIC_LIBRARIES ON)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index c2feecc21a802..fcaa8f20bf941 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -34,7 +34,7 @@ string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
   "Define the maximum number of concurrent compilation jobs (Ninja only).")
 if(LLVM_PARALLEL_COMPILE_JOBS)
-  if(NOT CMAKE_GENERATOR STREQUAL "Ninja")
+  if(NOT CMAKE_GENERATOR MATCHES "Ninja")
     message(WARNING "Job pooling is only available with Ninja generators.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
@@ -44,7 +44,7 @@ endif()
 
 set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
   "Define the maximum number of concurrent link jobs (Ninja only).")
-if(CMAKE_GENERATOR STREQUAL "Ninja")
+if(CMAKE_GENERATOR MATCHES "Ninja")
   if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
     message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
     set(LLVM_PARALLEL_LINK_JOBS "2")
@@ -920,7 +920,7 @@ add_definitions( -D__STDC_LIMIT_MACROS )
 
 # clang and gcc don't default-print colored diagnostics when invoked from Ninja.
 if (UNIX AND
-    CMAKE_GENERATOR STREQUAL "Ninja" AND
+    CMAKE_GENERATOR MATCHES "Ninja" AND
     (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
       NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9))))
@@ -928,7 +928,7 @@ if (UNIX AND
 endif()
 
 # lld doesn't print colored diagnostics when invoked from Ninja
-if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja")
+if (UNIX AND CMAKE_GENERATOR MATCHES "Ninja")
   include(LLVMCheckLinkerFlag)
   llvm_check_linker_flag(CXX "-Wl,--color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS)
   append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,--color-diagnostics"
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 6d6513c5ab459..59fd0d3b07336 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -10,7 +10,7 @@ function(tablegen project ofn)
   endif()
 
   # Use depfile instead of globbing arbitrary *.td(s) for Ninja.
-  if(CMAKE_GENERATOR STREQUAL "Ninja")
+  if(CMAKE_GENERATOR MATCHES "Ninja")
     # Make output path relative to build.ninja, assuming located on
     # ${CMAKE_BINARY_DIR}.
     # CMake emits build targets as relative paths but Ninja doesn't identify
@@ -93,8 +93,11 @@ function(tablegen project ofn)
   get_directory_property(tblgen_includes INCLUDE_DIRECTORIES)
   list(TRANSFORM tblgen_includes PREPEND -I)
 
+  set(tablegen_exe ${${project}_TABLEGEN_EXE})
+  set(tablegen_depends ${${project}_TABLEGEN_TARGET} ${tablegen_exe})
+
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
-    COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND ${tablegen_exe} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
     ${tblgen_includes}
     ${LLVM_TABLEGEN_FLAGS}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
@@ -103,7 +106,7 @@ function(tablegen project ofn)
     # The file in LLVM_TARGET_DEFINITIONS may be not in the current
     # directory and local_tds may not contain it, so we must
     # explicitly list it here:
-    DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE}
+    DEPENDS ${tablegen_depends}
       ${local_tds} ${global_tds}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
     ${LLVM_TARGET_DEPENDS}
@@ -137,7 +140,7 @@ macro(add_tablegen target project)
   set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen)
 
   # CMake doesn't let compilation units depend on their dependent libraries on some generators.
-  if(NOT CMAKE_GENERATOR STREQUAL "Ninja" AND NOT XCODE)
+  if(NOT CMAKE_GENERATOR MATCHES "Ninja" AND NOT XCODE)
     # FIXME: It leaks to user, callee of add_tablegen.
     set(LLVM_ENABLE_OBJLIB ON)
   endif()

From 2a1b7aa016c0f4b5598806205bdfbab1ea2d92c4 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Tue, 25 Jan 2022 20:49:55 +0100
Subject: [PATCH 603/946] [lldb] Fix ProcessKDPLog for the logging refactor

---
 .../Process/MacOSX-Kernel/ProcessKDPLog.cpp   | 28 ++++++-----
 .../Process/MacOSX-Kernel/ProcessKDPLog.h     | 50 ++++++++++++-------
 2 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
index 3b5f1157d5446..f741126f965bb 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
@@ -11,24 +11,28 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-    {{"async"}, {"log asynchronous activity"}, KDP_LOG_ASYNC},
-    {{"break"}, {"log breakpoints"}, KDP_LOG_BREAKPOINTS},
-    {{"comm"}, {"log communication activity"}, KDP_LOG_COMM},
+    {{"async"}, {"log asynchronous activity"}, KDPLog::Async},
+    {{"break"}, {"log breakpoints"}, KDPLog::Breakpoints},
+    {{"comm"}, {"log communication activity"}, KDPLog::Comm},
     {{"data-long"},
      {"log memory bytes for memory reads and writes for all transactions"},
-     KDP_LOG_MEMORY_DATA_LONG},
+     KDPLog::MemoryDataLong},
     {{"data-short"},
      {"log memory bytes for memory reads and writes for short transactions "
       "only"},
-     KDP_LOG_MEMORY_DATA_SHORT},
-    {{"memory"}, {"log memory reads and writes"}, KDP_LOG_MEMORY},
-    {{"packets"}, {"log gdb remote packets"}, KDP_LOG_PACKETS},
-    {{"process"}, {"log process events and activities"}, KDP_LOG_PROCESS},
-    {{"step"}, {"log step related activities"}, KDP_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, KDP_LOG_THREAD},
-    {{"watch"}, {"log watchpoint related activities"}, KDP_LOG_WATCHPOINTS},
+     KDPLog::MemoryDataShort},
+    {{"memory"}, {"log memory reads and writes"}, KDPLog::Memory},
+    {{"packets"}, {"log gdb remote packets"}, KDPLog::Packets},
+    {{"process"}, {"log process events and activities"}, KDPLog::Process},
+    {{"step"}, {"log step related activities"}, KDPLog::Step},
+    {{"thread"}, {"log thread events and activities"}, KDPLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, KDPLog::Watchpoints},
 };
 
-Log::Channel ProcessKDPLog::g_channel(g_categories, KDP_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, KDPLog::Packets);
+
+template <> Log::Channel &lldb_private::LogChannelFor() {
+  return g_channel;
+}
 
 void ProcessKDPLog::Initialize() { Log::Register("kdp-remote", g_channel); }
diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
index 91b1b6e49b7af..f47a9f5dd0876 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
@@ -11,32 +11,44 @@
 
 #include "lldb/Utility/Log.h"
 
-#define KDP_LOG_PROCESS (1u << 1)
-#define KDP_LOG_THREAD (1u << 2)
-#define KDP_LOG_PACKETS (1u << 3)
-#define KDP_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define KDP_LOG_MEMORY_DATA_SHORT                                              \
-  (1u << 5) // Log short memory reads/writes bytes
-#define KDP_LOG_MEMORY_DATA_LONG (1u << 6) // Log all memory reads/writes bytes
-#define KDP_LOG_BREAKPOINTS (1u << 7)
-#define KDP_LOG_WATCHPOINTS (1u << 8)
-#define KDP_LOG_STEP (1u << 9)
-#define KDP_LOG_COMM (1u << 10)
-#define KDP_LOG_ASYNC (1u << 11)
-#define KDP_LOG_ALL (UINT32_MAX)
-#define KDP_LOG_DEFAULT KDP_LOG_PACKETS
 
 namespace lldb_private {
-class ProcessKDPLog {
-  static Log::Channel g_channel;
 
+enum class KDPLog : Log::MaskType {
+  Async = Log::ChannelFlag<0>,
+  Breakpoints = Log::ChannelFlag<1>,
+  Comm = Log::ChannelFlag<2>,
+  MemoryDataLong = Log::ChannelFlag<3>,  // Log all memory reads/writes bytes
+  MemoryDataShort = Log::ChannelFlag<4>, // Log short memory reads/writes bytes
+  Memory = Log::ChannelFlag<5>,          // Log memory reads/writes calls
+  Packets = Log::ChannelFlag<6>,
+  Process = Log::ChannelFlag<7>,
+  Step = Log::ChannelFlag<8>,
+  Thread = Log::ChannelFlag<9>,
+  Watchpoints = Log::ChannelFlag<10>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
+#define KDP_LOG_PROCESS ::lldb_private::KDPLog::Process
+#define KDP_LOG_THREAD ::lldb_private::KDPLog::Thread
+#define KDP_LOG_PACKETS ::lldb_private::KDPLog::Packets
+#define KDP_LOG_MEMORY ::lldb_private::KDPLog::Memory
+#define KDP_LOG_MEMORY_DATA_SHORT ::lldb_private::KDPLog::MemoryDataShort
+#define KDP_LOG_MEMORY_DATA_LONG ::lldb_private::KDPLog::MemoryDataLong
+#define KDP_LOG_BREAKPOINTS ::lldb_private::KDPLog::Breakpoints
+#define KDP_LOG_WATCHPOINTS ::lldb_private::KDPLog::Watchpoints
+#define KDP_LOG_STEP ::lldb_private::KDPLog::Step
+#define KDP_LOG_COMM ::lldb_private::KDPLog::Comm
+#define KDP_LOG_ASYNC ::lldb_private::KDPLog::Async
+#define KDP_LOG_DEFAULT KDP_LOG_PACKETS
+
+class ProcessKDPLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) {
-    return g_channel.GetLogIfAll(mask);
-  }
+  static Log *GetLogIfAllCategoriesSet(KDPLog mask) { return GetLog(mask); }
 };
+
+template <> Log::Channel &LogChannelFor();
 }
 
 #endif // LLDB_SOURCE_PLUGINS_PROCESS_MACOSX_KERNEL_PROCESSKDPLOG_H

From 491c154677bcf0fba3c91fdaa7897a48ab605327 Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov 
Date: Tue, 25 Jan 2022 22:46:25 +0300
Subject: [PATCH 604/946] [analyzer] Don't specify PLUGIN_TOOL for analyzer
 plugins

Analyzer plugins explicitly export clang_registerCheckers and clang_analyzerAPIVersionString symbols, so we don't need to specify a tool to link against.

Also, without this patch MSVC build fails with cmake flags -DLLVM_ENABLE_PLUGINS=On -DCLANG_PLUGINS_SUPPORT=On -DLLVM_EXPORT_SYMBOLS_FOR_PLUGINS=On
```
[936/936] Linking CXX shared module bin\SampleAnalyzerPlugin.dll
FAILED: bin/SampleAnalyzerPlugin.dll
cmd.exe /C "cd . && "D:\Program Files\CMake\bin\cmake.exe" -E vs_link_dll --intdir=tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir --rc=C:\PROGRA~2\WI3CF2~1\10\bin\100183~1.0\x64\rc.exe --mt=C:\PROGRA~2\WI3CF2~1\10\bin\100183~1.0\x64\mt.exe --manifests  -- C:\PROGRA~2\MICROS~4\2019\COMMUN~1\VC\Tools\MSVC\1428~1.299\bin\Hostx64\x64\link.exe /nologo tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir\MainCallChecker.cpp.obj  /out:bin\SampleAnalyzerPlugin.dll /implib:lib\SampleAnalyzerPlugin.lib /pdb:bin\SampleAnalyzerPlugin.pdb /dll /version:0.0 /machine:x64 /INCREMENTAL:NO  /DEF:"D:/work/llvm-project-original/build-plugins/tools/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.def"  lib\clang.lib  lib\clangAnalysis.lib  lib\clangAST.lib  lib\clangStaticAnalyzerCore.lib  lib\clangStaticAnalyzerFrontend.lib  lib\clangStaticAnalyzerCheckers.lib  lib\clangStaticAnalyzerCore.lib  lib\clangCrossTU.lib  lib\clangIndex.lib  lib\clangFormat.lib  lib\clangToolingInclusions.lib  lib\clangFrontend.lib  lib\clangDriver.lib  version.lib  lib\clangParse.lib  lib\clangSerialization.lib  lib\clangSema.lib  lib\clangAnalysis.lib  lib\clangEdit.lib  lib\LLVMOption.lib  lib\clangToolingCore.lib  lib\clangRewrite.lib  lib\clangASTMatchers.lib  lib\clangAST.lib  lib\clangLex.lib  lib\clangBasic.lib  lib\LLVMFrontendOpenMP.lib  lib\LLVMScalarOpts.lib  lib\LLVMAggressiveInstCombine.lib  lib\LLVMInstCombine.lib  lib\LLVMTransformUtils.lib  lib\LLVMAnalysis.lib  lib\LLVMProfileData.lib  lib\LLVMDebugInfoDWARF.lib  lib\LLVMObject.lib  lib\LLVMBitReader.lib  lib\LLVMCore.lib  lib\LLVMRemarks.lib  lib\LLVMBitstreamReader.lib  lib\LLVMMCParser.lib  lib\LLVMMC.lib  lib\LLVMDebugInfoCodeView.lib  lib\LLVMTextAPI.lib  lib\LLVMBinaryFormat.lib  lib\LLVMSupport.lib  psapi.lib  shell32.lib  ole32.lib  uuid.lib  advapi32.lib  delayimp.lib  -delayload:shell32.dll  -delayload:ole32.dll  lib\LLVMDemangle.lib  kernel32.lib 
user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib  && cd ."
LINK: command "C:\PROGRA~2\MICROS~4\2019\COMMUN~1\VC\Tools\MSVC\1428~1.299\bin\Hostx64\x64\link.exe /nologo tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir\MainCallChecker.cpp.obj /out:bin\SampleAnalyzerPlugin.dll /implib:lib\SampleAnalyzerPlugin.lib /pdb:bin\SampleAnalyzerPlugin.pdb /dll /version:0.0 /machine:x64 /INCREMENTAL:NO /DEF:D:/work/llvm-project-original/build-plugins/tools/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.def lib\clang.lib lib\clangAnalysis.lib lib\clangAST.lib lib\clangStaticAnalyzerCore.lib lib\clangStaticAnalyzerFrontend.lib lib\clangStaticAnalyzerCheckers.lib lib\clangStaticAnalyzerCore.lib lib\clangCrossTU.lib lib\clangIndex.lib lib\clangFormat.lib lib\clangToolingInclusions.lib lib\clangFrontend.lib lib\clangDriver.lib version.lib lib\clangParse.lib lib\clangSerialization.lib lib\clangSema.lib lib\clangAnalysis.lib lib\clangEdit.lib lib\LLVMOption.lib lib\clangToolingCore.lib lib\clangRewrite.lib lib\clangASTMatchers.lib lib\clangAST.lib lib\clangLex.lib lib\clangBasic.lib lib\LLVMFrontendOpenMP.lib lib\LLVMScalarOpts.lib lib\LLVMAggressiveInstCombine.lib lib\LLVMInstCombine.lib lib\LLVMTransformUtils.lib lib\LLVMAnalysis.lib lib\LLVMProfileData.lib lib\LLVMDebugInfoDWARF.lib lib\LLVMObject.lib lib\LLVMBitReader.lib lib\LLVMCore.lib lib\LLVMRemarks.lib lib\LLVMBitstreamReader.lib lib\LLVMMCParser.lib lib\LLVMMC.lib lib\LLVMDebugInfoCodeView.lib lib\LLVMTextAPI.lib lib\LLVMBinaryFormat.lib lib\LLVMSupport.lib psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib delayimp.lib -delayload:shell32.dll -delayload:ole32.dll lib\LLVMDemangle.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib /MANIFEST /MANIFESTFILE:bin\SampleAnalyzerPlugin.dll.manifest" failed (exit code 1169) with the following output:
clangStaticAnalyzerCore.lib(BugReporter.cpp.obj) : error LNK2005: "public: __cdecl clang::ento::PathSensitiveBugReport::PathSensitiveBugReport(class clang::ento::BugType const &,class llvm::StringRef,class llvm::StringRef,class clang::ento::ExplodedNode const *,class clang::ento::PathDiagnosticLocation,class clang::Decl const *)" (??0PathSensitiveBugReport@ento@clang@@QEAA@AEBVBugType@12@VStringRef@llvm@@1PEBVExplodedNode@12@VPathDiagnosticLocation@12@PEBVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(BugReporter.cpp.obj) : error LNK2005: "private: virtual void __cdecl clang::ento::BugType::anchor(void)" (?anchor@BugType@ento@clang@@EEAAXXZ) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(SVals.cpp.obj) : error LNK2005: "public: class clang::FunctionDecl const * __cdecl clang::ento::SVal::getAsFunctionDecl(void)const " (?getAsFunctionDecl@SVal@ento@clang@@QEBAPEBVFunctionDecl@3@XZ) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(ProgramState.cpp.obj) : error LNK2005: "void __cdecl clang::ento::ProgramStateRelease(class clang::ento::ProgramState const *)" (?ProgramStateRelease@ento@clang@@YAXPEBVProgramState@12@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(ProgramState.cpp.obj) : error LNK2005: "void __cdecl clang::ento::ProgramStateRetain(class clang::ento::ProgramState const *)" (?ProgramStateRetain@ento@clang@@YAXPEBVProgramState@12@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(Environment.cpp.obj) : error LNK2005: "public: __cdecl clang::ento::EnvironmentEntry::EnvironmentEntry(class clang::Stmt const *,class clang::LocationContext const *)" (??0EnvironmentEntry@ento@clang@@QEAA@PEBVStmt@2@PEBVLocationContext@2@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(Environment.cpp.obj) : error LNK2005: "public: class clang::ento::SVal __cdecl clang::ento::Environment::getSVal(class clang::ento::EnvironmentEntry const &,class clang::ento::SValBuilder &)const " (?getSVal@Environment@ento@clang@@QEBA?AVSVal@23@AEBVEnvironmentEntry@23@AEAVSValBuilder@23@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(CheckerManager.cpp.obj) : error LNK2005: "public: void __cdecl clang::ento::CheckerManager::_registerForPreStmt(class clang::ento::CheckerFn,bool (__cdecl*)(class clang::Stmt const *))" (?_registerForPreStmt@CheckerManager@ento@clang@@QEAAXV?$CheckerFn@$$A6AXPEBVStmt@clang@@AEAVCheckerContext@ento@2@@Z@23@P6A_NPEBVStmt@3@@Z@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(CoreEngine.cpp.obj) : error LNK2005: "protected: class clang::ento::ExplodedNode * __cdecl clang::ento::NodeBuilder::generateNodeImpl(class clang::ProgramPoint const &,class llvm::IntrusiveRefCntPtr,class clang::ento::ExplodedNode *,bool)" (?generateNodeImpl@NodeBuilder@ento@clang@@IEAAPEAVExplodedNode@23@AEBVProgramPoint@3@V?$IntrusiveRefCntPtr@$$CBVProgramState@ento@clang@@@llvm@@PEAV423@_N@Z) already defined in clang.lib(clang.exe)
LLVMSupport.lib(SmallVector.cpp.obj) : error LNK2005: "protected: void __cdecl llvm::SmallVectorBase::grow_pod(void *,unsigned __int64,unsigned __int64)" (?grow_pod@?$SmallVectorBase@I@llvm@@IEAAXPEAX_K1@Z) already defined in clang.lib(clang.exe)
LLVMSupport.lib(FoldingSet.cpp.obj) : error LNK2005: "protected: __cdecl llvm::FoldingSetBase::~FoldingSetBase(void)" (??1FoldingSetBase@llvm@@IEAA@XZ) already defined in clang.lib(clang.exe)
clangAST.lib(ASTImporter.cpp.obj) : error LNK2005: "public: __cdecl clang::ASTImporter::ASTImporter(class clang::ASTContext &,class clang::FileManager &,class clang::ASTContext &,class clang::FileManager &,bool,class std::shared_ptr)" (??0ASTImporter@clang@@QEAA@AEAVASTContext@1@AEAVFileManager@1@01_NV?$shared_ptr@VASTImporterSharedState@clang@@@std@@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ASTImporter.cpp.obj) : error LNK2005: "public: class llvm::Expected __cdecl clang::ASTImporter::Import(class clang::Decl *)" (?Import@ASTImporter@clang@@QEAA?AV?$Expected@PEAVDecl@clang@@@llvm@@PEAVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual __cdecl clang::ExternalASTSource::~ExternalASTSource(void)" (??1ExternalASTSource@clang@@UEAA@XZ) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual void __cdecl clang::ExternalASTSource::CompleteRedeclChain(class clang::Decl const *)" (?CompleteRedeclChain@ExternalASTSource@clang@@UEAAXPEBVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual void __cdecl clang::ExternalASTSource::CompleteType(class clang::ObjCInterfaceDecl *)" (?CompleteType@ExternalASTSource@clang@@UEAAXPEAVObjCInterfaceDecl@2@@Z) already defined in clang.lib(clang.exe)
...
```

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D116966
---
 .../Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt   | 2 +-
 clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt | 2 +-
 clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 229de54814926..fc8e2bbc25e11 100644
--- a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
-add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
+add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerDependencyHandling.cpp)
 
 clang_target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
   clangAnalysis
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 432383efba5cc..e8315a0513064 100644
--- a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
-add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerOptionHandling.cpp PLUGIN_TOOL clang)
+add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerOptionHandling.cpp)
 
 clang_target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
   clangAnalysis
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
index d9b3f05cbd1bb..77acc47fd4832 100644
--- a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
-add_llvm_library(SampleAnalyzerPlugin MODULE BUILDTREE_ONLY MainCallChecker.cpp PLUGIN_TOOL clang)
+add_llvm_library(SampleAnalyzerPlugin MODULE BUILDTREE_ONLY MainCallChecker.cpp)
 
 clang_target_link_libraries(SampleAnalyzerPlugin PRIVATE
   clangAnalysis

From 82df72cc67d4a2c07b766908a8aa0e272403421b Mon Sep 17 00:00:00 2001
From: Tue Ly 
Date: Tue, 25 Jan 2022 09:54:01 -0500
Subject: [PATCH 605/946] [libc] Make logf function correctly rounded for all
 rounding modes.

Make logf function correctly rounded for all rounding modes.

Reviewed By: sivachandra, zimmermann6, santoshn, jpl169

Differential Revision: https://reviews.llvm.org/D118149
---
 libc/src/math/generic/logf.cpp   | 66 ++++++++++++++++++++++----------
 libc/test/src/math/logf_test.cpp | 36 ++++++++++++++---
 2 files changed, 76 insertions(+), 26 deletions(-)

diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp
index 1c3aef8650ec2..99bd7a473c7b7 100644
--- a/libc/src/math/generic/logf.cpp
+++ b/libc/src/math/generic/logf.cpp
@@ -9,13 +9,15 @@
 #include "src/math/logf.h"
 #include "common_constants.h" // Lookup table for (1/f)
 #include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FMA.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/common.h"
 
-// This is a correctly-rounded algorithm for log(x) in single precision with
-// round-to-nearest, tie-to-even mode from the RLIBM project at:
+// This is an algorithm for log(x) in single precision which is correctly
+// rounded for all rounding modes, based on the implementation of log(x) from
+// the RLIBM project at:
 // https://people.cs.rutgers.edu/~sn349/rlibm
 
 // Step 1 - Range reduction:
@@ -101,6 +103,42 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
   constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
   using FPBits = typename fputil::FPBits;
   FPBits xbits(x);
+
+  switch (FPBits(x).uintval()) {
+  case 0x41178febU: // x = 0x1.2f1fd6p+3f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.1fcbcep+1f;
+    break;
+  case 0x4c5d65a5U: // x = 0x1.bacb4ap+25f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.1e0696p+4f;
+    break;
+  case 0x65d890d3U: // x = 0x1.b121a6p+76f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.a9a3f2p+5f;
+    break;
+  case 0x6f31a8ecU: // x = 0x1.6351d8p+95f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.08b512p+6f;
+    break;
+  case 0x3f800001U: // x = 0x1.000002p+0f
+    if (fputil::get_round() == FE_UPWARD)
+      return 0x1p-23f;
+    return 0x1.fffffep-24f;
+  case 0x500ffb03U: // x = 0x1.1ff606p+33f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.6fdd34p+4f;
+    break;
+  case 0x7a17f30aU: // x = 0x1.2fe614p+117f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.451436p+6f;
+    break;
+  case 0x5cd69e88U: // x = 0x1.ad3d1p+58f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.45c146p+5f;
+    break;
+  }
+
   int m = 0;
 
   if (xbits.uintval() < FPBits::MIN_NORMAL ||
@@ -130,26 +168,14 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
   double d = static_cast(xbits) - static_cast(f);
   d *= ONE_OVER_F[f_index];
 
+  double extra_factor =
+      fputil::fma(static_cast(m), LOG_2, LOG_F[f_index]);
+
   double r = __llvm_libc::fputil::polyeval(
-      d, 0x1.0000000008169p+0, -0x1.0000004f78405p-1, 0x1.555654d2bc769p-2,
-      -0x1.00a570d090322p-2, 0x1.e158d823f89cap-3);
+      d, extra_factor, 0x1.fffffffffffacp-1, -0x1.fffffffef9cb2p-2,
+      0x1.5555513bc679ap-2, -0x1.fff4805ea441p-3, 0x1.930180dbde91ap-3);
 
-  double extra_factor =
-      __llvm_libc::fputil::fma(static_cast(m), LOG_2, LOG_F[f_index]);
-  switch (FPBits(x).uintval()) {
-  case 0x3f80d19f:
-    return 0x1.a1e82cp-8f;
-  case 0x41178feb:
-    return 0x1.1fcbcep+1f;
-  case 0x4c5d65a5:
-    return 0x1.1e0696p+4f;
-  case 0x65d890d3:
-    return 0x1.a9a3f2p+5f;
-  case 0x6f31a8ec:
-    return 0x1.08b512p+6f;
-  default:
-    return static_cast(__llvm_libc::fputil::fma(d, r, extra_factor));
-  }
+  return static_cast(r);
 }
 
 #pragma clang diagnostic pop
diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp
index eb4da810486d3..1f2834d3cf31b 100644
--- a/libc/test/src/math/logf_test.cpp
+++ b/libc/test/src/math/logf_test.cpp
@@ -31,13 +31,37 @@ TEST(LlvmLibcLogfTest, SpecialNumbers) {
 }
 
 TEST(LlvmLibcLogfTest, TrickyInputs) {
-  constexpr int N = 24;
+  constexpr int N = 28;
   constexpr uint32_t INPUTS[N] = {
-      0x3509dcf6U, 0x3bf86ef0U, 0x3ca1c99fU, 0x3d13e105U, 0x3f7ff1f2U,
-      0x3f7fffffU, 0x3f800006U, 0x3f800014U, 0x3f80001cU, 0x3f80c777U,
-      0x3f80ce72U, 0x3f80d19fU, 0x3f80f7bfU, 0x3f80fcfeU, 0x3f81feb4U,
-      0x3f83d731U, 0x3f90cb1dU, 0x3fc55379U, 0x3fd364d7U, 0x41178febU,
-      0x4c5d65a5U, 0x4e85f412U, 0x65d890d3U, 0x6f31a8ecU};
+      0x3509dcf6U, /*0x1.13b9ecp-21f*/
+      0x3bf86ef0U, /*0x1.f0ddep-8f*/
+      0x3ca1c99fU, /*0x1.43933ep-6f*/
+      0x3d13e105U, /*0x1.27c20ap-5f*/
+      0x3f7ff1f2U, /*0x1.ffe3e4p-1f*/
+      0x3f7fffffU, /*0x1.fffffep-1f*/
+      0x3f800001U, /*0x1.000002p+0f*/
+      0x3f800006U, /*0x1.00000cp+0f*/
+      0x3f800014U, /*0x1.000028p+0f*/
+      0x3f80001cU, /*0x1.000038p+0f*/
+      0x3f80c777U, /*0x1.018eeep+0f*/
+      0x3f80ce72U, /*0x1.019ce4p+0f*/
+      0x3f80d19fU, /*0x1.01a33ep+0f*/
+      0x3f80f7bfU, /*0x1.01ef7ep+0f*/
+      0x3f80fcfeU, /*0x1.01f9fcp+0f*/
+      0x3f81feb4U, /*0x1.03fd68p+0f*/
+      0x3f83d731U, /*0x1.07ae62p+0f*/
+      0x3f90cb1dU, /*0x1.21963ap+0f*/
+      0x3fc55379U, /*0x1.8aa6f2p+0f*/
+      0x3fd364d7U, /*0x1.a6c9aep+0f*/
+      0x41178febU, /*0x1.2f1fd6p+3f*/
+      0x4c5d65a5U, /*0x1.bacb4ap+25f*/
+      0x4e85f412U, /*0x1.0be824p+30f*/
+      0x500ffb03U, /*0x1.1ff606p+33f*/
+      0x5cd69e88U, /*0x1.ad3d1p+58f*/
+      0x65d890d3U, /*0x1.b121a6p+76f*/
+      0x6f31a8ecU, /*0x1.6351d8p+95f*/
+      0x7a17f30aU, /*0x1.2fe614p+117f*/
+  };
   for (int i = 0; i < N; ++i) {
     float x = float(FPBits(INPUTS[i]));
     EXPECT_MPFR_MATCH(mpfr::Operation::Log, x, __llvm_libc::logf(x), 0.5);

From b35ef580d870fbb4e932a3e3a580860ac9d4ba49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= 
Date: Tue, 25 Jan 2022 21:24:05 +0100
Subject: [PATCH 606/946] [NFC] Added test with select with unpredictable
 metadata; regenerate x86-cmov-converter.ll

---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 420 ++++++++++++++------
 1 file changed, 307 insertions(+), 113 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 59f68269381a2..c5ebe87f9754e 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -101,11 +102,34 @@
 
 %struct.Node = type { i32, %struct.Node*, %struct.Node* }
 
-; CHECK-LABEL: CmovInHotPath
-; CHECK-NOT: cmov
-; CHECK: jg
-
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
+; CHECK-LABEL: CmovInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB0_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    leal 1(%rax), %r9d
+; CHECK-NEXT:    imull %esi, %eax
+; CHECK-NEXT:    movl $10, %r10d
+; CHECK-NEXT:    cmpl %edx, %eax
+; CHECK-NEXT:    jg .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    movl %r9d, %r10d
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    imull %r9d, %r10d
+; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r8
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -132,10 +156,33 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: CmovNotInHotPath
-; CHECK: cmovg
-
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
+; CHECK-LABEL: CmovNotInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB1_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edx, %r9d
+; CHECK-NEXT:    movl %edi, %r10d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $10, %r11d
+; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    imull %esi, %edx
+; CHECK-NEXT:    cmpl %r9d, %edx
+; CHECK-NEXT:    cmovgl %r11d, %eax
+; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
+; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %r9d
+; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r10
+; CHECK-NEXT:    jne .LBB1_2
+; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -164,11 +211,34 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: MaxIndex
-; CHECK-NOT: cmov
-; CHECK: jg
-
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB2_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB2_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB2_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB2_2
+; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -197,11 +267,82 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: MaxValue
-; CHECK-NOT: jg
-; CHECK: cmovg
+; TODO: If cmov instruction is marked as unpredictable, do not convert it to branch.
+define i32 @MaxIndex_unpredictable(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB3_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB3_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB3_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB3_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB3_2
+; CHECK-NEXT:  .LBB3_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
+entry:
+  %cmp14 = icmp sgt i32 %n, 1
+  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
+  ret i32 %t.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
+  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %t.015 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %cmp3 = icmp sgt i32 %0, %1
+  %2 = trunc i64 %indvars.iv to i32
+  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxValue:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB4_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    cmovgl %edi, %eax
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    cmpq %rdx, %rcx
+; CHECK-NEXT:    jne .LBB4_2
+; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -227,10 +368,25 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: BinarySearch
-; CHECK: set
-
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
+; CHECK-LABEL: BinarySearch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    jmp .LBB5_2
+; CHECK-NEXT:  .LBB5_1: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    btl %eax, %edi
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
+; CHECK-NEXT:  .LBB5_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rdx), %ecx
+; CHECK-NEXT:    cmpl %ecx, %eax
+; CHECK-NEXT:    ja .LBB5_1
+; CHECK-NEXT:  # %bb.3: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -287,20 +443,40 @@ while.end:                                        ; preds = %while.body, %entry
 ;;                                          ; previous Phi instruction result
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-; CHECK-LABEL: Transform
-; CHECK-NOT: cmov
-; CHECK:         divl    [[a:%[0-9a-z]*]]
-; CHECK:         movl    $11, [[s1:%[0-9a-z]*]]
-; CHECK:         movl    [[a]], [[s2:%[0-9a-z]*]]
-; CHECK:         cmpl    [[a]], %edx
-; CHECK:         ja      [[SinkBB:.*]]
-; CHECK: [[FalseBB:.*]]:
-; CHECK:         movl    $22, [[s1]]
-; CHECK:         movl    $22, [[s2]]
-; CHECK: [[SinkBB]]:
-; CHECK:         ja
-
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
+; CHECK-LABEL: Transform:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB6_5
+; CHECK-NEXT:  # %bb.1: # %while.body.preheader
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:  .LBB6_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movslq %esi, %rsi
+; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r8d
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl $11, %eax
+; CHECK-NEXT:    movl %r8d, %ecx
+; CHECK-NEXT:    cmpl %r8d, %edx
+; CHECK-NEXT:    ja .LBB6_4
+; CHECK-NEXT:  # %bb.3: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    movl $22, %eax
+; CHECK-NEXT:    movl $22, %ecx
+; CHECK-NEXT:  .LBB6_4: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
+; CHECK-NEXT:    addl $1, %esi
+; CHECK-NEXT:    cmpl %r9d, %esi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  .LBB6_5: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -328,16 +504,36 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB7_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%rcx), %eax
-; CHECK:       [[FALSE_BB]]:
+  ret i32 %z
+}
+
+; TODO: If cmov instruction is marked as unpredictable, do not convert it to branch.
+define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
+; CHECK-LABEL: test_cmov_memoperand_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB8_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB8_2: # %entry
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp ugt i32 %a, %b
+  %load = load i32, i32* %y
+  %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
   ret i32 %z
 }
 
@@ -345,29 +541,25 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB9_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB9_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -376,29 +568,25 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    jbe .LBB10_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB10_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
-; CHECK-NOT:     cmov
-; CHECK:         jbe [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -408,15 +596,19 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    cmoval %edx, %eax
+; CHECK-NEXT:    cmoval (%r8), %edx
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
-; CHECK:         cmoval
-; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -426,18 +618,19 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB12_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB12_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -445,20 +638,21 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB13_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    movl (%rax), %eax
+; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movq (%r{{..}}), %[[R1:.*]]
-; CHECK:         movl (%[[R1]]), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -467,19 +661,20 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB14_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB14_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %r
 }
 
@@ -495,34 +690,35 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
-; CHECK-NEXT:  .LBB13_1: # %loop.body
+; CHECK-NEXT:  .LBB15_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_3
+; CHECK-NEXT:    ja .LBB15_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB13_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB15_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_5
+; CHECK-NEXT:    ja .LBB15_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB13_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB15_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB13_1
+; CHECK-NEXT:    jl .LBB15_1
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -538,11 +734,9 @@ loop.body:
   %iv.next = add i32 %phi.iv, 1
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
-
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
+!0 = !{}

From 7dc705f86dd3a0fd8afd6f7db3ea132e2a36fa61 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Tue, 25 Jan 2022 15:26:32 -0500
Subject: [PATCH 607/946] [libc++][NFC] Fix typo

---
 libcxx/src/filesystem/directory_iterator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index 90b255d9877f9..98928f06c868a 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -197,9 +197,9 @@ class __dir_stream {
       : __stream_(nullptr), __root_(root) {
     if ((__stream_ = ::opendir(root.c_str())) == nullptr) {
       ec = detail::capture_errno();
-      const bool allow_eacess =
+      const bool allow_eacces =
           bool(opts & directory_options::skip_permission_denied);
-      if (allow_eacess && ec.value() == EACCES)
+      if (allow_eacces && ec.value() == EACCES)
         ec.clear();
       return;
     }

From d2cc23a337ddf4e12c5f49664099be2874b16db8 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Tue, 25 Jan 2022 12:39:32 -0800
Subject: [PATCH 608/946] [docs] HowToCrossCompileLLVM.rst: prefer --target=
 over legacy -target

---
 llvm/docs/HowToCrossCompileLLVM.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/HowToCrossCompileLLVM.rst b/llvm/docs/HowToCrossCompileLLVM.rst
index d2dc7bf60e5cb..e1ad8e5f5f4ff 100644
--- a/llvm/docs/HowToCrossCompileLLVM.rst
+++ b/llvm/docs/HowToCrossCompileLLVM.rst
@@ -58,7 +58,7 @@ specific Linux distribution, version or GCC layout, so you'll need to fudge.
 
 In addition to the ones above, you'll also need:
 
- * ``'-target arm-linux-gnueabihf'`` or whatever is the triple of your cross GCC.
+ * ``--target=arm-linux-gnueabihf`` or whatever is the triple of your cross GCC.
  * ``'--sysroot=/usr/arm-linux-gnueabihf'``, ``'--sysroot=/opt/gcc/arm-linux-gnueabihf'``
    or whatever is the location of your GCC's sysroot (where /lib, /bin etc are).
  * Appropriate use of ``-I`` and ``-L``, depending on how the cross GCC is installed,

From 5b55e733a9c3ce471f28d10492f02de9cd213f54 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Tue, 25 Jan 2022 04:32:33 -0500
Subject: [PATCH 609/946] Remove unused <algorithm> include

---
 llvm/include/llvm/ADT/AllocatorList.h        | 1 -
 llvm/include/llvm/ADT/CoalescingBitVector.h  | 1 -
 llvm/include/llvm/ADT/DenseSet.h             | 1 -
 llvm/include/llvm/ADT/MapVector.h            | 1 -
 llvm/include/llvm/ADT/PriorityWorklist.h     | 1 -
 llvm/include/llvm/ADT/SetVector.h            | 1 -
 llvm/include/llvm/Support/BinaryByteStream.h | 1 -
 llvm/include/llvm/Support/Error.h            | 1 -
 llvm/include/llvm/Support/ScopedPrinter.h    | 3 +--
 9 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ADT/AllocatorList.h b/llvm/include/llvm/ADT/AllocatorList.h
index 404a657f27de4..04d0afc9d076e 100644
--- a/llvm/include/llvm/ADT/AllocatorList.h
+++ b/llvm/include/llvm/ADT/AllocatorList.h
@@ -13,7 +13,6 @@
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/simple_ilist.h"
 #include "llvm/Support/Allocator.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 82e2e1a9f9e25..6935c255a099d 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -21,7 +21,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <algorithm>
 #include 
 
 namespace llvm {
diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h
index edce7c43773cf..e767211a09000 100644
--- a/llvm/include/llvm/ADT/DenseSet.h
+++ b/llvm/include/llvm/ADT/DenseSet.h
@@ -17,7 +17,6 @@
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/type_traits.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index f9540999381aa..d281166b3e19b 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -18,7 +18,6 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/ADT/PriorityWorklist.h b/llvm/include/llvm/ADT/PriorityWorklist.h
index 01dd59a2e71a3..e9fbf296973d2 100644
--- a/llvm/include/llvm/ADT/PriorityWorklist.h
+++ b/llvm/include/llvm/ADT/PriorityWorklist.h
@@ -19,7 +19,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h
index 32bcd50966cca..82d5e98afb5dc 100644
--- a/llvm/include/llvm/ADT/SetVector.h
+++ b/llvm/include/llvm/ADT/SetVector.h
@@ -23,7 +23,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/Support/BinaryByteStream.h b/llvm/include/llvm/Support/BinaryByteStream.h
index 7d8b6d2dc43d0..dc4adba26f160 100644
--- a/llvm/include/llvm/Support/BinaryByteStream.h
+++ b/llvm/include/llvm/Support/BinaryByteStream.h
@@ -17,7 +17,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 3997f0ea6db79..881049b15b0df 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -25,7 +25,6 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include 
 #include 
 #include 
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index 803ae47793df0..9bde4f455a2df 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -18,7 +18,6 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 
 namespace llvm {
 
@@ -123,7 +122,7 @@ class ScopedPrinter {
   void indent(int Levels = 1) { IndentLevel += Levels; }
 
   void unindent(int Levels = 1) {
-    IndentLevel = std::max(0, IndentLevel - Levels);
+    IndentLevel = IndentLevel > Levels ? IndentLevel - Levels : 0;
   }
 
   void resetIndent() { IndentLevel = 0; }

From 6427f4c52c31cc36004b14825e6598cd4a43f385 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Tue, 25 Jan 2022 15:48:51 -0500
Subject: [PATCH 610/946] [NFC] Use an llvm::DenseMap instead of std::map in
 CategorizedHelpPrinter::printOptions

---
 llvm/lib/Support/CommandLine.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index ed4f01f176c2f..e517ceb05d9bd 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -45,7 +45,6 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include 
-#include <map>
 #include 
 using namespace llvm;
 using namespace cl;
@@ -2339,7 +2338,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
 protected:
   void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) override {
     std::vector SortedCategories;
-    std::map> CategorizedOptions;
+    DenseMap> CategorizedOptions;
 
     // Collect registered option categories into vector in preparation for
     // sorting.
@@ -2351,10 +2350,6 @@ class CategorizedHelpPrinter : public HelpPrinter {
     array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
                    OptionCategoryCompare);
 
-    // Create map to empty vectors.
-    for (OptionCategory *Category : SortedCategories)
-      CategorizedOptions[Category] = std::vector